The script retrieves the first 5 products from each of the first 3 categories on hub.shop.ebay.com and saves them to a CSV file and an XLS file. The output is refreshed every 15 minutes by cron (so the output may be blank for several seconds while it is being regenerated).
输出:















脚本源代码:
# File: ebay_main.w
# Name: eBay - The World's Online Marketplace
# Description: Script retrieves first 5 products from first 3 categories from hub.shop.ebay.com
# and saves them to CSV file and XLS file
# Input: URL [http://hub.shop.ebay.com/]
# Output format: CSV file, XLS file
# Output fields: Source URL, Category URL, Category Name, Expiration Time, Offer Name,
# Bid History, Current Bid, Image URL, Image Name, Image Alternative Text
#enable logging for debug purpose
#<Logger File>
# Global
# FileName ebay.log
# Level debug
#</Logger>
#main section of script
<Section>
#entry section: cleans up previous output, loads the eBay hub page, walks the
#first categories found on it and finally converts the collected CSV to XLS
#define name of section
Name ebay_main
#define website url
Define $main http://hub.shop.ebay.com/
#public base URL of the image directory; ebay_product.w prefixes it to the
#image file name when it writes <img> tags into ebay_images.html
Define $path http://www.qualityunit.com/fileadmin/scripts/ebay/img/
#create directory for image storage if it does not exist
<Action PHP>
TemplateText if (!file_exists("./img")) mkdir("./img", 0777);
</Action>
#delete old output CSV and HTML files and stored images
#NOTE(review): the globs below match every *.csv / *.html file in the working
#directory, not only ebay_output.csv and ebay_images.html - confirm that the
#script runs in a directory of its own
<Action Exec>
cmd rm -f *.csv
</Action>
<Action Exec>
cmd rm -f *.html
</Action>
<Action Exec>
cmd rm -f img/*.jpg
</Action>
#load content
<Action ContentURL>
#load content from the following URL saved in variable main
URL {$main}
#removes newlines from downloaded content for easier matching
RemoveNewLine
</Action>
#in this section we will iterate through all main categories
<Section While>
#set maximum of iterations (only the first 3 categories are processed)
MaxIterations 3
#this pattern should match category url and category name
<Pattern>
#defines expression which should match the data:
#the href of the category link is stored in $category_url and the text that
#follows the opening <a> tag in $category_name
RegExp <div id="FontGradientLink5"*><a href="{$category_url:re([^"]*)}">{$category_name}
#replace html encoded characters into their normal representation
HtmlSpecialChars
Trim
Compact
</Pattern>
#evaluate script saved in file ebay_productlist.w
#(that script loads {$category_url} and processes the products listed there)
<Action Eval>
File ebay_productlist.w
</Action>
</Section>
#delete old output XLS file
#NOTE(review): same as above - the glob removes every *.xls file in the
#working directory, not only ebay_output.xls
<Action Exec>
cmd rm -f *.xls
</Action>
#convert data from csv file into xls file
#(the separator is the same ';' that ebay_product.w uses when saving the CSV)
<Action ConvertToXLS>
InputFile ebay_output.csv
OutputFile ebay_output.xls
Separator ;
</Action>
</Section>
#run section with name "ebay_main"
Main ebay_main
# File: ebay_productlist.w
#main section of script
<Section>
#category section: loads one category page and hands each of its first
#products over to ebay_product.w
#expects $category_url to be set by the calling script (ebay_main.w)
#define name of section
Name ebay_productlist
#NOTE(review): presumably marks the section as allowed to fail without
#aborting the calling loop - confirm against the script language reference
Optional
#load content
<Action ContentURL>
#load content from the following URL saved in variable category_url
URL {$category_url}
#removes newlines from downloaded content for easier matching
RemoveNewLine
</Action>
#in this section we will iterate through all products in particular category
<Section While>
#set maximum of iterations (only the first 5 products are processed)
MaxIterations 5
#the two patterns below are alternatives for two page layouts; both store
#their result in $product_url and $product_name
<Section Or>
#NOTE(review): exact effect of NoContext is not visible here - presumably the
#matched variables are kept in the surrounding context so that the Eval
#action below can read them; confirm
NoContext
#this pattern should match first version of product url and product name
#(<div class="ittl"> with arbitrary content before the link)
<Pattern>
#defines expression which should match the data
RegExp <div class="ittl"*>{:re(.*?)}<a href="{$product_url:re([^"]*)}"*>{$product_name}
Trim
Compact
</Pattern>
#this pattern should match second version of product url and product name
#(<div class="ttl"> directly followed by the link)
<Pattern>
#defines expression which should match the data
RegExp <div class="ttl"*><a href="{$product_url:re([^"]*)}"*>{$product_name}
Trim
Compact
</Pattern>
</Section>
#evaluate script saved in file ebay_product.w
#(that script loads {$product_url} and writes one CSV row for the product)
<Action Eval>
File ebay_product.w
</Action>
</Section>
</Section>
#run section with name "ebay_productlist"
Main ebay_productlist
# File: ebay_product.w
#main section of script
<Section>
#product section: loads one product page, downloads its image, adds the image
#to ebay_images.html and saves one row with the product data to ebay_output.csv
#expects $product_url, $product_name, $category_url, $category_name and $path
#to be set by the calling scripts (ebay_productlist.w / ebay_main.w)
#define name of section
Name ebay_product
#NOTE(review): presumably marks the section as allowed to fail without
#aborting the calling loop - confirm against the script language reference
Optional
#load content
<Action ContentURL>
#load content from the following URL saved in variable product_url
URL {$product_url}
#removes newlines from downloaded content for easier matching
RemoveNewLine
</Action>
#this pattern should match image url and image alternative text
#(an <img> tag whose id starts with "i_vv4"; src goes to $image_url and
#alt goes to $image_alternative)
#NOTE(review): this and the following patterns are Optional; if one does not
#match, its variable may still hold the value from the previous product -
#verify how the script language scopes variables between iterations
<Pattern>
#defines expression which should match the data
RegExp <img src="{$image_url:re([^"]*)}" id="i_vv4{:re([^"]*)}"*alt="{$image_alternative:re([^"]*)}"{:re([^>]*)}>
#replace html encoded characters into their normal representation
HtmlSpecialChars
Optional
Trim
Compact
</Pattern>
#define image name, which will be stored in csv file:
#first 10 characters of the md5 hash of the product name plus ".jpg"
#NOTE(review): products with identical names get the same image file name
<Action Php>
Code $context->setVariable('$image_name',\
substr(md5($context->getVariable('$product_name')),0,10).".jpg");
</Action>
#image download and HTML output are grouped in an Optional section
<Section>
Optional
#load image from the image url saved in variable image_url and save image to specified file
<Action URLToFile>
URL {$image_url}
FileName img/{$image_name}
</Action>
#add img to HTML: the tag points to {$path}{$image_name} and uses
#"category name - product name" as both alt and title text
<Action Print>
FileName ebay_images.html
Text <img src="{$path}{$image_name}" alt="{$category_name} - {$product_name}" \
title="{$category_name} - {$product_name}" width="60" />
</Action>
</Section>
#this pattern should match expiration time
<Pattern>
#defines expression which should match the data
RegExp <td*><span class="vi-is1-dt"*><span*>{:re(.*?)}<span><span>{$expiration_time:re(.*?)}</span></td>
Optional
#NOTE(review): presumably removes the nested <span> tags that the non-greedy
#capture can contain from $expiration_time - confirm
TagsToStrip span
Trim
Compact
</Pattern>
#this pattern should match bid history
<Pattern>
#defines expression which should match the data
RegExp <span class="vi-is1-s6"*><span><a*><span*>{$bid_history}
Optional
Trim
Compact
</Pattern>
#this pattern should match current bid
<Pattern>
#defines expression which should match the data
RegExp <span*class="vi-is1-prcp"*>{$current_bid}
Optional
Trim
Compact
</Pattern>
#print matched data to csv file
#each Column line names a variable and the column title used for it; the ';'
#separator is the same one ebay_main.w passes to ConvertToXLS
<Action SaveCSV>
FileName ebay_output.csv
Separator ;
Column $product_url, Source URL
Column $category_url, Category URL
Column $category_name, Category Name
Column $expiration_time, Expiration Time
Column $product_name, Offer Name
Column $bid_history, Bid History
Column $current_bid, Current Bid
Column $image_url, Image URL
Column $image_name, Image Name
Column $image_alternative, Image Alternative Text
</Action>
</Section>
#run section with name "ebay_product"
Main ebay_product