
El resultado HTML muestra las primeras 10 imágenes disponibles en http://en.wikipedia.org/wiki/Main_Page. El resultado se actualiza cada 15 minutos.
Resultados:
Source URL;Image URL;Name
"http://en.wikipedia.org/wiki/Main_Page";"http://en.wikipedia.org/wiki/Main_Page///upload.wikimedia.org/wikipedia/commons/thumb/7/73/Linden3cropped1.JPG/100px-Linden3cropped1.JPG";"100px-Linden3cropped1.JPG"
"http://en.wikipedia.org/wiki/Main_Page";"http://en.wikipedia.org/wiki/Main_Page///upload.wikimedia.org/wikipedia/commons/thumb/3/30/William_Fawcett_%281902%E2%80%931941%29.png/95px-William_Fawcett_%281902%E2%80%931941%29.png";"95px-William_Fawcett_%281902%E2%80%931941%29.png"
"http://en.wikipedia.org/wiki/Main_Page";"http://en.wikipedia.org/wiki/Main_Page///upload.wikimedia.org/wikipedia/commons/thumb/0/09/DrW.jpg/80px-DrW.jpg";"80px-DrW.jpg"
"http://en.wikipedia.org/wiki/Main_Page";"http://en.wikipedia.org/wiki/Main_Page///upload.wikimedia.org/wikipedia/commons/thumb/9/98/Bernadette_Soubirous.jpg/78px-Bernadette_Soubirous.jpg";"78px-Bernadette_Soubirous.jpg"
"http://en.wikipedia.org/wiki/Main_Page";"http://en.wikipedia.org/wiki/Main_Page///upload.wikimedia.org/wikipedia/commons/thumb/f/f8/Tramp_smoking_cigar_with_cane_over_arm_-_restoration.jpg/250px-Tramp_smoking_cigar_with_cane_over_arm_-_restoration.jpg";"250px-Tramp_smoking_cigar_with_cane_over_arm_-_restoration.jpg"
"http://en.wikipedia.org/wiki/Main_Page";"http://en.wikipedia.org/wiki/Main_Page///upload.wikimedia.org/wikipedia/en/9/9d/Commons-logo-31px.png";"Commons-logo-31px.png"
"http://en.wikipedia.org/wiki/Main_Page";"http://en.wikipedia.org/wiki/Main_Page///upload.wikimedia.org/wikipedia/en/4/46/Wikiquote-logo-51px.png";"Wikiquote-logo-51px.png"
"http://en.wikipedia.org/wiki/Main_Page";"http://en.wikipedia.org/wiki/Main_Page///upload.wikimedia.org/wikipedia/en/e/e3/Wikiversity-logo-41px.png";"Wikiversity-logo-41px.png"
"http://en.wikipedia.org/wiki/Main_Page";"http://en.wikipedia.org/wiki/Main_Page///upload.wikimedia.org/wikipedia/en/7/7f/Wikibooks-logo-35px.png";"Wikibooks-logo-35px.png"
"http://en.wikipedia.org/wiki/Main_Page";"http://en.wikipedia.org/wiki/Main_Page///upload.wikimedia.org/wikipedia/en/b/b6/Wikisource-logo-35px.png";"Wikisource-logo-35px.png"









Código fuente del script:
# File: imgdownloader_main.w
# Name: Image Downloader
# Description: Script opens defined URL, finds first 10 available images,
# downloads them into folder 'images', saves basic information into CSV file
# and makes html with downloaded images.
# Input: URL
# Output format: CSV file, images, HTML
# Output CSV fields: Source URL, Image URL, Name
#<Logger File>
# Global
# FileName imgdownloader_log.log
# Level debug
#</Logger>
# Section imgdownloader_main
# Fetches the page at $url, extracts up to 10 <img> sources from it, downloads
# each image into ./images, appends one row per image to the CSV output file
# and one <img> tag per image to the HTML output file.
<Section>
Name imgdownloader_main
# Output files written by this section.
Define $output_file imgdownloader_output.csv
Define $output_file2 imgdownloader_output.html
# Public base URL used as the prefix of every <img src> in the generated HTML.
Define $path http://www.qualityunit.com/fileadmin/scripts/imgdownloader/images/
# clean output files and download dir
# (Print in Write mode without Text is used here to reset each output file)
<Action Print>
FileName {$output_file}
FileMode Write
</Action>
<Action Print>
FileName {$output_file2}
FileMode Write
</Action>
# -f keeps rm quiet when the directory holds no matching files yet
<Action Exec>
cmd rm -f ./images/*.*
</Action>
# Page that is scanned for images.
Define $url http://en.wikipedia.org/wiki/Main_Page
# downloading the content of URL
<Action ContentURL>
URL {$url}
RemoveNewLine
</Action>
# One iteration per image; MaxIterations caps the run at the first 10 matches.
<Section While>
MaxIterations 10
# two types of patterns: quoted src="..." first, then unquoted src=...
<Section Or>
NoContext
<Pattern>
RegExp <img{:re(.*?)}src="{$url_img:re([^"]*)}"
Trim
Compact
MultiLine
</Pattern>
# Unquoted value: stop at a space or at the closing '>' so that
# <img src=foo.png> does not capture "foo.png>".
<Pattern>
RegExp <img{:re(.*?)}src={$url_img:re([^ >]*)}
Trim
Compact
MultiLine
</Pattern>
</Section>
# relative address -> absolute address
# A source that already carries a scheme (http:, https:, data:, ...) is kept.
# Otherwise it is resolved against $url:
#   //host/path  -> scheme of $url + ":" + source        (protocol-relative)
#   /path        -> scheme://host[:port] of $url + source (root-relative)
#   other        -> directory of $url's path + source     (document-relative)
# Previously every such source was appended to the full page URL, producing
# addresses like http://en.wikipedia.org/wiki/Main_Page///upload.wikimedia.org/...
# The code is written without braces on purpose, so that no "{$" sequence can
# appear once the continuation lines are joined.
<Action Php>
Code $img = $context->getVariable('$url_img');\
$base = parse_url($context->getVariable('$url'));\
$scheme = isset($base['scheme']) ? $base['scheme'] : 'http';\
$root = $scheme.'://'.(isset($base['host']) ? $base['host'] : '').(isset($base['port']) ? ':'.$base['port'] : '');\
$dir = isset($base['path']) ? substr($base['path'], 0, strrpos($base['path'], '/') + 1) : '/';\
if (!preg_match('/^[a-z][a-z0-9+.-]*:/i', $img))\
$context->setVariable('$url_img',\
substr($img, 0, 2) == '//' ? $scheme.':'.$img :\
(substr($img, 0, 1) == '/' ? $root.$img : $root.$dir.$img));
</Action>
# image name
# Last segment of the URL path; the query string is ignored and no trailing
# dot is produced when the file has no extension.
<Action Php>
Code $context->setVariable('$name_img',\
basename((string) parse_url($context->getVariable('$url_img'), PHP_URL_PATH)));
</Action>
# downloading image
<Action URLToFile>
URL {$url_img}
FileName images/{$name_img}
</Action>
# saving basic information into CSV file
<Action SaveCSV>
FileName {$output_file}
Separator ;
Column $url, Source URL
Column $url_img, Image URL
Column $name_img, Name
</Action>
# add img to HTML
<Action Print>
FileName {$output_file2}
Text <img src="{$path}{$name_img}" alt="img-{$_ITERATION}" title="image - {$_ITERATION}" />
</Action>
</Section>
</Section>
# Entry point: run the section defined above.
Main imgdownloader_main