DATA RETRIEVED FOR NHL
EXTRACTION SCRIPT USED
# Global logger definition (the block below is commented out, so it is currently disabled).
# When enabled, it logs messages to a file.
#<Logger File>
# Global
# FileName output-main.txt
# # log all messages up to debug messages
# Level debug
#</Logger>
# Section "main": loads the NHL home page, matches the news entries found in
# it and writes each one as an HTML fragment to the output file.
<Section>
Name main
# Load the page content from the URL below.
# NOTE(review): RemoveNewLine and TagsToStrip presumably remove newlines and the
# listed tags (br, nobr, b) from the loaded content before matching - confirm
# against the tool documentation.
<Action ContentURL>
URL http://www.nhl.com/
RemoveNewLine
TagsToStrip br,nobr,b
</Action>
# Name of the output file; it is combined with {$output_dir} in the Print
# actions below.
# NOTE(review): $output_dir is not defined anywhere in this script - presumably
# it is supplied by the tool or by the caller; verify.
Define $output_file nhl-output.php
# Truncate the output file: a Print action with FileMode Write and no Text.
<Action Print>
FileName {$output_dir}{$output_file}
FileMode Write
</Action>
# Loop section.
# NOTE(review): presumably repeats for as long as its patterns keep matching -
# confirm the exact "Section While" semantics in the tool documentation.
<Section While>
# Match the content of one news entry. The inner section is marked Optional
# and ends at the nTarget.push('0'); marker.
Optional
<Section>
EndAt nTarget.push('0');
<Pattern>
# NOTE: * is a helper character standing for [^<]* (see documentation).
# NOTE: a long line can be split with \ followed by a new line.
# NOTE: variable syntax is {$variable:type} (see documentation).
# Captures $image, $title, $article_desc and $link from consecutive
# nImage.push / nHeadline.push / nTeaser.push / nLink.push calls.
RegExp nImage.push('{$image:re(.*?)}');*\
nHeadline.push('{$title:re(.*?)}');*\
nTeaser.push('{$article_desc:re(.*?)}');*\
nLink.push('{$link:re(.*?)}');
</Pattern>
# Print the captured values as HTML: linked headline, image, teaser text.
# NOTE(review): no FileMode is given here, unlike the truncating Print above -
# presumably the default mode appends to the file; confirm.
<Action Print>
FileName {$output_dir}{$output_file}
Text <p><h1><a href="{$link}">{$title}</a></h1></p><p><img src="{$image}" /></p><p>{$article_desc}</p><br><hr>\n
</Action>
</Section>
# Match the same nTarget.push('0'); text that the inner section uses as its
# EndAt marker.
# NOTE(review): presumably this consumes the marker so the loop can advance to
# the next entry - confirm.
<Pattern>
RegExp nTarget.push('0');
</Pattern>
</Section>
</Section>
# Select the section named "main" (defined above) via the Main directive.
# NOTE(review): presumably this marks it as the script's entry point - confirm.
Main main