
输出:
脚本源代码:
# File: expekt_main.w
# Name: WALL STREET JOURNAL in Chinese
# Description: retrieves all news items from the left column at http://chinese.wsj.com/gb/strhrd.asp and outputs them as HTML
# Input: URL [http://chinese.wsj.com/gb/strhrd.asp]
# Output format: HTML file
# Output fields: linked url, title, text(description)
#<Logger File>
# Global
# FileName wsj_log.log
# # log all messages up to debug messages
# Level debug
#</Logger>
<Section>
Name wsj_main
# Scrapes the headline list of the Chinese WSJ page and writes one line per
# news item ("url - title - text") into an HTML file.
# name of the output file
Define $output_file wsj_output.html
# remove cookies left over from a previous run; -f keeps rm silent and
# successful when the file does not exist (e.g. on the very first run)
<Action Exec>
cmd rm -f mcookies.txt
</Action>
# clean output file: open it in Write mode without printing any text
<Action Print>
FileName {$output_file}
FileMode Write
</Action>
# load the page content
<Action ContentURL>
URL http://chinese.wsj.com/gb/strhrd.asp
RemoveNewLine
</Action>
# start $output with a <head> that declares the GB2312 charset.
# The trailing line break is a double-quoted "\n" because PHP does not expand
# \n inside single-quoted strings (it would be emitted as a literal backslash-n).
<Action Php>
Code $context->setVariable('$output', $context->getVariable('$output').'<head><meta http-equiv="Content-Type" content="text/html; charset=GB2312"></head>'."\n");
</Action>
# match the opening tag of the t2lnews2 div before looping over its items
<Pattern>
RegExp <div id="t2lnews2">
</Pattern>
# loop over the news items; the loop ends at the top2right div
<Section While>
EndAt <div id="top2right">
NoContext
# linked url (relative) and title of the news item
<Pattern>
RegExp <a href="{$relative_link_url:re([^"]*)}" target=_blank><span style="font-weight:bolder;">{$title:re([^<]*)}</span>
</Pattern>
# description text under the title; marked Optional, so an item may lack it
<Pattern>
Optional
RegExp </a>{$text:re([^<]*)}<span style="font-size:11px;color:#666666"></span></div>
</Pattern>
# append "absolute url - title - text" followed by a real line break and <br>
# ("\n" is double-quoted for the same reason as in the header action above)
<Action Php>
Code $context->setVariable('$output', $context->getVariable('$output').'http://chinese.wsj.com/gb/'.$context->getVariable('$relative_link_url').' - '.$context->getVariable('$title').' - '.$context->getVariable('$text').' '."\n".'<br>');
</Action>
</Section>
# write the collected output to the output file
<Action Print>
FileName {$output_file}
Text {$output}
</Action>
</Section>
# entry point: run the wsj_main section
Main wsj_main