DATA RETRIEVED FOR WSJ
EXTRACTION SCRIPT USED
<Section>
Name main
Define $output_file wsj-output1.html
<Action ContentURL>
URL http://chinese.wsj.com/gb/index.asp
RemoveNewLine
</Action>
<Action Print>
FileName {$output_file}
FileMode Write
Text <head><meta http-equiv="Content-Type" content="text/html; charset=GB2312"></head>
</Action>
<Section While>
<Pattern>
RegExp <tr>*<td><a class=c href="{$relative_link_url}"><span >{$title}</span>\
</a><span class="text1">{$text}</span>*</td>*</tr>*<tr>*<td height="30">*\
<div align="center">* * * </div>*</td>*</tr>
</Pattern>
<Action Print>
FileName {$output_file}
Text {$relative_link_url} - {$title} - {$text}\n<br>
</Action>
</Section>
</Section>
Main main