Yahoo! search interface

Create a class for searching Yahoo! results. It must implement a Next Page method, and read URL, Title and Content from results.

AutoHotkey

translated from python example <lang AutoHotkey>test: yahooSearch("test", 1) yahooSearch("test", 2) return

yahooSearch(query, page) {

 global
 start := ((page - 1) * 10) + 1
 filedelete, search.txt
 urldownloadtofile, % "http://search.yahoo.com/search?p=" . query
 . "&b=" . start, search.txt
 fileread, content, search.txt

reg = <a class="yschttl spt" href=".+?" >(.+?)</a>

(.+?)

 index := found := 1
 while (found := regexmatch(content, reg, self, found + 1))
 {
   msgbox % title%A_Index% := fix(self1)
   content%A_Index% := fix(self2)
   url%A_Index% := fix(self3)
 }

}

fix(url) {

if pos := instr(url, "</a>")

StringLeft, url, url, pos - 1 url := regexreplace(url, "<.*?>") return url }</lang>

GUISS

<lang guiss>Start,Programs,Applications,Mozilla Firefox,Inputbox:address bar>www.yahoo.co.uk, Button:Go,Area:browser window,Inputbox:searchbox>elephants,Button:Search</lang>

<lang java>import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern;

class YahooSearch {

   private String query;
   // Page number
   private int page = 1;
   // Regexp to look for the individual results in the returned page
   private static final Pattern pattern = Pattern.compile(

"<a class=\"yschttl spt\" href=\"[^*]+?\\*\\*([^\"]+?)\">(.+?)</a>.*?

(.+?)

");

   public YahooSearch(String query) {
       this.query = query;
   }

   public List<YahooResult> search() throws MalformedURLException, URISyntaxException, IOException {
       // Build the search string, starting with the Yahoo search URL,
       // then appending the query and optionally the page number (if > 1)
       StringBuilder searchUrl = new StringBuilder("http://search.yahoo.com/search?");
       searchUrl.append("p=").append(URLEncoder.encode(query, "UTF-8"));
       if (page > 1) {searchUrl.append("&b=").append((page - 1) * 10 + 1);}
       // Query the Yahoo search engine
       URL url = new URL(searchUrl.toString());
       List<YahooResult> result = new ArrayList<YahooResult>();
       StringBuilder sb = new StringBuilder();
       // Get the search results using a buffered reader
       BufferedReader in = null;
       try {
           in = new BufferedReader(new InputStreamReader(url.openStream()));
           // Read the results line by line
           String line = in.readLine();
           while (line != null) {
               sb.append(line);
               line = in.readLine();
           }
       }
       catch (IOException ioe) {
           ioe.printStackTrace();
       }
       finally {
           try {in.close();} catch (Exception ignoreMe) {}
       }
       String searchResult = sb.toString();
       // Look for the individual results by matching the regexp pattern
       Matcher matcher = pattern.matcher(searchResult);
       while (matcher.find()) {
           // Extract the result URL, title and excerpt
           String resultUrl = URLDecoder.decode(matcher.group(1), "UTF-8");
           String resultTitle = matcher.group(2).replaceAll("</?b>", "").replaceAll("", "");
           String resultContent = matcher.group(3).replaceAll("</?b>", "").replaceAll("", "");
           // Create a new YahooResult and add to the list
           result.add(new YahooResult(resultUrl, resultTitle, resultContent));
       }
       return result;
   }

   public List<YahooResult> search(int page) throws MalformedURLException, URISyntaxException, IOException {
       // Set the page number and search
       this.page = page;
       return search();
   }

   public List<YahooResult> nextPage() throws MalformedURLException, URISyntaxException, IOException {
       // Increment the page number and search
       page++;
       return search();
   }

   public List<YahooResult> previousPage() throws MalformedURLException, URISyntaxException, IOException {
       // Decrement the page number and search; if the page number is 1 return an empty list
       if (page > 1) {
           page--;
           return search();
       } else return new ArrayList<YahooResult>();
   }

}

/**
 * One search hit: the target URL, the title and the abstract text.
 *
 * <p>Any of the three values may be {@code null}.
 */
class YahooResult {

    private URL url;
    private String title;
    private String content;

    /**
     * Creates a result from an already parsed URL.
     *
     * @param url     target of the hit, may be {@code null}
     * @param title   title text, may be {@code null}
     * @param content abstract text, may be {@code null}
     */
    public YahooResult(URL url, String title, String content) {
        // Assign fields directly: calling overridable setters from a
        // constructor would run subclass code on a half-built object.
        this.url = url;
        this.title = title;
        this.content = content;
    }

    /**
     * Creates a result from a URL given as text.
     *
     * @param url     target of the hit in string form
     * @param title   title text, may be {@code null}
     * @param content abstract text, may be {@code null}
     * @throws MalformedURLException if {@code url} cannot be parsed
     */
    public YahooResult(String url, String title, String content) throws MalformedURLException {
        this(new URL(url), title, content);
    }

    /** @return the target URL, or {@code null} if none was set */
    public URL getUrl() {
        return url;
    }

    /** @param url the new target URL */
    public void setUrl(URL url) {
        this.url = url;
    }

    /**
     * Parses and stores the target URL.
     *
     * @param url the new target URL in string form
     * @throws MalformedURLException if {@code url} cannot be parsed
     */
    public void setUrl(String url) throws MalformedURLException {
        this.url = new URL(url);
    }

    /** @return the title, or {@code null} if none was set */
    public String getTitle() {
        return title;
    }

    /** @param title the new title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the abstract text, or {@code null} if none was set */
    public String getContent() {
        return content;
    }

    /** @param content the new abstract text */
    public void setContent(String content) {
        this.content = content;
    }

    /**
     * Renders the non-null title and URL as {@code title=...,url=...}.
     *
     * @return the comma-separated description; the empty string when both the
     *         title and the URL are {@code null}
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        if (title != null) {
            sb.append("title=").append(title);
        }
        if (url != null) {
            // Only emit a separator when something precedes it, so the
            // builder never needs its first character inspected or trimmed.
            if (sb.length() > 0) {
                sb.append(',');
            }
            sb.append("url=").append(url);
        }
        return sb.toString();
    }

}

public class TestYahooSearch {

   public static void main(String[] args) throws MalformedURLException, URISyntaxException, IOException {
       // Create a new search
       YahooSearch search = new YahooSearch("Rosetta code");
       // Get the search results
       List<YahooResult> results = search.search();
       // Show the search results
       for (YahooResult result : results) {
           System.out.println(result.toString());
       }
   }

}</lang>

Oz

Library: OzHttpClient

Instead of a class the implementation defines a function which returns a lazy list of result pages. This also makes it possible to request e.g. the first and the third page without any resources wasted on an unneeded second page.

We implement some simple parsing with logic programming. Regular expressions in Oz don't seem to support lazy quantification which makes parsing the result pages with them difficult. <lang oz>declare

 [HTTPClient] = {Module.link ['x-ozlib://mesaros/net/HTTPClient.ozf']}
 [StringX] = {Module.link ['x-oz://system/String.ozf']}
 [Regex] = {Module.link ['x-oz://contrib/regex']}

 %% Displays page 1 and 3 of the search results.
 %% The user can request and display more with context menu->Actions->Make Needed.
 proc {ExampleUsage}    
    Pages = {YahooSearch "Rosetta code"}
 in
    {Inspector.configure widgetShowStrings true}
    {ForAll {Nth Pages 1} Value.makeNeeded}
    {ForAll {Nth Pages 3} Value.makeNeeded}
    %% Display the infinite list of search result pages.
    {Inspect Pages}
 end

 %% Returns a lazy list of pages.
 %% A page is a lazy list of entries like this: result(url:U title:T content:C).
 fun {YahooSearch Query}
    FetchURL = {CreateURLFetcher}
    
    fun {Page Nr}

StartResult = (Nr-1)*10+1 %% only retrieve it when really needed Doc = {Value.byNeed fun {$} {FetchURL "http://search.yahoo.com/search" ["p"#Query "b"#{Int.toString StartResult}]} end} RE = "<a class=\"yschttl spt\" href="

in

%% Lazily returns results. %% In this way it is possible to build the pages list structure %% without creating the single elements %% (e.g. retrieve page 1 and 3 but not 2). for Match in {Regex.allMatches RE Doc} yield:Yield do Xs = {List.drop Doc Match.0.2} in {Yield {ParseEntry Xs}} end

    end
 in
    for PageNr in 1;PageNr+1 yield:Yield do

{Yield {Page PageNr}}

    end
 end

 fun {CreateURLFetcher}
    Client = {New HTTPClient.cgiGET

init(inPrms(toFile:false toStrm:true) httpReqPrms )}

    %% close when no longer used
    {Finalize.register Client proc {$ C} {C closeAll(true)} end}

    fun {FetchURL Url Params}

OutParams

in

{Client getService(Url Params ?OutParams ?_)} OutParams.sOut

    end
 in
    FetchURL
 end

 %% Xs: String containing HtmL
 %% Result: "result(url:U title:T content:C)" or "parseError"
 fun {ParseEntry Xs}
    proc {Parse Root}

R1 R2 R3 R4 R4 R5 R6 R7 Url = {Fix {QuotedString Xs R1}} {Const ">" R1 R2} Title = {Fix {Until "</a>" R2 R3}}

{Const "" R3 R4}

choice %% "enchanted" result?

{Const "

" R4 R5} {Until "

" R5 R6 _}

[] %% result with links into document

{Const "

" R4 R5} {Until "

" R5 R6 _}

[] %% PDF file

{Const "

" R4 R5} {Until "</a>

" R5 R6 _}

[] %% With Review

{Const "

" R4 R5}

R6 = nil %% no nice abstract when a review is there [] %% normal result R6 = R4 end Abstract = choice

{Const "

" R6 R7} {Fix {Until "

" R7 _}} [] {Const "

" R6 R7} {Fix {Until "

" R7 _}}

[] "" end

in

Root = result(url:Url title:Title content:Abstract)

    end
 in
    {CondSelect {SearchOne Parse} 1 parseError}
 end

 %% Result: contents of Xs until M is found.
 %% Xs = {Append M Yr}
 fun {Until M Xs ?Yr}
    L R
 in
    {List.takeDrop Xs {Length M} L R}
    if L == M then Yr = R nil
    elsecase Xs of X|Xr then X|{Until M Xr Yr}
    [] nil then Yr = nil nil
    end
 end

 %% Asserts that Xs starts with C. Returns the remainder in Ys.
 proc {Const C Xs ?Ys}
    {List.takeDrop Xs {Length C} C Ys}
 end

 %% Assert that a quoted string follows.
 %% Returns the unquoted string and binds Ys to the remainder of Xs.
 fun {QuotedString &"|Xs ?Ys}
    fun {Loop Xs Ys}

case Xs of &\\|&"|Xr then &\\|&"|{Loop Xr Ys} [] &"|Xr then Ys = Xr nil [] X|Xr then X|{Loop Xr Ys} end

    end
 in
    {Loop Xs Ys}
 end

 %% Remove formatting tags.
 fun {Fix Xs}

{Until "</a>"

     {FoldL ["" "" "" "" "..."]
      fun {$ Ys Z}

{StringX.replace Ys Z ""}

      end
      Xs}
     _}
 end

in

 {ExampleUsage}</lang>

Perl

<lang perl>package YahooSearch;

use Encode; use HTTP::Cookies; use WWW::Mechanize;

# --- Internals -------------------------------------------------

# apply BLOCK VALUE
# Runs BLOCK with $_ temporarily set to a copy of VALUE and returns
# whatever $_ holds afterwards. The caller's own $_ is restored on exit.
sub apply (&$) {
    my ($code, $value) = @_;
    local $_ = $value;
    $code->();
    return $_;
}

# We construct a cookie to get 100 results per page and prevent
# "enhanced results".

my $search_prefs = 'v=1&n=100&sm=' .

   apply {s/([^a-zA-Z0-9])/sprintf '%%%02X', ord $1/ge}
   join '|',
   map {'!' . $_}
   qw(hsb Zq0 XbM sss dDO VFM RQh uZ0 Fxe yCl GP4 FZK yNC mEG niH);

my $cookies = HTTP::Cookies->new; $cookies->set_cookie(0, 'sB', $search_prefs, '/', 'search.yahoo.com');

my $mech = new WWW::Mechanize

  (cookie_jar => $cookies,
   stack_depth => 0);

sub read_page

{my ($next, $page, @results) =
    ($mech->find_link(text => 'Next >')->url,
     decode 'iso-8859-1', $mech->content);
 while ($page =~ m

{

<a \s class="yschttl \s spt" \s href=" ([^"]+) " \s* > #" (.+?) </a> .+?

(.+?)

}xg)

    {push @results, {url => $1, title => $2, content => $3};
     foreach ( @{$results[-1]}{qw(title content)} )
        {s/<.+?>//g;
         $_ = encode 'utf8', $_;}}
 return $next, \@results;}

# --- Methods ---------------------------------------------------

# Constructor: YahooSearch->new($query)
# Percent-encodes the query (spaces become '+'), fetches the first result
# page through the shared $mech agent and returns an object holding that
# page's results together with the link to the following page.
sub new {
    my ($invocant, $query) = @_;
    my $class = ref($invocant) || $invocant;

    my $escaped = apply {
        s/([^a-zA-Z0-9 ])/sprintf '%%%02X', ord $1/ge;
        s/ /+/g;
    } $query;

    $mech->get('http://search.yahoo.com/search?p=' . $escaped);
    my ($next, $results) = read_page();

    my $self = {link_to_next => $next, results => $results};
    return bless $self, $class;
}

# Returns the list stored under the object's 'results' key
# (the entries of the page most recently read).
sub results {
    my $self = shift;
    return @{ $self->{results} };
}

# Advances the object to the following result page.
# Returns 1 after loading it. When no "next" link is known, the stored
# results are emptied and undef is returned.
sub next_page {
    my $self = shift;
    my $link = $self->{link_to_next};

    if (!$link) {
        $self->{results} = [];
        return undef;
    }

    $mech->get($link);
    my ($following, $found) = read_page();
    $self->{link_to_next} = $following;
    $self->{results}      = $found;
    return 1;
}</lang>

PicoLisp

<lang PicoLisp>(load "@lib/http.l")

# (yahoo Query [Page])
# Requests one page of Yahoo! search results for 'Query' and returns a list
# with one (Url Title Content) sublist per hit. 'Page' defaults to 1; the
# "b" request parameter is computed as 10*(Page-1)+1.
# The body scans the response strictly in order: each 'from' skips past a
# marker and each 'till' collects text up to a delimiter, so the sequence of
# calls mirrors the order of the markup in the page.
# NOTE(review): the empty pattern in (from "") looks like markup that was lost
# when this listing was extracted -- confirm against the original source.
(de yahoo (Query Page)

  (default Page 1)
  (client "search.yahoo.com" 80
     (pack
        "search?p=" (ht:Fmt Query)
        "&b=" (inc (* 10 (dec Page))) )
     (make
        (while (from "<a class=\"yschttl spt\" href=\"")
           (link
              (make
                 (link (till "\"" T))       # Url
                 (from "")
                 (link (till "<" T))        # Title
                 (from "class=\"abstr\"")
                 (from ">")
                 (link                      # Content
                    (pack
                       (make
                          (loop
                             (link (till "<" T))
                             (T (eof))
                             (T (= "</div" (till ">" T)))
                             (char) ) ) ) ) ) ) ) ) ) )</lang>

Output:

: (more (yahoo "test")) ("http://www.test.com/" "Test" "Offers practice online tests for many ... ("http://www.test.com/aboutus.htm" "Test" "Test.com has a successful ... ("http://en.wikipedia.org/wiki/Test" "Test" "YUI Test is a testing ... ("http://en.wikipedia.org/wiki/F-test" "test " "test n. A procedure for ... ...

Python

<lang python>import urllib import re
def fix(x):

p = re.compile(r'<[^<]*?>') return p.sub(, x).replace('&', '&')

class YahooSearch:

def __init__(self, query, page=1): self.query = query self.page = page self.url = "http://search.yahoo.com/search?p=%s&b=%s" %(self.query, ((self.page - 1) * 10 + 1)) self.content = urllib.urlopen(self.url).read() def getresults(self): self.results = []
for i in re.findall('<a class="yschttl spt" href=".+?">(.+?)</a>

(.+?).*?(.+?)', self.content):

title = fix(i[0]) content = fix(i[1]) url = fix(i[2]) self.results.append(YahooResult(title, content, url)) return self.results def getnextpage(self): return YahooSearch(self.query, self.page+1) search_results = property(fget=getresults) nextpage = property(fget=getnextpage)
class YahooResult:
def __init__(self,title,content,url): self.title = title self.content = content self.url = url
# Usage:
x = YahooSearch("test")
for result in x.search_results:
print result.title</lang>
R
Library: RCurl
Library: XML
Rather than using regexes to find the content (like some of the other solutions here) this method parses the HTML and finds the appropriate sections. <lang R>YahooSearch <- function(query, page=1, .opts=list(), ignoreMarkUpErrors=TRUE) {
if(!require(RCurl) || !require(XML)) { stop("Could not load required packages") } # Replace " " with "%20", etc query <- curlEscape(query) # Retrieve page b <- 10*(page-1)+1 theurl <- paste("http://uk.search.yahoo.com/search?p=", query, "&b=", b, sep="") webpage <- getURL(theurl, .opts=.opts) # Save search for nextpage function .Search <- list(query=query, page=page, .opts=.opts, ignoreMarkUpErrors=ignoreMarkUpErrors) assign(".Search", .Search, envir=globalenv()) # Parse HTML; retrieve results block webpage <- readLines(tc <- textConnection(webpage)); close(tc) if(ignoreMarkUpErrors) { pagetree <- htmlTreeParse(webpage, error=function(...){}) } else { pagetree <- htmlTreeParse(webpage) } findbyattr <- function(x, id, type="id") { ids <- sapply(x, function(x) x$attributes[type]) x[ids==id] } body <- pagetree$children$html$children$body bd <- findbyattr(body$children$div$children, "bd") left <- findbyattr(bd$div$children$div$children, "left") web <- findbyattr(left$div$children$div$children, "web") resol <- web$div$children$ol #Get url, title, content from results gettextfromnode <- function(x) { un <- unlist(x$children) paste(un[grep("value", names(un))], collapse=" ") } n <- length(resol) results <- list() length(results) <- n for(i in 1:n) { mainlink <- resoli$children$div$children[1]$div$children$h3$children$a url <- mainlink$attributes["href"] title <- gettextfromnode(mainlink) contenttext <- findbyattr(resoli$children$div$children[2], "abstr", type="class") if(length(contenttext)==0) { contenttext <- findbyattr(resoli$children$div$children[2]$div$children$div$children, "sm-abs", type="class") } content <- gettextfromnode(contenttext$div) resultsi <- list(url=url, title=title, content=content) } names(results) <- as.character(seq(b, b+n-1)) results
}
nextpage <- function() {
if(exists(".Search", envir=globalenv())) { .Search <- get(".Search", envir=globalenv()) .Search$page <- .Search$page + 1L do.call(YahooSearch, .Search) } else { message("No search has been performed yet") }
}
Usage
YahooSearch("rosetta code") nextpage()</lang>
Ruby

This example is in need of improvement:

Should not call URI.escape, because it fails to encode = signs and some other characters that might appear in the term. See URL encoding#Ruby.

Uses

Library: RubyGems
Library: Hpricot

to parse the HTML. Someone more skillful than I at XPath or CSS could tighten up the parse_html method.

<lang ruby>require 'open-uri' require 'hpricot'
SearchResult = Struct.new(:url, :title, :content)
class SearchYahoo
@@urlinfo = [nil, 'ca.search.yahoo.com', 80, '/search', nil, nil]
def initialize(term) @term = term @page = 1 @results = nil @url = URI::HTTP.build(@@urlinfo) end
def next_result if not @results @results = [] fetch_results elsif @results.empty? next_page end @results.shift end
def fetch_results @url.query = URI.escape("p=%s&b=%d" % [@term, @page]) doc = open(@url) { |f| Hpricot(f) } parse_html(doc) end
def next_page @page += 10 fetch_results end
def parse_html(doc) doc.search("div#main").search("div").each do |div| next unless div.has_attribute?("class") and div.get_attribute("class").index("res") == 0 result = SearchResult.new div.search("a").each do |link| next unless link.has_attribute?("class") and link.get_attribute("class") == "yschttl spt" result.url = link.get_attribute("href") result.title = link.inner_text end div.search("div").each do |abstract| next unless abstract.has_attribute?("class") and abstract.get_attribute("class").index("abstr") result.content = abstract.inner_text end @results << result end end
end
s = SearchYahoo.new("test") 15.times do |i|
result = s.next_result puts i+1 puts result.title puts result.url puts result.content puts
end</lang>
Tcl
Translation of: Python
<lang tcl>package require http
proc fix s {
string map {... "" "" "" "" "" ""} \

[regsub "</a>.*" $s ""]

} proc YahooSearch {term {page 1}} {
# Build the (ugly) scraper URL

append re {<a class="yschttl spt" href=".+?" >(.+?)</a>} append re {

(.+?)} append re {

(.+?)}

# Perform the query; note that this handles special characters # in the query term correctly set q [http::formatQuery p $term b [expr {$page*10-9}]] set token [http::geturl http://search.yahoo.com/search?$q] set data [http::data $token] http::cleanup $token
# Assemble the results into a nice list set results {} foreach {- title content url} [regexp -all -inline $re $data] { lappend results [fix $title] [fix $content] [fix $url] }
# set up the call for the next page interp alias {} Nextpage {} YahooSearch $term [incr page]
return $results
}
# Usage: get the first two pages of results
foreach {title content url} [YahooSearch "test"] {
puts $title
} foreach {title content url} [Nextpage] {
puts $title
}</lang>
Works with: Tcl version 8.6
With Tcl 8.6, more options are available for managing the global state, through objects and coroutines. First, an object-based solution that takes the basic YahooSearch functionality and dresses it up to be more Tcl-like: <lang tcl>package require Tcl 8.6
oo::class create WebSearcher {
variable page term results constructor searchTerm { set page 0 set term $searchTerm my nextPage } # This next method *is* a very Tcl-ish way of doing iteration. method for {titleVar contentsVar urlVar body} { upvar 1 $titleVar t $contentsVar c $urlVar v foreach {t c v} $results { uplevel 1 $body } } # Reuse the previous code for simplicity rather than writing it anew # Of course, if we were serious about this, we'd put the code here properly method nextPage {} { set results [YahooSearch $term [incr page]] return }
}
# How to use. Note the 'for' method used below: methods can act as new "keywords"!
set ytest [WebSearcher new "test"] $ytest for title - url {
puts "\"$title\" : $url"
} $ytest nextPage $ytest for title - url {
puts "\"$title\" : $url"
} $ytest delete ;# standard method that deletes the object</lang> However, the paradigm of an iterator is also interesting and is more appropriately supported through a coroutine. This version conceals the fact that the service produces output in pages; care should be taken with it because it can produce rather a lot of network traffic... <lang tcl>package require Tcl 8.6
proc yahoo! term {
coroutine yahoo![incr ::yahoo] apply {term { yield [info coroutine] while 1 { set results [YahooSearch $term [incr step]] if {[llength $results] == 0} { return -code break } foreach {t c u} $results { yield [dict create title $t content $c url $u] } } }} $term
}
# Test by getting the first fifty titles...
set it [yahoo! "test"] for {set i 50} {$i>0} {incr i -1} {
puts [dict get [$it] title] after 300 ;# Slow the code down... :-)
}</lang>
Another approach uses a class, as specified in the task. It also uses an HTML parser from Tcllib (parsing HTML with regular expressions is a particular annoyance of mine).
Works with: Tcl version 8.6
Library: Tcllib (Package: htmlparse)
Library: Tcllib (Package: textutil::adjust)
<lang tcl>package require Tcl 8.6 package require http package require htmlparse package require textutil::adjust
oo::class create yahoosearch {
method search {s} { my variable searchterm page baseurl set searchterm $s set page 1 set baseurl {http://ca.search.yahoo.com/search} }
method getresults {} { my variable state results current_data set results [list] set current_data [dict create] set state looking_for_results htmlparse::parse -cmd [list [self] html_parser_callback] [my gethtml] }
method nextpage {} { my variable page incr page 10 my getresults } method nextresult {} { my variable results page if { ! [info exists results]} { my getresults } elseif {[llength $results] == 0} { my nextpage } set results [lassign $results result] return $result }
method gethtml {} { my variable searchterm page baseurl set url [format {%s?%s} $baseurl [::http::formatQuery p $searchterm b $page]] set response [http::geturl $url] set html [http::data $response] http::cleanup $response return $html }
method html_parser_callback {tag slash param textBehindTheTag} { my variable state results current_data switch -exact -- $state { looking_for_results { if {$tag eq "div" && [string first {id="main"} $param] != -1} { set state ready } } ready { if {($tag eq "div" && [string first {class="res} $param] != -1) || ($tag eq "html" && $slash eq "/") } { #" -- unbalanced quote disturbs syntax highlighting if {[dict size $current_data] > 0} {lappend results $current_data} set current_data [dict create] set state getting_url } } getting_url { if {$tag eq "a" && [string match "*yschttl spt*" $param]} { if {[regexp {href="(.+?)"} $param - url]} { dict set current_data url $url } else { dict set current_data url "no href in tag params: '$param'" } dict set current_data title $textBehindTheTag set state getting_title } } getting_title { if {$tag eq "a" && $slash eq "/"} { set state looking_for_abstract } else { dict append current_data title $textBehindTheTag } } looking_for_abstract { if {$tag eq "span" && [string first {class="url} $param] != -1} { set state ready } elseif {$tag eq "div" && [string first {class="abstr} $param] != -1} { dict set current_data abstract $textBehindTheTag set state getting_abstract } } getting_abstract { if {$tag eq "div" && $slash eq "/"} { set state ready } else { dict append current_data abstract $textBehindTheTag } } } }
}
yahoosearch create searcher searcher search "search text here"
for {set x 1} {$x <= 15} {incr x} {
set result [searcher nextresult] dict with result { puts $title puts $url puts [textutil::adjust::indent [textutil::adjust::adjust $abstract] " "] puts "" }
}</lang>
TXR
The following gives us a shell utility which we can invoke with arguments like "rosetta 0" to get the first page of search results for "rosetta".
The two arguments are handled as if they were two lines of text from a data source using @(next :args). We throw an exception if there is no match (insufficient arguments are supplied). The @(cases) directive has strictly ordered evaluation, so the throw in the second branch does not happen if the first branch has a successful pattern match. If the similar @(maybe) or @(some) directives were used, this wouldn't work.
A little sprinkling of regex is used.
<lang txr>#!/usr/bin/txr -f @(next :args) @(cases) @ QUERY @ PAGE @(or) @ (throw error "specify query and page# (from zero)") @(end) @(next `!wget -O - http://search.yahoo.com/search?p=@QUERY\&b=@{PAGE}1 2> /dev/null`) @(all) @ (coll)<a class="yschttl spt" href="@URL" @/[^>]+/>@TITLE</a>@(end) @(and)

@ (coll)

@ABSTR

@(end)

@(end) @(output) @ (repeat) TITLE: @TITLE URL: @URL TEXT: @ABSTR --- @ (end) @(end) </lang>
Sample run:
$ ./yahoosearch.txr rosetta 0 TITLE: Rosetta | Partner With Our Interactive Marketing Agency Today URL: http://www.rosetta.com/Pages/default.aspx TEXT: Learn about the fastest growing interactive marketing agency in the country - Rosetta. Our strategic marketing planning is custom built and connects you with your ... --- TITLE: Official Rosetta StoneÂ® - Learn a Language Online - Language ... URL: http://www.rosettastone.com/ TEXT: Rosetta Stone is the world's #1 language-learning software. Our comprehensive foreign language program provides language learning for individuals and language learning ... --- TITLE: Rosetta (software) - Wikipedia, the free encyclopedia URL: http://en.wikipedia.org/wiki/Rosetta_(software) TEXT: Rosettais a lightweight dynamic translatorfor Mac OS Xdistributed by Apple. It enabled applications compiled for the PowerPCfamily of processors to run on Apple systems that use... --- TITLE: Rosetta (spacecraft) - Wikipedia, the free encyclopedia URL: http://en.wikipedia.org/wiki/Rosetta_space_probe TEXT: Rosettais a robotic spacecraftof the European Space Agencyon a mission to study the comet 67P/ChuryumovâGerasimenko. Rosetta consists of two main elements: the Rosetta space probeand... --- TITLE: Apple - Mac URL: http://www.apple.com/mac/ TEXT: Discover the world of Mac. Check out MacBook, iMac, iLife, and more. Download QuickTime, Safari, and widgets for free. --- TITLE: Rosetta | Free Music, Tour Dates, Photos, Videos URL: http://www.myspace.com/rosetta TEXT: Rosetta's official profile including the latest music, albums, songs, music videos and more updates. --- TITLE: Rosetta URL: http://rosettaband.com/ TEXT: Metal for astronauts. Philadelphia, since 2003. Contact us at rosettaband@gmail.com Twitter | Facebook --- TITLE: Rosetta URL: http://rosetta.jpl.nasa.gov/ TEXT: The Rosetta spacecraft is on its way to catch and land a robot on a comet! Rosetta will reach comet '67P/Churyumov-Gerasimenko' ('C-G') in 2014. The European Space Agency ... 
--- TITLE: Rosetta : Multi-script Typography URL: http://rosettatype.com/ TEXT: Rosetta is a new independent foundry with a strong focus on multi-script typography. We are committed to promote research and knowledge in that area and to support ... --- TITLE: Rosetta (1999) - IMDb URL: http://www.imdb.com/title/tt0200071/ TEXT: With Ãmilie Dequenne, Fabrizio Rongione, Anne Yernaux, Olivier Gourmet. Young and impulsive Rosetta lives with her alcoholic mother and, moved by despair, she will ... ---

Yahoo! search interface

AutoHotkey

C#

GUISS

Java

Oz

Perl

PicoLisp

Python

R

Ruby

Tcl

TXR