Rosetta Code/Count examples: Difference between revisions

Content added Content deleted

Inline

Revision as of 11:44, 16 December 2009

Find the total number of programming examples for each task and the total for all tasks.

Essentially, count the number of occurrences of =={{header| on each task page.

Output:

<lang>100 doors: 20 examples. 99 Bottles of Beer: 29 examples. Abstract type: 10 examples.

Total: X examples.</lang>

J

Solution:
Using getHTTP from Web Scraping and utilities from Find unimplemented tasks. <lang j> getAllTaskSolnCounts=: monad define

 NB. Returns a two-item boxed list: the task names and the per-task example counts.
 NB. The argument y is not used.
 tasks=.  getTasks 'Programming_Tasks'
 NB. &> applies getTaskSolnCounts to the opened contents of each box
 NB. (presumably one task name per box -- confirm against getTasks).
 counts=. getTaskSolnCounts &> tasks
 tasks;counts

)

getTaskSolnCounts=: monad define

 NB. y: a task page title.
 NB. Returns the number of '{{header|' occurrences in the text fetched for that page.
 NB. makeuri places its argument between the index.php prefix and the '&action=raw' suffix.
 makeuri=. 'http://www.rosettacode.org/w/index.php?title=' , ,&'&action=raw'
 wikidata=. getHTTP makeuri urlencode y
 NB. E. marks every position where the pattern starts; +/ adds the marks up.
 ([: +/ '{{header|'&E.) wikidata

)

formatSolnCounts=: monad define

 NB. y: a tasks;counts pair such as the result of getAllTaskSolnCounts.
 NB. Returns the per-task report followed by a final 'Total examples: n' line.
 'tasks counts'=. y
 NB. Append ':' to every boxed task name.
 tasks=. tasks , &.>':'
 NB. 8!:0 formats the counts into boxes; ,. stitches the three columns together;
 NB. ;:^:_1 presumably joins the boxes of each row with spaces -- confirm.
 res=. ;:^:_1 tasks ,. (8!:0 counts) ,. <'examples.'
 res , 'Total examples: ' , ": +/counts

) </lang>

Example Usage: <lang j>

  formatSolnCounts getAllTaskSolnCounts

100 doors: 46 examples. 99 Bottles of Beer: 62 examples. Abstract type: 17 examples. Ackermann Function: 52 examples. ... </lang>

Java

Works with: Java version 1.5+

<lang java5> import java.util.ArrayList; import java.util.Iterator; import ScreenScrape;

public class CountProgramExamples { private static final String baseURL = "http://rosettacode.org/wiki/"; private static final String rootURL = "http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml"; private static final String taskBegin = "title=\""; private static final String taskEnd = "\""; private static final String exmplBegin = ""; private static final String exmplEnd = ""; private static final String editBegin = "";

/** * @param args */ public static void main(String[] args) { // Setup variables int exTotal = 0; int exSubTot = 0; String title = ""; String taskPage = ""; int startPos = 0; String countStr = ""; try { // Get root query results ArrayList<String> tasks = new ArrayList<String>(); ScreenScrape ss = new ScreenScrape(); String rootPage = ss.read(rootURL); while(rootPage.contains(taskBegin)){ rootPage = rootPage.substring(rootPage.indexOf(taskBegin)+taskBegin.length()); title = rootPage.substring(0, rootPage.indexOf(taskEnd)); if (!title.contains("Category:")) { tasks.add(title); } rootPage = rootPage.substring(rootPage.indexOf(taskEnd)); } // Loop through each task and print count Iterator<String> itr = tasks.iterator(); while(itr.hasNext()) { title = itr.next().replaceAll("'","'"); taskPage = ss.read(baseURL+title.replaceAll(" ", "_")); if (taskPage.contains(exmplBegin)) { startPos = taskPage.lastIndexOf(exmplBegin)+exmplBegin.length(); countStr = taskPage.substring(startPos, taskPage.indexOf(exmplEnd, startPos)); exSubTot = Integer.parseInt(countStr.contains(".") ? countStr.substring(0,countStr.indexOf(".")) : countStr); }else{ exSubTot = 0; while(taskPage.contains(editBegin)) { taskPage = taskPage.substring(taskPage.indexOf(editBegin)+editBegin.length()); exSubTot++; } } exTotal += exSubTot; System.out.println(title+": "+exSubTot+" examples."); } // Print total System.out.println("\nTotal: "+exTotal+" examples."); }catch(Exception e){ System.out.println(title); System.out.println(startPos+":"+taskPage.indexOf(exmplEnd, startPos)); System.out.println(taskPage); e.printStackTrace(System.out); } } } </lang>

This is the ScreenScrape class imported in the above class.

OCaml

Library: ocamlnet

Library: xml-light

execute with:

ocaml str.cma unix.cma  -I +pcre pcre.cma  -I +netsys netsys.cma  -I +equeue equeue.cma \
  -I +netstring netstring.cma  -I +netclient netclient.cma  -I +xml-light xml-light.cma  countex.ml

<lang ocaml>(* Percent-encode every apostrophe in [s] as "%27". *)
let repl_quote s =
  Str.global_replace (Str.regexp_string "'") "%27" s

(* Return a copy of [s] in which every space is replaced by an underscore.
   Uses [String.map] rather than in-place mutation, because strings are
   immutable in current OCaml and [s.[i] <- c] no longer compiles. *)
let repl_space s =
  String.map (fun c -> if c = ' ' then '_' else c) s

(* Count the occurrences of the literal text "=={{header|" in [s]. *)
let count_ex s =
  let marker = Str.regexp_string "=={{header|" in
  (* [found] is the number of hits so far, [from] the next search offset. *)
  let rec loop found from =
    match Str.search_forward marker s from with
    | hit -> loop (found + 1) (hit + 1)
    | exception Not_found -> found
  in
  loop 0 0

(* Entry point: fetch the task list, print the example count of each task
   and, when the program exits, the grand total. *)
let () =
  let tasks_url =
    "http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&\
     cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml" in
  let listing = Xml.parse_string (Http_client.Convenience.http_get tasks_url) in

  let grand_total = ref 0 in
  (* Registered up front so the total is printed whenever the program exits. *)
  at_exit (fun () -> Printf.printf "\n Total: %d\n" !grand_total);

  (* Handle one child of <categorymembers>; anything but a <cm> is ignored. *)
  let report_task node =
    match node with
    | Xml.Element ("cm", attrs, _) ->
        let shown = List.assoc "title" attrs in
        let page = repl_quote (repl_space shown) in
        let raw_url =
          "http://www.rosettacode.org/w/index.php?title=" ^ page ^ "&action=raw" in
        let found = count_ex (Http_client.Convenience.http_get raw_url) in
        Printf.printf "%s: %d\n%!" shown found;
        grand_total := found + !grand_total
    | _ -> ()
  in

  (* Only the exact api/query/categorymembers shape is processed. *)
  match listing with
  | Xml.Element ("api", [],
      [Xml.Element ("query", [],
        [Xml.Element ("categorymembers", [], members)])]) ->
      List.iter report_task members
  | _ -> ()</lang>

Perl

<lang perl>#!/usr/bin/perl -w

# Lists every page linked from Category:Programming_Tasks, downloads the edit
# view of each one and counts the "header|...}" markers found in its text.

use strict ;
use LWP::UserAgent ;
use HTML::Parser ;
use constant DOCROOT => "http://www.rosettacode.org/wiki" ;
use constant SOLUTIONROOT => "http://www.rosettacode.org/w/index.php?title=" ;

my %tasklist = ( ) ; #key: last part of solution list URL, value: title of solution
my $ua = LWP::UserAgent->new ;
my $url = DOCROOT . "/Category:Programming_Tasks" ;
my $request = HTTP::Request->new( 'GET' => "$url" ) ;
my $response = $ua->request( $request ) ;
my $counted = 0 ;          #examples found on the page currently being parsed
my $total_examples = 0 ;   #examples found on all pages
my $solresponse ;
my $p = HTML::Parser->new( api_version => 3 ) ; #parser for list of tasks
my $q = HTML::Parser->new( api_version => 3 ) ; #parser for solutions by task
$p->handler( start => \&process , "tagname , attr" ) ;
$q->handler( text => \&langfinder, "text" ) ;

if ( $response->is_success( ) ) {
   $p->parse( $response->content( ) ) ;
   $p->eof( ) ;   #flush the parser so every link is seen before we iterate
   foreach my $task( keys %tasklist ) {
      $request->uri( SOLUTIONROOT . "$task" . "&action=edit" ) ;
      $solresponse = $ua->request( $request ) ;
      if ( $solresponse->is_success( )) {
         $counted = 0 ;
         $q->parse( $solresponse->content( ) ) ;
         #eof must come before the report: the parser may hold back trailing
         #text until then, and those headers belong to this task
         $q->eof( ) ;
         if ( $tasklist{$task} ) {
            print "$tasklist{$task} : $counted examples!\n" ;
         }
      }
      else {
         print "Error: " . $solresponse->code( ) . " " . $solresponse->message( ) . "\n" ;
      }
   }
   print "\nTotal: $total_examples examples.\n" ;
} else {
   print "Error " . $response->code( )  . " " . $response->message( ) . "\n" ;
}

#Start-tag handler for the category page: remembers every <a> whose href
#points at a /wiki/ page outside any namespace.
sub process {
   return if shift ne "a" ;
   my $props = shift ;
   #the trailing $ rejects namespaced links (Special:..., Help:...) instead of
   #recording their prefix as if it were a task
   if ( $props->{href} && $props->{href} =~ m,/wiki/([^:]+)$, ) {
      if ( $1 !~ /Category/ ) {
         $tasklist{ $1 } = $props->{title} ;
      }
   }
}

#Text handler for a task page: counts every header marker in the chunk.
sub langfinder {
   my $text = shift ;
   #non-greedy, so several headers on one line are counted individually
   while ( $text =~ /header\|.+?\}/g ) {
      $counted++ ;
      $total_examples++ ;
   }
}
</lang>

Python

<lang python>import urllib, xml.dom.minidom

x = urllib.urlopen("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml")

tasks = [] for i in xml.dom.minidom.parseString(x.read()).getElementsByTagName("cm"):

   t = i.getAttribute('title').replace(" ", "_")
   y = urllib.urlopen("http://www.rosettacode.org/w/index.php?title=%s&action=raw" % t)
   tasks.append( y.read().lower().count("{{header|") )
   print t.replace("_", " ") + ": %d examples." % tasks[-1]

print "\nTotal: %d examples." % sum(tasks)</lang>

R

Library: XML (R)

Library: RCurl

<lang R>
# Lists every member of Category:Programming_Tasks, downloads the raw wiki
# text of each task and counts the words containing "{{header".
library(XML)
library(RCurl)
doc <- xmlInternalTreeParse("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml")
nodes <- getNodeSet(doc,"//cm")
titles = as.character( sapply(nodes, xmlGetAttr, "title") )
headers <- list()   # raw page text, one entry per task
counts <- list()    # positions of the matching words, one entry per task
# seq_along() also copes with an empty task list, unlike 1:length(titles).
for (i in seq_along(titles)){
	headers[[i]] <- getURL( paste("http://rosettacode.org/mw/index.php?title=", gsub(" ", "_", titles[i]), "&action=raw", sep="") )
	# Split the page into words, then keep the indices of the header words.
	counts[[i]] <- strsplit(headers[[i]],split=" ")[[1]]
	counts[[i]] <- grep("\\{\\{header", counts[[i]])
	cat(titles[i], ":", length(counts[[i]]), "examples\n")
}
# Every stored index is one example, so the flattened length is the total.
cat("Total: ", length(unlist(counts)), "examples\n")
</lang>

Ruby

Library: REXML

First, a RosettaCode module, saved as rosettacode.rb:
<lang ruby>require 'open-uri'
require 'rexml/document'

module RosettaCode

 # Builds the URL of a script under http://www.rosettacode.org/w/.
 #
 # page  - script name relative to /w/, e.g. "api.php" or "index.php".
 # query - Hash of query parameters; keys and values are form-encoded.
 #
 # Returns the URL as a String.
 def self.rc_url(page, query)
   # URI.escape is gone from current Ruby, so percent-encode the path by hand:
   # everything except unreserved characters and "/" is escaped, which also
   # turns "+" into "%2B".
   safe_page = page.to_s.gsub(/[^A-Za-z0-9\-._~\/]/) do |char|
     char.bytes.map { |byte| "%%%02X" % byte }.join
   end
   # encode_www_form escapes "&", "=" and "+" inside keys and values, so a
   # title containing them can no longer corrupt the query string.
   "http://www.rosettacode.org/w/%s?%s" % [safe_page, URI.encode_www_form(query)]
 end

 # Yields the title of every member of the given category, following the
 # API's continuation markers until the listing is exhausted.
 #
 # category - category name without the "Category:" prefix.
 #
 # Returns an Enumerator over the titles when no block is given.
 def self.rc_tasks(category)
   return enum_for(:rc_tasks, category) unless block_given?

   query = {
     "action" => "query",
     "list" => "categorymembers",
     "cmtitle" => "Category:#{category}",
     "format" => "xml",
     "cmlimit" => 500,
   }
   loop do
     url = rc_url "api.php", query
     # URI#open (from open-uri) works where Kernel#open no longer accepts URLs;
     # the block form closes the stream once the document has been parsed.
     doc = URI.parse(url).open { |stream| REXML::Document.new(stream) }

     REXML::XPath.each(doc, "//cm") do |task|
       yield task.attribute("title").value
     end

     # A <query-continue> element means there is another batch to fetch.
     continue = REXML::XPath.first(doc, "//query-continue")
     break if continue.nil?
     cm = REXML::XPath.first(continue, "categorymembers")
     query["cmcontinue"] = cm.attribute("cmcontinue").value
   end
 end

end</lang>

Then, we implement the task with: <lang ruby>require 'rosettacode'

# Print the number of "=={{header" markers on every task page, then the total.
total_examples = 0

RosettaCode.rc_tasks("Programming_Tasks") do |task|
  url = RosettaCode.rc_url("index.php", {"action" => "raw", "title" => task})
  # URI#read (from open-uri) works where Kernel#open no longer accepts URLs.
  examples = URI.parse(url).read.scan("=={{header").length
  puts "#{task}: #{examples}"
  total_examples += examples
end

# Blank separator line first, then the grand total.
puts
puts "Total: #{total_examples}"</lang>

Scala

This was written for Scala 2.8, but Scala 2.7 can be used with slight modifications to the IO library.

Unlike the examples for other languages, it parallelizes the reading and counting, and it also encodes the URL, because some URLs are now causing problems. These modifications are minor, though.

It was written in script style.

<lang scala>import java.net.{URL, URLEncoder}
import scala.io.Source.fromURL

// Listing of every member of Category:Programming_Tasks, parsed as XML.
val allTasksURL = "http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml"
val allTasks = xml.parsing.XhtmlParser(fromURL(new URL(allTasksURL)))

// Case-insensitive match of the wiki markup that opens one example.
val regexExpr = "(?i)==\\{\\{header\\|".r
// URL of the raw wiki text of a task; the title is URL-encoded.
def oneTaskURL(title: String) = "http://www.rosettacode.org/w/index.php?title=%s&action=raw" format URLEncoder.encode(title.replace(' ', '_'), "UTF-8")
// Number of examples on one task page.
def count(title: String) = regexExpr findAllIn fromURL(new URL(oneTaskURL(title)))(io.Codec.UTF8).mkString length

// One future per task, so the pages are fetched and counted in parallel.
val counts = for (task <- allTasks \\ "cm" \\ "@title" map (_.text)) yield scala.actors.Futures.future((task, count(task)))

// Applying a future waits for its result.
counts map (_.apply) map Function.tupled("%s: %d examples." format (_, _)) foreach println
println("\nTotal: %d examples." format (counts map (_.apply._2) sum))
</lang>

Tcl

Using the json package from

Library: tcllib

<lang tcl>package require Tcl 8.5
package require http
package require json

# Unbuffered output, so partial progress lines show up immediately.
fconfigure stdout -buffering none

# Returns the titles of all members of the given category, following the
# API's continuation markers until the listing is complete.
# Progress and timing are written to stdout; a failed request raises an error.
proc get_tasks {category} {
    set t0 [clock milliseconds]
    puts -nonewline "getting $category members..."
    set api http://www.rosettacode.org/w/api.php
    set template {action query list categorymembers cmtitle Category:%s format json cmlimit 500}
    set params [dict create {*}[split [format $template $category]]]
    set titles {}

    while {true} {
        set url "${api}?[http::formatQuery {*}$params]"
        set token [http::geturl $url]
        set s [http::status $token]
        if {$s ne "ok" || [http::ncode $token] != 200} {
            error "Oops: url=$url\nstatus=$s\nhttp code=[http::code $token]"
        }
        set reply [json::json2dict [http::data $token]]
        http::cleanup $token

        # collect the title of every member in this batch
        foreach member [dict get $reply query categorymembers] {
            lappend titles [dict get [dict create {*}$member] title]
        }

        # a missing continuation key means this was the last batch
        if {[catch {dict get $reply query-continue categorymembers cmcontinue} next_start]} {
            break
        }
        dict set params cmcontinue $next_start
    }
    puts " found [llength $titles] tasks in [expr {[clock milliseconds] - $t0}] milliseconds"
    return $titles
}

# This proc can be replaced by a single regexp command:
#     set count [regexp -all "***=$needle" $haystack]
# However this proc is more efficient -- we're dealing with plain strings only.
#
# Returns the number of places in haystack where needle starts. The search
# resumes one character after each hit, so overlapping hits are counted.
proc count_substrings {needle haystack} {
    set count 0
    set idx 0
    while {[set idx [string first $needle $haystack $idx]] != -1} {
        incr count
        incr idx
    }
    return $count
}

# Fetch the raw wiki text of every task, report its example count and keep
# a running total.
set total 0
foreach task [get_tasks Programming_Tasks] {
    # http::formatQuery encodes the title, so characters such as "+" or "&"
    # cannot be misread as part of the query string.
    set page [string map {{ } _} $task]
    set url "http://www.rosettacode.org/w/index.php?[http::formatQuery title $page action raw]"
    set response [http::geturl $url]
    if {[set s [http::status $response]] ne "ok" || [http::ncode $response] != 200} {
        error "Oops: url=$url\nstatus=$s\nhttp code=[http::code $response]"
    }
    set count [count_substrings "\{\{header|" [http::data $response]]
    puts [format "%3d examples in %s" $count $task]
    http::cleanup $response
    incr total $count
}

puts "\nTotal: $total examples"</lang>