Rosetta Code/Count examples

From Rosetta Code
Task
Rosetta Code/Count examples
You are encouraged to solve this task according to the task description, using any language you may know.

Find the total number of programming examples for each task and the total for all tasks.

Essentially, count the number of occurrences of =={{header| on each task page.

Output:

<lang>100 doors: 20 examples.
99 Bottles of Beer: 29 examples.
Abstract type: 10 examples.

Total: X examples.</lang>

AutoHotkey

<lang AutoHotkey>UrlDownloadToFile
 , http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml
 , tasks.xml
; Load the downloaded category listing and prepare to scan it for <cm ... title="..."> entries.
FileRead, tasks, tasks.xml
pos = 0
; Legacy assignment: the value is a single double quote; the trailing `; "` is a comment
; that merely balances the quote for syntax highlighters.
quote = "  ; "
regtitle := "<cm.*?title=" . quote . "(.*?)" . quote
total := 0
; Each successful match stores the task title in title1 and advances pos past the match start.
While, pos := RegExMatch(tasks, regtitle, title, pos + 1)
{
 UrlDownloadToFile
   , % "http://www.rosettacode.org/w/index.php?title=" . title1 . "&action=raw"
   , task.xml
 FileRead, task, task.xml
 ; Only the replacement count (4th parameter) is of interest; the returned string is discarded.
 RegExReplace(task, "\{\{header\|", "", count)
 current :=  title1 . ": " . count . " examples.`n"
 output .= current
 total += count
 TrayTip, current, % current
}
; The task asks for a grand total after the per-task lines.
MsgBox % output . "`nTotal: " . total . " examples."
Return</lang>

C#

Object-oriented solution.

<lang csharp>using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; using System.Net;

/// <summary>
/// Immutable pairing of a task title with the number of examples found on its page.
/// </summary>
class Task {

   /// <summary>Title of the task.</summary>
   public string Name { get; private set; }

   /// <summary>Number of examples counted for the task.</summary>
   public int Examples { get; private set; }

   public Task(string task, int examples) {
       Name = task;
       Examples = examples;
   }

   /// <summary>Renders the task as "NAME: COUNT examples.".</summary>
   public override string ToString() {
       return String.Format("{0}: {1} examples.", Name, Examples);
   }

}

class Program {

   /// <summary>
   /// Returns the titles of every page in the given category, following the API's
   /// "cmcontinue" token so categories with more than 500 members are fully listed.
   /// </summary>
   /// <param name="category">Category name without the "Category:" prefix.</param>
   /// <param name="wc">Web client used for the downloads.</param>
   static List<string> GetTitlesFromCategory(string category, WebClient wc) {
       Regex titlePattern = new Regex("\"title\":\"(.+?)\"");
       Regex continuePattern = new Regex("\"cmcontinue\":\"(.+?)\"");
       string baseUrl = String.Format("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:{0}&cmlimit=500&format=json", category);
       List<string> titles = new List<string>();
       string url = baseUrl;
       string previousToken = null;
       while (true) {
           string content = wc.DownloadString(url);
           // Regex.Unescape turns JSON escapes such as \/ and \uXXXX back into plain characters.
           titles.AddRange(titlePattern.Matches(content).Cast<Match>().Select(x => Regex.Unescape(x.Groups[1].Value)));
           Match next = continuePattern.Match(content);
           if (!next.Success) break;
           string token = Regex.Unescape(next.Groups[1].Value);
           // Guard against a server that keeps handing back the same token.
           if (token == previousToken) break;
           previousToken = token;
           url = baseUrl + "&cmcontinue=" + Uri.EscapeDataString(token);
       }
       return titles;
   }
   /// <summary>
   /// Downloads the raw wiki markup of a page.
   /// </summary>
   /// <param name="page">Page title; spaces are converted to underscores and the result is URL-encoded.</param>
   /// <param name="wc">Web client used for the download.</param>
   static string GetSourceCodeFromPage(string page, WebClient wc) {
       // Titles may contain characters such as '&', '+' or '#' that would otherwise corrupt the query string.
       string title = Uri.EscapeDataString(page.Replace(' ', '_'));
       return wc.DownloadString(
           String.Format("http://www.rosettacode.org/w/index.php?title={0}&action=raw", title)
       );
   }
   /// <summary>
   /// Prints one "NAME: COUNT examples." line per programming task, then the grand total.
   /// </summary>
   static void Main(string[] args) {
       // Built once rather than once per task page.
       Regex header = new Regex("=={{header", RegexOptions.IgnoreCase);
       List<Task> tasks = new List<Task>();
       // WebClient is IDisposable; release it deterministically when done.
       using (WebClient wc = new WebClient()) {
           List<string> tasknames = GetTitlesFromCategory("Programming_Tasks", wc);
           foreach (string task in tasknames) {
               string content = GetSourceCodeFromPage(task, wc);
               int count = header.Matches(content).Count;
               Task t = new Task(task, count);
               Console.WriteLine(t);
               tasks.Add(t);
           }
       }
       Console.WriteLine("\nTotal: {0} examples.", tasks.Select(x => x.Examples).Sum());
   }

}</lang>

Clojure

<lang clojure>(ns count-examples

 (:use [clojure.contrib.http.agent :only (http-agent string)]
       [clojure.contrib.json :only (read-json)]
       [clojure.contrib.string :only (join)]))

(defn rosettacode-get
  "Fetches http://www.rosettacode.org/w/<path>?<query> and returns the response body
  as a string. `params` is a map; keyword keys are rendered by name and every value
  is URL-encoded as UTF-8."
  [path params]
  ;; The argument vector must destructure each map entry into [name value];
  ;; the double brackets had been lost, leaving an invalid `(fn n v ...)`.
  (let [param-string (join "&"
                           (map (fn [[n v]]
                                  (str (if (keyword? n) (name n) n)
                                       "="
                                       (java.net.URLEncoder/encode (str v) "utf-8")))
                                params))]
    (string (http-agent (format "http://www.rosettacode.org/w/%s?%s" path param-string)))))

(defn rosettacode-query
  "Runs an api.php query with action=query and format=json as defaults (entries in
  `params` win on conflict) and returns the parsed JSON response."
  [params]
  (->> params
       (merge {:action "query" :format "json"})
       (rosettacode-get "api.php")
       (read-json)))

(defn list-cm
  "Lists category members for `params`, recursing with the map found under
  :query-continue/:categorymembers until the response no longer contains one.
  Returns the concatenation of the member entries from every response."
  ([params] (list-cm params nil))
  ([params continue]
     (let [request  (merge {:list "categorymembers"} params (or continue {}))
           response (rosettacode-query request)
           members  (get-in response [:query :categorymembers])
           more     (get-in response [:query-continue :categorymembers])]
       (concat members
               (when more
                 (list-cm params more))))))

(defn programming-tasks
  "Returns the titles of all members of Category:Programming_Tasks,
  requested 50 at a time."
  []
  (map :title
       (list-cm {:cmtitle "Category:Programming_Tasks" :cmlimit 50})))

(defn task-count
  "Returns [task n], where n is the number of \"=={{header\" occurrences in the raw
  markup of the page titled `task`."
  [task]
  (let [page (rosettacode-get "index.php" {:action "raw" :title task})
        hits (re-seq #"==\{\{header" page)]
    [task (count hits)]))

(defn print-result
  "Prints \"<task>: <count>\" for every programming task, flushing after each line,
  followed by the sum of all counts."
  []
  (let [task-counts (map task-count (programming-tasks))]
    (doseq [[title n] task-counts]
      (println (str title ":") n)
      (flush))
    (println "Total: " (reduce + 0 (map second task-counts)))))

</lang>
<lang clojure>count-examples> (print-result)
100 doors: 73
24 game: 18
24 game/Solve: 14
99 Bottles of Beer: 89
Abstract type: 27
Accumulator factory: 23
Ackermann function: 73
Active Directory/Connect: 4
Active Directory/Search for a user: 3
Active object: 14
Add a variable to a class instance at runtime: 21
Address of a variable: 20
...
Total: 11216
nil
</lang>

D

Works with: Tango

<lang D> import tango.io.Stdout; import tango.net.http.HttpClient; import tango.net.http.HttpHeaders; import tango.text.xml.Document; import tango.text.Util;

alias HttpHeader.ContentLength CL;

// API query listing (up to 500) members of Category:Programming_Tasks as XML.
auto url = "http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml";

// Downloads the task list, then each task's raw markup, printing a per-task
// example count and a final total.
void main() {

    auto client = new HttpClient (HttpClient.Get, url);
    client.open();
    char[] mainData, tmp;
    int total, i;
    // Sink used by HttpClient.read: appends every received chunk to tmp.
    void cat(void[] content) { tmp ~= cast(char[]) content; }
    if (client.isResponseOK) {
        client.read(&cat, client.getResponseHeaders.getInt(CL));
        mainData = tmp;
        tmp = null;
        auto doc = new Document!(char);
        doc.parse(mainData);
        foreach (n; doc.query.descendant("cm").attribute("title")) {
            auto subClient = new HttpClient(HttpClient.Get,
                    "http://www.rosettacode.org/w/index.php?title=" ~
                    replace(n.value.dup, ' ', '_') ~ "&action=raw");
            subClient.open();
            if (! subClient.isResponseOK) {
                // Report the response of the request that actually failed
                // (previously this printed the outer client's response).
                Stderr (subClient.getResponse);
                break;
            }
            subClient.read(&cat, subClient.getResponseHeaders.getInt(CL));
            foreach (segment; patterns(cast(char[])tmp, "=={{header|")) i++;
            // NOTE(review): the segment count is reduced by one, and by one more when
            // still non-zero; presumably this compensates for how `patterns`
            // delimits segments -- confirm against the Tango documentation.
            --i;
            if (i) --i;
            Stdout.formatln ("{0,-40} - {}", n.value, i);
            total += i;
            // Reset the shared buffer and counter for the next task page.
            tmp = null;
            i = 0;
        }
        Stdout("total examples: ", total).newline;
    } else {
        Stderr (client.getResponse);
    }

} </lang>

F#

Using asynchronous workflows to perform downloads concurrently:

<lang fsharp>#r "System.Xml.Linq.dll"

// API query listing (up to 500) members of Category:Programming_Tasks as XML.
let uri1 = "http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml"

// Builds the raw-markup URL for a task title (the caller supplies an already-encoded title).
let uri2 task = sprintf "http://www.rosettacode.org/w/index.php?title=%s&action=raw" task

// Build one async workflow per "title" attribute found anywhere in the task-list XML.
[|for xml in (System.Xml.Linq.XDocument.Load uri1).Root.Descendants() do

   for attrib in xml.Attributes() do
     if attrib.Name.LocalName = "title" then
       yield async {
         // Spaces become underscores, then the title is URL-encoded for the query string.
         let uri = uri2 (attrib.Value.Replace(" ", "_") |> System.Web.HttpUtility.UrlEncode)
         use client = new System.Net.WebClient()
         let! html = client.AsyncDownloadString(System.Uri uri)
         // Splitting on the marker yields one more piece than there are markers.
         let sols' = html.Split([|"{{header|"|], System.StringSplitOptions.None).Length - 1
         // Serialize printing so lines from concurrent workflows do not interleave.
         lock stdout (fun () -> printfn "%s: %d examples" attrib.Value sols')
         return sols' }|]

// Run every download in parallel, wait for all of them, then print the sum of the counts.
|> Async.Parallel |> Async.RunSynchronously |> fun xs -> printfn "Total: %d examples" (Seq.sum xs)</lang>

This is 21× faster than the Python version thanks to the concurrency.

Haskell

Library: HTTP XML

from HackageDB

<lang haskell>import Network.Browser
import Network.HTTP
import Network.URI
import Data.List
import Data.Maybe
import Text.XML.Light
import Control.Arrow

-- | Right-justify a string in a field of width @w@ by sliding its characters
-- into a buffer of @w@ spaces from the right. Only the last @w@ characters
-- survive when the input is longer than @w@, and a width of 0 fails on any
-- non-empty input because 'tail' is applied to an empty buffer.
justifyR w = foldl ((.return).(++).tail) (replicate w ' ')

-- | Format a label and a number as @"label: "@ followed by the number
-- right-justified in four columns.
showFormatted t n = t ++ ": " ++ justifyR 4 (show n)

-- | Perform a GET request for @url@ inside a browser session that follows
-- redirects and discards the browser's log output; yield the response body.
getRespons url = fmap (rspBody . snd) session
  where
    session = Network.Browser.browse $ do
      setAllowRedirects True
      setOutHandler (const (return ()))     -- quiet
      request (getRequest url)

getNumbOfExampels p = do

 let pg = intercalate "_" $ words p
 rsp <- getRespons $ "http://www.rosettacode.org/w/index.php?title=" ++ pg ++ "&action=raw"
 let taskPage = rsp
     countEx = length $ filter (=="==[[:Category:{{{1}}}|{{{1}}}]] [[Category:{{{1}}}]] Property "Implemented in language" (as page type) with input value "{{{1}}}" contains invalid characters or is incomplete and therefore can cause unexpected results during a query or annotation process. IsTitle}
     fun {$ A} {Atom.toString A.value} end}
 end

in

 {Main}</lang>

Example output:

100 doors: 59 examples.
24 game: 14 examples.
...
Yuletide Holiday: 33 examples.
Zig Zag: 31 examples.
Total: 9849 examples.

Perl

<lang Perl>

  1. !/usr/bin/perl -w

use strict ; use LWP::UserAgent ; use HTML::Parser ; use constant DOCROOT => "http://www.rosettacode.org/wiki" ; use constant SOLUTIONROOT => "http://www.rosettacode.org/w/index.php?title=" ; my %tasklist = ( ) ; #key: last part of solution list URL, value: title of solution my $ua = new LWP::UserAgent ; my $url = DOCROOT . "/Category:Programming_Tasks" ; my $request = HTTP::Request->new( 'GET' => "$url" ) ; my $response = $ua->request( $request ) ; my $counted = 0 ; my $total_examples = 0 ; my $solresponse ; my $p = HTML::Parser->new( api_version => 3 ) ; #parser for list of tasks my $q = HTML::Parser->new( api_version => 3 ) ; #parser for solutions by task $p->handler( start => \&process , "tagname , attr" ) ; $q->handler( text => \&langfinder, "text" ) ;

if ( $response->is_success( ) ) {

  $p->parse( $response->content( ) ) ; 
  foreach my $task( keys %tasklist ) { 
     $request->uri( SOLUTIONROOT . "$task" . "&action=edit" ) ; 
     $solresponse = $ua->request( $request ) ;
     if ( $solresponse->is_success( )) {
        $q->parse( $solresponse->content( ) ) ;
        if ( $tasklist{$task} ) {
            print "$tasklist{$task} : $counted examples!\n" ;
        }
        $counted = 0 ;
        $q->eof( ) ;
     }
     else {
        print "Error: " . $solresponse->code( ) . " " . $solresponse->message( ) . "\n" ;
     }
  }
  $p->eof( ) ;
  print "\nTotal: $total_examples examples.\n" ;

} else {

  print "Error " . $response->code( )  . " " . $response->message( ) . "\n" ;

} sub process( ) {

  return if shift ne "a" ;
  my $props = shift ;
  if ( $props->{href} && $props->{href} =~ m,/wiki/([^:]+), ) {
     if ( $1 !~ /Category/ ) {
        $tasklist{ $1 } = $props->{title} ;
     }
  }

} sub langfinder( ) {

  my $text = shift ;
  while ( $text =~ /header\|.+\}/g ) {
     $counted++ ;
     $total_examples++ ;
  }

} </lang>

Python

<lang python>import urllib, xml.dom.minidom

x = urllib.urlopen("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml")

tasks = [] for i in xml.dom.minidom.parseString(x.read()).getElementsByTagName("cm"):

   t = i.getAttribute('title').replace(" ", "_")
   y = urllib.urlopen("http://www.rosettacode.org/w/index.php?title=%s&action=raw" % t.encode('utf-8'))
   tasks.append( y.read().lower().count("{{header|") )
   print t.replace("_", " ") + ": %d examples." % tasks[-1]

print "\nTotal: %d examples." % sum(tasks)</lang>

R

Library: XML (R)
Library: RCurl

<lang R>
library(XML)
library(RCurl)
# Fetch the XML listing of (up to 500) members of Category:Programming_Tasks.
doc <- xmlInternalTreeParse("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml")
nodes <- getNodeSet(doc,"//cm")
titles = as.character( sapply(nodes, xmlGetAttr, "title") )
headers <- list()  # raw page text per task
counts <- list()   # per task: indices of the space-separated tokens containing "{{header"
# seq_along() yields an empty sequence when there are no titles, unlike 1:length().
for (i in seq_along(titles)){
  headers[[i]] <- getURL( paste("http://rosettacode.org/mw/index.php?title=", gsub(" ", "_", titles[i]), "&action=raw", sep="") )
  counts[[i]] <- strsplit(headers[[i]],split=" ")[[1]]
  counts[[i]] <- grep("\\{\\{header", counts[[i]])
  cat(titles[i], ":", length(counts[[i]]), "examples\n")
}
# Every retained index stands for one matching token, so the flattened length is the total.
cat("Total: ", length(unlist(counts)), "examples\n")
</lang>

Ruby

Library: REXML

First, a RosettaCode module, saved as rosettacode.rb:
<lang ruby>require 'open-uri'
require 'rexml/document'

module RosettaCode

 URL_ROOT = "http://rosettacode.org/mw"
 # Percent-encodes one path segment, query key or query value.
 # Spaces are emitted as %20 rather than "+".
 def self.escape_component(text)
   URI.encode_www_form_component(text.to_s).gsub("+", "%20")
 end
 private_class_method :escape_component

 # Builds "<URL_ROOT>/<page>?<k=v&...>" from a page name and a hash of
 # query parameters. Keys and values are escaped individually, so "&", "="
 # and "+" inside a value cannot break the query string. (URI.escape, used
 # previously, no longer exists in Ruby 3.0 and later.)
 # Prints the URL when $DEBUG is set. Returns the URL string.
 def self.get_url(page, query)
   path = page.split("/", -1).map {|part| escape_component(part)}.join("/")
   params = query.map {|k,v| "%s=%s" % [escape_component(k), escape_component(v)]}.join("&")
   url = "#{URL_ROOT}/#{path}?#{params}"
   p url if $DEBUG
   url
 end
 # Returns the api.php URL carrying the given query parameters.
 def self.get_api_url(query)
   get_url("api.php", query)
 end
 # Yields the title of every member of Category:<category>, requesting 500
 # at a time and following the API's continuation data until none is
 # returned. Without a block, returns an Enumerator over the same titles.
 def self.category_members(category)
   return enum_for(:category_members, category) unless block_given?
   query = {
     "action" => "query",
     "list" => "categorymembers",
     "cmtitle" => "Category:#{category}",
     "format" => "xml",
     "cmlimit" => 500,
   }
   loop do
     url = get_api_url query
     # URI.open with a block closes the connection; Kernel#open no longer
     # accepts URLs in Ruby 3.0 and later.
     doc = REXML::Document.new(URI.open(url, &:read))
     REXML::XPath.each(doc, "//cm") do |task|
       yield task.attribute("title").value
     end
     # Accept both the legacy <query-continue><categorymembers .../> form and
     # the newer <continue .../> element; every attribute found there is fed
     # back as a parameter of the next request.
     continue = REXML::XPath.first(doc, "//query-continue/categorymembers") ||
                REXML::XPath.first(doc, "//continue")
     break if continue.nil?
     continue.attributes.each do |name, value|
       query[name] = value
     end
   end
 end

end</lang>

Then, we implement the task with: <lang ruby>require 'rosettacode'

# Print "<task>: <count>" for every programming task, then a blank line and the total.
total_examples = 0

RosettaCode.category_members("Programming_Tasks") do |task|
  url = RosettaCode.get_url("index.php", {"action" => "raw", "title" => task})
  # URI.open with a block closes the connection once the page has been read.
  examples = URI.open(url, &:read).scan("=={{header").length
  puts "#{task}: #{examples}"
  total_examples += examples
end

# The blank line goes before the total (as one statement, `puts puts ...` printed it after).
puts
puts "Total: #{total_examples}"</lang>

Scala

This was written for Scala 2.8, but Scala 2.7 can be used with slight modifications to the IO library.

Unlike the examples for other languages, it parallelizes the reading and counting, and it also encodes the URL, because some URLs now cause problems. These modifications are minor, though.

It was written in script style.

<lang scala>import java.net.{URL, URLEncoder}
import scala.io.Source.fromURL

// XML listing of (up to 500) members of Category:Programming_Tasks.
val allTasksURL = "http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml"
val allTasks = xml.parsing.XhtmlParser(fromURL(new URL(allTasksURL)))

// Case-insensitive match for the "=={{header|" marker that opens each example.
val regexExpr = "(?i)==\\{\\{header\\|".r

// Raw-markup URL for a task: spaces become underscores, then the title is URL-encoded.
def oneTaskURL(title: String) = "http://www.rosettacode.org/w/index.php?title=%s&action=raw" format URLEncoder.encode(title.replace(' ', '_'), "UTF-8")

// Number of example headers on the task's page, read as UTF-8.
def count(title: String) = (regexExpr findAllIn fromURL(new URL(oneTaskURL(title)))(io.Codec.UTF8).mkString).length

// One future per task so pages are downloaded and counted concurrently.
val counts = for (task <- allTasks \\ "cm" \\ "@title" map (_.text)) yield scala.actors.Futures.future((task, count(task)))

// Applying a future blocks until its (title, count) pair is available.
counts map (_.apply) map Function.tupled("%s: %d examples." format (_, _)) foreach println
println("\nTotal: %d examples." format (counts map (_.apply._2) sum))
</lang>

Tcl

Using the json package from

Library: tcllib

<lang tcl>package require Tcl 8.5
package require http
package require json

# Unbuffered stdout so the progress text written with "puts -nonewline" appears immediately.
fconfigure stdout -buffering none

# get_tasks --
#   Returns the list of page titles in Category:<category>, requesting 500
#   members per call and repeating while the response carries a
#   "query-continue categorymembers cmcontinue" value. Prints progress and
#   elapsed time on stdout; raises an error when a request does not finish
#   with status "ok" and HTTP code 200.
proc get_tasks {category} {
    set t0 [clock milliseconds]
    puts -nonewline "getting $category members..."
    set api http://www.rosettacode.org/w/api.php
    set template {action query list categorymembers cmtitle Category:%s format json cmlimit 500}
    set params [dict create {*}[split [format $template $category]]]
    set titles [list]
    while {true} {
        set url "${api}?[http::formatQuery {*}$params]"
        set token [http::geturl $url]
        set s [http::status $token]
        if {$s ne "ok" || [http::ncode $token] != 200} {
            error "Oops: url=$url\nstatus=$s\nhttp code=[http::code $token]"
        }
        set payload [json::json2dict [http::data $token]]
        http::cleanup $token

        # collect the title of every member in this batch
        foreach member [dict get $payload query categorymembers] {
            lappend titles [dict get [dict create {*}$member] title]
        }

        # a failed lookup means the response carries no continuation: done
        if {[catch {dict get $payload query-continue categorymembers cmcontinue} next_start]} {
            break
        }
        dict set params cmcontinue $next_start
    }
    puts " found [llength $titles] tasks in [expr {[clock milliseconds] - $t0}] milliseconds"
    return $titles
}

# This proc can be replaced by a single regexp command:
#     set count [regexp -all "***=$needle" $haystack]
# However this proc is more efficient -- we're dealing with plain strings only.

# count_substrings --
#   Returns how many times $needle occurs in $haystack. The search resumes one
#   character after the start of each hit, so overlapping occurrences count.
proc count_substrings {needle haystack} {
    set hits 0
    for {set at [string first $needle $haystack 0]} {$at != -1} {set at [string first $needle $haystack [expr {$at + 1}]]} {
        incr hits
    }
    return $hits
}

# Count the "{{header|" markers on every programming task page and print a
# per-task line followed by the grand total.
set total 0
foreach task [get_tasks Programming_Tasks] {
    # http::formatQuery percent-encodes the title, so characters such as "&",
    # "+" or non-ASCII letters cannot corrupt the query string.
    set url "http://www.rosettacode.org/w/index.php?[http::formatQuery title [string map {{ } _} $task] action raw]"
    set response [http::geturl $url]
    if {[set s [http::status $response]] ne "ok" || [http::ncode $response] != 200} {
        error "Oops: url=$url\nstatus=$s\nhttp code=[http::code $response]"
    }
    set count [count_substrings "\{\{header|" [http::data $response]]
    puts [format "%3d examples in %s" $count $task]
    http::cleanup $response
    incr total $count
}

puts "\nTotal: $total examples"</lang>