Rosetta Code/Count examples: Difference between revisions

From Rosetta Code
Content added Content deleted
m (bugfix)
m (→‎{{header|Tcl}}: link to tcllib library)
Line 281: Line 281:


=={{header|Tcl}}==
=={{header|Tcl}}==
Using the <code>json</code> package from [http://tcllib.sf.net tcllib]
Using the <code>json</code> package from {{libheader|tcllib}}
<lang tcl>package require Tcl 8.5
<lang tcl>package require Tcl 8.5
package require http
package require http

Revision as of 16:19, 15 May 2009

Task
Rosetta Code/Count examples
You are encouraged to solve this task according to the task description, using any language you may know.

Find the total number of programming examples for each task and the total for all tasks.

Essentially, count the number of occurrences of =={{header| on each task page.

Output:

<lang>100 doors: 20 examples. 99 Bottles of Beer: 29 examples. Abstract type: 10 examples.

Total: X examples.</lang>

C#

Object-oriented solution.

<lang csharp>using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; using System.Net;

class Task {

   private string _task;
   private int _examples;
   public Task(string task, int examples) {
       _task = task;
       _examples = examples;
   }
   public string Name {
       get { return _task; }
   }
   public int Examples {
       get { return _examples; }
   }
   public override string ToString() {
       return String.Format("{0}: {1} examples.", this._task, this._examples);
   }

}

class Program {

   static List<string> GetTitlesFromCategory(string category, WebClient wc) {
       string content = wc.DownloadString(
           String.Format("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:{0}&cmlimit=500&format=json", category)
       );
       return new Regex("\"title\":\"(.+?)\"").Matches(content).Cast<Match>().Select(x => x.Groups[1].Value.Replace("\\/", "/")).ToList();
   }
   static string GetSourceCodeFromPage(string page, WebClient wc) {
       return wc.DownloadString(
           String.Format("http://www.rosettacode.org/w/index.php?title={0}&action=raw", page)
       );
   }
   static void Main(string[] args) {
       WebClient wc = new WebClient();
       List<Task> tasks = new List<Task>();
       List<string> tasknames = GetTitlesFromCategory("Programming_Tasks", wc);
       foreach (string task in tasknames) {
           string content = GetSourceCodeFromPage(task, wc);
           int count = new Regex("=={{header", RegexOptions.IgnoreCase).Matches(content).Count;
           Task t = new Task(task, count);
           Console.WriteLine(t);
           tasks.Add(t);
       }
       Console.WriteLine("\nTotal: {0} examples.", tasks.Select(x => x.Examples).Sum());
   }

}</lang>

D

Works with: Tango

<lang D> import tango.io.Stdout; import tango.net.http.HttpClient; import tango.net.http.HttpHeaders; import tango.text.xml.Document; import tango.text.Util;

alias HttpHeader.ContentLength CL;

auto url = "http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml"; void main() {

   auto client = new HttpClient (HttpClient.Get, url);
   client.open();
   char[] mainData, tmp;
   int total, i;
   void cat(void[] content) { tmp ~= cast(char[]) content; }
   if (client.isResponseOK) {
       client.read(&cat, client.getResponseHeaders.getInt(CL));
       mainData = tmp;
       tmp = null;
       auto doc = new Document!(char);
       doc.parse(mainData);
       foreach (n; doc.query.descendant("cm").attribute("title")) {
           auto subClient = new HttpClient(HttpClient.Get, 
                   "http://www.rosettacode.org/w/index.php?title=" ~
                   replace(n.value.dup, ' ', '_') ~ "&action=raw");
           subClient.open();
           if (! subClient.isResponseOK) {
               Stderr (client.getResponse);
                break;
           }
           subClient.read(&cat, subClient.getResponseHeaders.getInt(CL));
           foreach (segment; patterns(cast(char[])tmp, "=={{header|")) i++;
           --i;
           if (i) --i;
           Stdout.formatln ("{0,-40} - {}", n.value, i);
           total += i;
           tmp = null;
           i = 0;
       }
       Stdout("total examples: ", total).newline;
   } else {
       Stderr (client.getResponse);
   }

} </lang>

Java

Works with: Java version 1.5+

<lang java5> import java.util.ArrayList; import java.util.Iterator; import ScreenScrape;

public class CountProgramExamples { private static final String baseURL = "http://rosettacode.org/wiki/"; private static final String rootURL = "http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml"; private static final String taskBegin = "title=\""; private static final String taskEnd = "\""; private static final String exmplBegin = ""; private static final String exmplEnd = ""; private static final String editBegin = "";

/** * @param args */ public static void main(String[] args) { // Setup variables int exTotal = 0; int exSubTot = 0; String title = ""; String taskPage = ""; int startPos = 0; String countStr = ""; try { // Get root query results ArrayList<String> tasks = new ArrayList<String>(); ScreenScrape ss = new ScreenScrape(); String rootPage = ss.read(rootURL); while(rootPage.contains(taskBegin)){ rootPage = rootPage.substring(rootPage.indexOf(taskBegin)+taskBegin.length()); title = rootPage.substring(0, rootPage.indexOf(taskEnd)); if (!title.contains("Category:")) { tasks.add(title); } rootPage = rootPage.substring(rootPage.indexOf(taskEnd)); } // Loop through each task and print count Iterator<String> itr = tasks.iterator(); while(itr.hasNext()) { title = itr.next().replaceAll("'","'"); taskPage = ss.read(baseURL+title.replaceAll(" ", "_")); if (taskPage.contains(exmplBegin)) { startPos = taskPage.lastIndexOf(exmplBegin)+exmplBegin.length(); countStr = taskPage.substring(startPos, taskPage.indexOf(exmplEnd, startPos)); exSubTot = Integer.parseInt(countStr.contains(".") ? countStr.substring(0,countStr.indexOf(".")) : countStr); }else{ exSubTot = 0; while(taskPage.contains(editBegin)) { taskPage = taskPage.substring(taskPage.indexOf(editBegin)+editBegin.length()); exSubTot++; } } exTotal += exSubTot; System.out.println(title+": "+exSubTot+" examples."); } // Print total System.out.println("\nTotal: "+exTotal+" examples."); }catch(Exception e){ System.out.println(title); System.out.println(startPos+":"+taskPage.indexOf(exmplEnd, startPos)); System.out.println(taskPage); e.printStackTrace(System.out); } } } </lang>

This is the ScreenScrape class imported in the above class.

Perl

<lang Perl>

  1. !/usr/bin/perl -w

use strict ; use LWP::UserAgent ; use HTML::Parser ; use constant DOCROOT => "http://www.rosettacode.org/wiki" ; use constant SOLUTIONROOT => "http://www.rosettacode.org/w/index.php?title=" ; my %tasklist = ( ) ; #key: last part of solution list URL, value: title of solution my $ua = new LWP::UserAgent ; my $url = DOCROOT . "/Category:Programming_Tasks" ; my $request = HTTP::Request->new( 'GET' => "$url" ) ; my $response = $ua->request( $request ) ; my $counted = 0 ; my $total_examples = 0 ; my $solresponse ; my $p = HTML::Parser->new( api_version => 3 ) ; #parser for list of tasks my $q = HTML::Parser->new( api_version => 3 ) ; #parser for solutions by task $p->handler( start => \&process , "tagname , attr" ) ; $q->handler( text => \&langfinder, "text" ) ;

if ( $response->is_success( ) ) {

  $p->parse( $response->content( ) ) ; 
  foreach my $task( keys %tasklist ) { 
     $request->uri( SOLUTIONROOT . "$task" . "&action=edit" ) ; 
     $solresponse = $ua->request( $request ) ;
     if ( $solresponse->is_success( )) {
        $q->parse( $solresponse->content( ) ) ;
        if ( $tasklist{$task} ) {
            print "$tasklist{$task} : $counted examples!\n" ;
        }
        $counted = 0 ;
        $q->eof( ) ;
     }
     else {
        print "Error: " . $solresponse->code( ) . " " . $solresponse->message( ) . "\n" ;
     }
  }
  $p->eof( ) ;
  print "\nTotal: $total_examples examples.\n" ;

} else {

  print "Error " . $response->code( )  . " " . $response->message( ) . "\n" ;

} sub process( ) {

  return if shift ne "a" ;
  my $props = shift ;
  if ( $props->{href} && $props->{href} =~ m,/wiki/([^:]+), ) {
     if ( $1 !~ /Category/ ) {
        $tasklist{ $1 } = $props->{title} ;
     }
  }

} sub langfinder( ) {

  my $text = shift ;
  while ( $text =~ /header\|.+\}/g ) {
     $counted++ ;
     $total_examples++ ;
  }

} </lang>

Python

<lang python>import urllib, xml.dom.minidom

x = urllib.urlopen("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml")

tasks = [] for i in xml.dom.minidom.parseString(x.read()).getElementsByTagName("cm"):

   t = i.getAttribute('title').replace(" ", "_")
   y = urllib.urlopen("http://www.rosettacode.org/w/index.php?title=%s&action=raw" % t)
   tasks.append( y.read().lower().count("{{header|") )
   print t.replace("_", " ") + ": %d examples." % tasks[-1]

print "\nTotal: %d examples." % sum(tasks)</lang>

Tcl

Using the json package from

Library: tcllib

<lang tcl>package require Tcl 8.5 package require http package require json

fconfigure stdout -buffering none

proc get_tasks {category} {

   set start [clock milliseconds]
   puts -nonewline "getting $category members..."
   set base_url http://www.rosettacode.org/w/api.php
   set query {action query list categorymembers cmtitle Category:%s format json cmlimit 500}
   set this_query [dict create {*}[split [format $query $category]]]
   set tasks [list]
   while {1} {
       set url [join [list $base_url [http::formatQuery {*}$this_query]] ?]
       set response [http::geturl $url]
       if {[set s [http::status $response]] ne "ok" || [http::ncode $response] != 200} {
           error "Oops: url=$url\nstatus=$s\nhttp code=[http::code $response]"
       }
       set data [json::json2dict [http::data $response]]
       http::cleanup $response
       
       # add tasks to list
       foreach task [dict get $data query categorymembers] {
           lappend tasks [dict get [dict create {*}$task] title]
       }
       
       if {[catch {dict get $data query-continue categorymembers cmcontinue} continue_task] != 0} {
           # no more continuations, we're done
           break
       }
       dict set this_query cmcontinue $continue_task
   }
   puts " found [llength $tasks] tasks in [expr {[clock milliseconds] - $start}] milliseconds"
   return $tasks

}

  1. This proc can be replaced by a single regexp command:
  2. set count [regexp -all "***=$needle" $haystack]
  3. However this proc is more efficient -- we're dealing with plain strings only.

proc count_substrings {needle haystack} {

   set count 0
   set idx 0
   while {[set idx [string first $needle $haystack $idx]] != -1} {
       incr count
       incr idx
   }
   return $count

}

set total 0 foreach task [get_tasks Programming_Tasks] {

   set url [format "http://www.rosettacode.org/w/index.php?title=%s&action=raw" [string map {{ } _} $task]]
   set response [http::geturl $url]
   if {[set s [http::status $response]] ne "ok" || [http::ncode $response] != 200} {
       error "Oops: url=$url\nstatus=$s\nhttp code=[http::code $response]"
   }
   set count [count_substrings "\{\{header|" [http::data $response]]
   puts [format "%3d examples in %s" $count $task]
   http::cleanup $response
   incr total $count

}

puts "\nTotal: $total examples"</lang>