Rosetta Code/List authors of task descriptions: Difference between revisions
Rosetta Code/List authors of task descriptions (view source)
Revision as of 23:57, 10 October 2017
, 6 years ago→{{header|Perl 6}}: Refactor to make more modular/resumable. Adjust table spacing, now shows date task added and author links
Thundergnat (talk | contribs) m (update full list) |
Thundergnat (talk | contribs) (→{{header|Perl 6}}: Refactor to make more modular/resumable. Adjust table spacing, now shows date task added and author links) |
||
Line 272:
{{works with|Rakudo|2017.08}}
The pseudocode above is no longer really useful as the page format has changed significantly since this task was written. Rather than checking '''every''' edit to see if it was a change to the task description, we'll just assume the user that created the page is the task author. This isn't 100% accurate; a very few pages got renamed and recreated by someone other than the original author without preserving the history, so they are misreported (15 Puzzle Game, for instance), but it is as good as it is likely to get without extensive manual intervention.
The task names and author information are saved to local files so it can pick up where it left off if it gets interrupted during processing. As the task creation time (and original editor) never change, don't bother to re-download every time. Just update the category (Draft or Task) as that is the only thing that really changes. If a task name gets edited, manual intervention is required. Either edit the JSON file with the task information or just delete it and recreate it from scratch.
<lang perl6>use HTTP::UserAgent;
use Gumbo;
use Sort::Naturally;
use JSON::Fast;
my $ua = HTTP::UserAgent.new;
Line 284 ⟶ 285:
for 'Programming_Tasks', 'Draft_Programming_Tasks' -> $category
{ # Get lists of Tasks & Draft Tasks
#
my $page = "http://rosettacode.org/wiki/Category:$category";
my $html
my $xmldoc = parse-html($html, :TAG<div>, :id<mw-pages>);
my @tasks = parse-html($xmldoc[0].Str, :TAG<li>).Str.comb( /'/wiki/' <-["]>+ / )
my $f = open("./RC_{$category}.txt", :w) or die "$!\n";
note "Writing $category file...";
$f.print( @tasks.join("\n") );
$f.close;
}
my %cat = ( # Friendlier descriptions for task categories
'Draft_Programming_Tasks' => 'Draft:'
# Month names for date manipulations
my %months = <January February March April May June July August
September October November December> Z=> ^12;
my $hashfile = './RC_hash.json';
my $htmlfile = './RC_Authors.html';
note "Reading JSON hash file...";
my %tasks = $hashfile.IO.e ?? $hashfile.IO.slurp.&from-json !! ( );
for 'Programming_Tasks', 'Draft_Programming_Tasks' -> $category
{ # Scrape info from each page.
#
my @entries = "./RC_{$category}.txt".IO.slurp.lines;
for @
# Update the category as that is the only thing that can really change.
%tasks{$title}{'category'} = %cat{$category};
# Otherwise skip if it has already been indexed. The creation date can't change
# the task name *can* change, but it is exceedingly rare
if %tasks{$title}{'title'}:exists {
}▼
# Get the earliest edit
my $
▲ $html.content ~~ m|'<li><span class="mw-history-histlinks">' (.+?) '</ul>' |;
# Only interested in the oldest (last in the list)
my $line = $0.lines.tail;
# Parse out the User name
$line ~~ m| 'title="User:' <-[>]>+? '>' (.+?) '</a>' |;
▲ my $auth = $0;
# Oops, no user name, must be anonymous, get IP address instead
unless $auth {
$line ~~ m| '"mw-userlink mw-anonuserlink">' (.+?) '</a>' |;
$auth = $0.Str;
}
%tasks{$title}{'author'} = $auth;
# Parse out human readable title
$line ~~ m| '<a href="/mw/index.php?title=' $title '&' .+? 'title="'(<-["]>+)'"' |; #"'
%tasks{$title}{'title'} = $0.Str;
# Parse out date task was added, convert date to ISO format
▲ my $decoded = $0;
$line ~~ m| 'class="mw-changeslist-date">' <-[\s]>+ (<-[<]>+) '</a>' |;
%tasks{$title}{'date'} = $0.Str.trim.&toISO8601;
# report progress
# save
$hashfile.IO.spurt(%tasks.&to-json);
▲ $f.close;
sleep 3; # Don't pound the server
Line 337 ⟶ 364:
}
#
note "Building HTML table...";
my
my $taskcnt = +%tasks.grep: *.value.<category> eq %cat<Programming_Tasks>;
$cnt++;▼
▲ $cat = 'Task: ';
▲ }
▲};
# Dump an HTML table to a file
my $out = open(
# Add table boilerplate and header
$out.say( '<table border="1" cellpadding="4"><tr><th colspan="2">As of ', Date.today, ' | Total: ',
for %authors.sort(*.key.&naturally) -> $a {▼
# Get sorted unique list of task authors
$out.print( '<tr><td>', $a.key, '</td><td><ol><li>' );▼
$out.
# Get list of tasks by this author, sorted by name
for %tasks.grep( { $_.value.<author> eq $author } ).sort(*.key.&naturally) -> $task {
# and add them
$out.print( "<li>{$task.value.<date>} - {$task.value.<category>}",
" [[{$task.key}|{$task.value.<title>}]]</li>"
}
$out.say( '</table>' );
$out.close;
say "HTML file saved as: {$htmlfile.IO.absolute}";
# Convert a "day month year" date string (e.g. "9 January 2013") to
# ISO 8601 calendar-date form, YYYY-MM-DD.
#
#   $date - whitespace-separated day number, English month name and year.
#
# Returns the formatted string.
sub toISO8601 ($date) { # convert day month year to YYYY-MM-DD
    # .words splits on any run of whitespace, so stray double spaces in the
    # scraped text do not shift the fields.
    my ($day, $month, $year) = $date.words;
    # %months is built with `Z=> ^12`, i.e. January => 0 .. December => 11.
    # ISO 8601 months are 1-based, so add one; without this January is
    # rendered as the invalid month "00" and every other month is off by one.
    sprintf "%4d-%02d-%02d", $year.Int, %months{$month} + 1, $day.Int;
}
</lang>
;Sample output
<table border="1" cellpadding="4"><tr><th colspan="2">As of 2017-
<tr><td><ul>[[User:2Powers|2Powers]]</ul></td><td><ul><ol><li>2013-04-16 - Draft: [[Names_to_numbers|Names to numbers]]</li><li>2013-04-16 - Draft: [[Solving_coin_problems|Solving coin problems]]</li></ol></ul></td></tr>
<tr><td><ul>[[User:12.175.32.19|12.175.32.19]]</ul></td><td><ul><ol><li>2009-10-12 - Task: [[Soundex|Soundex]]</li></ol></ul></td></tr>
<tr><td><ul>[[User:12Me21|12Me21]]</ul></td><td><ul><ol><li>2015-04-04 - Task: [[Draw_a_rotating_cube|Draw a rotating cube]]</li></ol></ul></td></tr>
<tr><td colspan='2'><br/> Many rows omitted... <br/></td></tr>
<tr><td><ul>[[User:Zorro1024|Zorro1024]]</ul></td><td><ul><ol><li>2015-03-16 - Task: [[Perfect_shuffle|Perfect shuffle]]</li><li>2015-02-21 - Draft: [[Vector|Vector]]</li></ol></ul></td></tr>
<tr><td><ul>[[User:Zzo38|Zzo38]]</ul></td><td><ul><ol><li>2015-08-20 - Task: [[Thue-Morse|Thue-Morse]]</li></ol></ul></td></tr>
<tr><td><ul>[[User:Русский|Русский]]</ul></td><td><ul><ol><li>2012-07-31 - Task: [[Main_step_of_GOST_28147-89|Main step of GOST 28147-89]]</li><li>2013-00-09 - Draft: [[Old_Russian_measure_of_length|Old Russian measure of length]]</li><li>2013-04-24 - Draft: [[Transportation_problem|Transportation problem]]</li></ol></ul></td></tr>
</table>
|