User:ImplSearchBot/Code: Difference between revisions

m
ImplSearchBot:0
m (ImplSearchBot:0)
m (ImplSearchBot:0)
Line 4:
use MediaWiki::Bot;
use JSON qw/to_json from_json/;
 
my $usage = "Usage: $0 --username=(username) --password=(password) [--posttosite=yes]";
 
my %options;
Line 19 ⟶ 17:
my $cacheonly; # Don't query the wiki for data. Just pull from cache.
my $nosvn;
 
my $result = GetOptions(
my $opt_matrix = {
"wiki=s" => \$wiki,
"username=s" => \$username,
Line 27 ⟶ 26:
"cacheonly" => \$cacheonly,
"nosvn" => \$nosvn,
"cachepath=s" => \$cachepath) };
 
my $result = GetOptions( %$opt_matrix );
$options{'wiki'} = $wiki;
 
Line 52 ⟶ 53:
if defined $cacheonly;
 
my $usage = "Usage: $0 (options)\n The available options are:\n";
$usage .= "\t--$_\n"
foreach (keys %$opt_matrix);
 
$usage .= "Username and password are required if you need to pull data from the wiki. Wiki defaults to Rosetta Code.\n";
# This could be reversed as a qualified statement, but I don't know
 
# if it'd be any less ugly.
 
unless( exists $options{'cacheonly'})
my $wikineeded;
 
$wikineeded = "yes"
unless ( exists $options{'cacheonly'} );
 
$wikineeded = "yes"
if ( exists $options{'post'} );
 
if("yes" eq $wikineeded)
{
ifunless( exists $options{'postusername'} and exists $options{'password'})
{
die $usage;
unless(exists $options{'username'} and exists $options{'password'})
{
use Data::Dumper;
print Dumper(%options);
die $usage;
}
}
}
}
 
 
 
Line 96 ⟶ 106:
# Get a complete listing of the languages.
&out("Getting the languages.\n", 3);
my $alllanguages = &getcategory('Category:Programming Languages');
 
# We want the language name, not the fully-qualified wiki name.
Line 119 ⟶ 129:
&processimplediff(\%impldiff);
 
 
# To add here: Post stats on activities:
# 2. Time last run started
 
my $runtime = time() - $starttime;
Line 149 ⟶ 156:
&out("Done\n", 3);
 
 
# END OF PROGRAM EXECUTION
exit(0);
 
#---------------------
# These are all the functions that break up our work
# into logical chunks.
#---------------------
 
# Builds a simple hash ref associating a page name with body.
# Used to help us prepare our postings in one pass, then commit them
# in a second pass.
sub build_posting
{
Line 159 ⟶ 176:
}
 
 
# Prepares the template body for the unimplemented data.
sub prep_unimp_posting
{
Line 167 ⟶ 186:
foreach my $taskname (@$alltasks)
{
# If it's a category task, the task name will be slightly different.
my $baretaskname = $taskname;
$baretaskname =~ s/^Category://;
my $implpage = $taskname;
$implpage =~ s/^Category:(.*)/$1\/$language/;
 
my $link;
if($taskname eq $baretaskname)
{
$link = "[[$taskname]]";
}
else
{
$link = "[[:$taskname|$baretaskname]]";
}
# Add the task to the unimplemented list, if it's unimplemented, and if it's not in the omit list.
$unimplisting .= "* [[$baretaskname]]\n"
unless(exists $impldiff->{$language}->{'impl'}->{$tasknameimplpage} or exists $impldiff->{$language}->{'omit'}->{$tasknameimplpage});
{
$unimplisting .= "* $link\n";
}
 
}
Line 181 ⟶ 215:
}
 
# Prepares the template body for the listings of omitted data for a page.
sub prep_omit_posting
{
Line 205 ⟶ 240:
}
 
# Prepares the page body that folks look at to find out what changed for a language.
sub prep_listing_posting
{
Line 239 ⟶ 275:
}
 
# Prepare the body of the omit category.
sub prep_omit_cat_posting
{
Line 246 ⟶ 283:
}
 
# Prepare the page title for the page folks look at to see what changed.
sub get_listing_name
{
Line 252 ⟶ 290:
}
 
# Prepare the template name for the omit listing body
sub get_omit_template_name
{
Line 258 ⟶ 297:
}
 
# Prepare the template name for the unimplemented listing body
sub get_unimpl_template_name
{
Line 264 ⟶ 304:
}
 
# Return the name of the category to find the omitted pages for a particular language.
sub get_omit_cat_name
{
Line 270 ⟶ 311:
}
 
# Prepare all the pages associated with a specific language.
sub process_language
{
Line 292 ⟶ 334:
}
 
# Prepare and post all of the core pages for this run.
sub processimplediff
{
my $implediff = shift;
my %work;
 
# Prepare all our work.
foreach my $language (keys %impldiff)
{
Line 303 ⟶ 348:
}
 
# Now that we've prepared all our work, commit it.
foreach my $pagename (keys %work)
{
Line 309 ⟶ 355:
}
 
# Return the MediaWiki editor object.
sub geteditor
{
Line 343 ⟶ 390:
}
 
# Simple logging infrastructure. Currently sends to STDERR or STDOUT, which cron
# emails to the user, which gets forwarded to Short Circuit. (Anyone want a copy?)
sub out
{
Line 363 ⟶ 412:
}
 
# Many popular filesystems can't handle : and \ in filenames.
# Since I plan to open the SVN repo we save to the rest of the world at some point,
# I'm trying to make sure the files are representable.
sub sanitizenamefs
{
Line 370 ⟶ 422:
}
 
# Find all the entries that are in the second list ref, but not the first.
sub diffcat_simple
{
Line 387 ⟶ 440:
}
 
# Find all the entries that are in one listref, but not the other.
sub diffcat
{
Line 396 ⟶ 450:
}
 
# Post a page (or save it to disk, if we're testing.)
sub postpage
{
Line 433 ⟶ 488:
# last ran, so that we can learn to avoid doing unnecessary work.
# (Saves on server resources and bloated edit statistics...)
# Also, the data gets saved to an SVN repo, so that multiple bots can
# use the history. We're using JSON, as JSON has broader
# cross-language support than Data::Dumper, making it easier for others
# to use the data.
# SVN has the advantage that I already know how to use it.
# Git has the advantage in that I can use GitHub and not tax
# my Slice with Git traffic, and I don't have to punch a hole
# in the firewall to access more services.
sub cachedata
{
Line 465 ⟶ 515:
}
 
# Return data we cached previously.
sub getcacheddata
{
Line 473 ⟶ 524:
unless (open $infile, '<', $filename)
{
&out("Failed to load cached data $filename: $!\n", 15);
return [];
}
Line 488 ⟶ 539:
}
 
# Report the changes between two categories.
# More interesting than "x added, y removed"
#
# Arguments (positional):
#   $category - category name; used only to label the report text.
#   $old      - array ref holding the previous member list.
#   $new      - array ref holding the current member list.
#
# The two lists are handed to diffcat(), whose results are used here as
# a pair of array refs (removed, added). Every removed page name and then
# every added page name is written, one per line, into a single message
# that is passed to out() at log level 2.
sub reportcatchanges
{
    my $category = shift;
    my $old = shift;
    my $new = shift;

    my ($removed, $added) = &diffcat($old, $new);

    # Build the whole report first so it goes out as one log entry.
    my $out = "Removed from $category:\n";
    foreach my $page (@$removed)
    {
        $out .= "$page\n";
    }

    $out .= "Added to $category:\n";
    foreach my $page (@$added)
    {
        $out .= "$page\n";
    }

    &out($out, 2);
}
 
# Pull the category data, or cached data if we're not pulling from the wiki.
#
# Arguments (positional):
#   $categoryname - name of the category to fetch.
#
# Returns an array ref of the category's members.
#
# Reads the file-level %options hash and $editor object, and increments
# the file-level $categorypulls counter each time the wiki is queried.
#
# Whichever source is used, the old/new difference is passed to
# reportcatchanges() and the resulting member list is written back
# through cachedata().
sub getcategory
{
    my $categoryname = shift;
    &out("Getting category contents for $categoryname...", 4);

    my $old;
    my $new;

    if( exists $options{'cacheonly'} )
    {
        # Return the cache data if we're not supposed to query the database.
        # NOTE(review): with an empty "old" list every cached member is
        # presumably reported as added -- confirm that is intended.
        $old = [];
        $new = &getcacheddata($categoryname);
    }
    else
    {
        $old = &getcacheddata($categoryname);
        $new = [$editor->get_pages_in_category($categoryname)];

        # Only count a pull when the wiki was actually queried.
        ++$categorypulls;
    }

    &reportcatchanges($categoryname, $old, $new);

    &cachedata("$categoryname", $new);

    &out(scalar @$new . " members returned for $categoryname\n", 5);
    return $new;
}
 
# Find if this category changed, report its contents if it has.
sub getwork
{
Line 542 ⟶ 629:
}
 
# If changes occurred, the info is more important than if they didn't.
sub getloglevelfromdiff
{
Line 552 ⟶ 640:
}
 
# Find all the work items for a given language.
sub getlangwork
{
Line 583 ⟶ 672:
}
 
# Commit the cache.
sub commitcache
{
Line 615 ⟶ 705:
}
 
 
# Wrap svn commands so we can log them.
sub svn
{