Anonymous user
User:ImplSearchBot/Code: Difference between revisions
m
ImplSearchBot:0
m (ImplSearchBot:0) |
m (ImplSearchBot:0) |
||
Line 4:
use MediaWiki::Bot;
use JSON qw/to_json from_json/;
my %options;
Line 19 ⟶ 17:
my $cacheonly; # Don't query the wiki for data. Just pull from cache.
my $nosvn;
my $opt_matrix = {
"wiki=s" => \$wiki,
"username=s" => \$username,
Line 27 ⟶ 26:
"cacheonly" => \$cacheonly,
"nosvn" => \$nosvn,
"cachepath=s" => \$cachepath
my $result = GetOptions( %$opt_matrix );
$options{'wiki'} = $wiki;
Line 52 ⟶ 53:
if defined $cacheonly;
my $usage = "Usage: $0 (options)\n The available options are:\n";
$usage .= "\t--$_\n"
foreach (keys %$opt_matrix);
$usage .= "Username and password are required if you need to pull data from the wiki. Wiki defaults to Rosetta Code.\n";
my $wikineeded;
$wikineeded = "yes"
unless ( exists $options{'cacheonly'} );
$wikineeded = "yes"
if ( exists $options{'post'} );
if("yes" eq $wikineeded)
{
{
die $usage;
}
}
}
Line 96 ⟶ 106:
# Get a complete listing of the languages.
&out("Getting the languages.\n", 3);
my $alllanguages
# We want the language name, not the fully-qualified wiki name.
Line 119 ⟶ 129:
&processimplediff(\%impldiff);
my $runtime = time() - $starttime;
Line 149 ⟶ 156:
&out("Done\n", 3);
# END OF PROGRAM EXECUTION
exit(0);
#---------------------
# These are all the functions that break up our work
# into logical chunks.
#---------------------
# Builds a simple hash ref associating a page name with body.
# Used to help us prepare our postings in one pass, then commit them
# in a second pass.
sub build_posting
{
Line 159 ⟶ 176:
}
# Prepares the template body for the unimplemented data.
sub prep_unimp_posting
{
Line 167 ⟶ 186:
foreach my $taskname (@$alltasks)
{
#
my $baretaskname = $taskname;
$baretaskname =~ s/^Category://;
my $implpage = $taskname;
$implpage =~ s/^Category:(.*)/$1\/$language/;
my $link;
if($taskname eq $baretaskname)
{
$link = "[[$taskname]]";
}
else
{
$link = "[[:$taskname|$baretaskname]]";
}
# Add the task to the unimplemented list, if it's unimplemented, and if it's not in the omit list.
{
$unimplisting .= "* $link\n";
}
}
Line 181 ⟶ 215:
}
# Prepares the template body for the listings of omitted data for a page.
sub prep_omit_posting
{
Line 205 ⟶ 240:
}
# Prepares the page body that folks look at to find out what changed for a language.
sub prep_listing_posting
{
Line 239 ⟶ 275:
}
# Prepare the body of the omit category.
sub prep_omit_cat_posting
{
Line 246 ⟶ 283:
}
# Prepare the page title for the page folks look at to see what change what.
sub get_listing_name
{
Line 252 ⟶ 290:
}
# Prepare the template name for the omit listing body
sub get_omit_template_name
{
Line 258 ⟶ 297:
}
# Prepare the template name for the unimplemented listing body
sub get_unimpl_template_name
{
Line 264 ⟶ 304:
}
# Return the name of the category to find the omitted pages for a particular language.
sub get_omit_cat_name
{
Line 270 ⟶ 311:
}
# Prepare all the pages associated with a specific language.
sub process_language
{
Line 292 ⟶ 334:
}
# Prepare and post all of the core pages for this run.
sub processimplediff
{
my $implediff = shift;
my %work;
# Prepare all our work.
foreach my $language (keys %impldiff)
{
Line 303 ⟶ 348:
}
# Now that we've prepared all our work, commit it.
foreach my $pagename (keys %work)
{
Line 309 ⟶ 355:
}
# Return the MediaWiki editor object.
sub geteditor
{
Line 343 ⟶ 390:
}
# Simple logging infrastructure. Currently sends to STDWARN or STDOUT, which cron
# emails to the user, which gets forwarded to Short Circuit. (Anyone want a copy?)
sub out
{
Line 363 ⟶ 412:
}
# Many popular filesystems can't handle : and \ in filenames.
# Since I plan to open the SVN repo we save to the rest of the world at some point,
# I'm trying to make sure the files are representable.
sub sanitizenamefs
{
Line 370 ⟶ 422:
}
# Find all the entries that are in the second list ref, but not the first.
sub diffcat_simple
{
Line 387 ⟶ 440:
}
# Find all the entries that are in one listref, but not the other.
sub diffcat
{
Line 396 ⟶ 450:
}
# Post a page (or save it to disk, if we're testing.)
sub postpage
{
Line 433 ⟶ 488:
# last ran, so that we can learn to avoid doing unnecessary work.
# (Saves on server resources and bloated edit statistics...)
# Also,
# use the history. We're using JSON, as JSON has broader
# cross-language support than Data::Dumper, making it easier for others
# to use the data.
sub cachedata
{
Line 465 ⟶ 515:
}
# Return data we cached previously.
sub getcacheddata
{
Line 473 ⟶ 524:
unless (open $infile, '<', $filename)
{
&out("Failed to load cached data $filename: $!\n",
return [];
}
Line 488 ⟶ 539:
}
# Report the changes between two categories.
# More interesting than "x added, y removed"
sub reportcatchanges
{
    # Log which pages were added to / removed from a category since the
    # last run, rather than just reporting counts.
    my $category = shift;   # Category name, used in the report headings.
                            # (Reconstructed: the diff truncated this line to
                            # "my $", and $category is used below and passed
                            # first by the caller in getcategory.)
    my $old      = shift;   # Listref: previous category membership.
    my $new      = shift;   # Listref: current category membership.

    # diffcat returns (entries only in $old, entries only in $new).
    my ($removed, $added) = &diffcat($old, $new);

    my $out = "Removed from $category:\n";
    foreach my $page (@$removed)
    {
        $out .= "$page\n";
    }
    $out .= "Added to $category:\n";
    foreach my $page (@$added)
    {
        $out .= "$page\n";
    }
    # Log level 2: changes are more important than routine status messages.
    &out($out, 2);
}
# Pull the category data, or cached data if we're not pulling from the wiki.
sub getcategory
{
    # Pull a category's member list from the wiki, or from the local cache
    # when --cacheonly was given. Returns a listref of page names.
    my $categoryname = shift;

    &out("Getting category contents for $categoryname...", 4);

    my $old;
    my $new;
    if( exists $options{'cacheonly'} )
    {
        # Cache-only mode: treat the cache as "current" and report
        # everything as newly added (empty old list).
        $old = [];
        $new = &getcacheddata($categoryname);
    }
    else
    {
        # Normal mode: diff the live wiki contents against the cache.
        $old = &getcacheddata($categoryname);
        $new = [$editor->get_pages_in_category($categoryname)];
        ++$categorypulls;   # Track wiki queries for the run summary.
    }

    &reportcatchanges($categoryname, $old, $new);

    # NOTE(review): the second argument was truncated in the diff source;
    # caching the freshly obtained membership list ($new) under the same key
    # that getcacheddata reads is the only consistent reconstruction.
    &cachedata($categoryname, $new);

    &out(scalar @$new . " members returned for $categoryname\n", 5);
    return $new;
}
# Find if this category changed, report its contents if it has.
sub getwork
{
Line 542 ⟶ 629:
}
# If changes occurred, the info is more important than if they didn't.
sub getloglevelfromdiff
{
Line 552 ⟶ 640:
}
# Find all the work items for a given language.
sub getlangwork
{
Line 583 ⟶ 672:
}
# Commit the cache.
sub commitcache
{
Line 615 ⟶ 705:
}
# Wrap svn commands so we can log them.
sub svn
{
|