WiktionaryDumps to words: Difference between revisions

added Perl programming solution
m (→‎{{header|Phix}}: oops, just mullered the tag)
(added Perl programming solution)
Line 481:
minute
...
</pre>
 
=={{header|Perl}}==
{{trans|Raku}}
<lang perl># 20211214 Perl programming solution
 
use strict;
use warnings;
use LWP::UserAgent;
use Compress::Raw::Bzip2 ;
 
# --- Configuration ----------------------------------------------------------
my $LanguageMark = "==French==";    # section heading that marks a wanted entry
my $Target       = 5;               # words
my $URL = 'https://dumps.wikimedia.org/enwiktionary/latest/enwiktionary-latest-pages-articles.xml.bz2';

# --- Scanner state (must outlive a single download chunk) -------------------
my %needles;               # single-word titles whose page has a $LanguageMark
my $tail          = '';    # unterminated last line of the previous chunk
my $title;                 # title of the current page; undef unless one word
my $got_text_last = 0;     # true after a <text line until a mark or </text>
my $mark_re       = qr/\Q$LanguageMark\E/;

my $ua = LWP::UserAgent->new;

# Compress::Raw::Bunzip2->new takes positional flags (appendOutput,
# consumeInput, ...), not zlib-style named options.  Append mode lets the
# output of several streams accumulate in one buffer; consume mode leaves
# in the input buffer only the bytes that follow the end of a stream.
my $new_bunzip2 = sub {
    my ( $stream, $status ) = Compress::Raw::Bunzip2->new( 1, 1 );
    die "Cannot create bunzip2 stream: $status\n" unless $stream;
    return $stream;
};
my $bz = $new_bunzip2->();

# Stream the dump: the callback receives each downloaded chunk as $_[0],
# inflates it, and scans the complete lines it yields.  As soon as $Target
# words have been collected they are printed and the program exits, which
# also aborts the download.
my $res = $ua->request( HTTP::Request->new( GET => $URL ),
    sub {
        my ($compressed) = @_;    # private copy: bzinflate consumes its input
        my $plain = '';           # text inflated from this chunk only

        while ( length $compressed ) {
            my $status = $bz->bzinflate( $compressed, $plain );
            if ( $status == BZ_STREAM_END ) {

                # End of one bzip2 stream; whatever is left in $compressed
                # (or arrives later) belongs to a new stream.
                $bz = $new_bunzip2->();
            }
            elsif ( $status != BZ_OK ) {
                warn "bunzip2 failed: $status\n";
                exit 1;
            }
        }

        # Glue the carried-over partial line to the new text.  The -1 limit
        # keeps a trailing empty field, so text ending in "\n" leaves an
        # empty $tail instead of holding back a complete line.
        my @haystacks = split /\n/, $tail . $plain, -1;
        $tail = @haystacks ? pop @haystacks : '';

        for my $line (@haystacks) {
            if ( $line =~ m{<title>(.*?)</title>} ) {

                # Every title starts a new page.  Titles that are not a
                # single word clear $title, so a later mark is never
                # credited to the previous page.
                my $candidate = $1;
                $title         = $candidate =~ /\A\w+\z/ ? $candidate : undef;
                $got_text_last = 0;
            }
            elsif ( $line =~ /<text/ ) {
                $got_text_last = 1;
            }
            elsif ( $line =~ $mark_re ) {
                $needles{$title}++ if $got_text_last and defined $title;
                if ( scalar( keys %needles ) >= $Target ) {
                    print "$_\n" for keys %needles;
                    exit;
                }
                $got_text_last = 0;
            }
            elsif ( $line =~ m{</text>} ) {
                $got_text_last = 0;
            }
        }
    }
)</lang>
{{out}}
<pre>
gratis
pond
chien
gratuit
livre
</pre>
 
350

edits