WiktionaryDumps to words: Difference between revisions
Content added Content deleted
(added Perl programming solution) |
m (→{{header|Perl}}: simplify) |
||
Line 497: | Line 497: | ||
my %needles; my $plain = my $tail = ''; |
my %needles; my $plain = my $tail = ''; |
||
my $ua = LWP::UserAgent->new; |
my $ua = LWP::UserAgent->new; |
||
my $bz = new Compress::Raw::Bunzip2({ -Bufsize => 1, -AppendOutput => 0 }); |
my $bz = new Compress::Raw::Bunzip2({ -Bufsize => 1, -AppendOutput => 0 }); |
||
my $res = $ua->request( HTTP::Request->new(GET => $URL), |
my $res = $ua->request( HTTP::Request->new(GET => $URL), |
||
sub { # @_ = Data Chunk, HTTP::Response |
|||
sub { |
|||
foreach (split '', $_[0]) { |
|||
foreach (@Char_Array) { |
|||
my $status = $bz->bzinflate($_, substr($plain, 0)) ; |
my $status = $bz->bzinflate($_, substr($plain, 0)) ; |
||
last if $status == BZ_STREAM_END or $status != BZ_OK ; |
last if $status == BZ_STREAM_END or $status != BZ_OK ; |
||
} |
} |
||
my @haystacks = split |
if ( scalar ( my @haystacks = split "\n", $plain)) { |
||
if ( scalar @haystacks ) { |
|||
$haystacks[0] = $tail . $haystacks[0]; |
$haystacks[0] = $tail . $haystacks[0]; |
||
$tail = $haystacks[$#haystacks]; |
$tail = $haystacks[$#haystacks]; |
||
Line 516: | Line 513: | ||
my ($title,$got_text_last) = '', 0 ; |
my ($title,$got_text_last) = '', 0 ; |
||
foreach (@haystacks[0..$#haystacks-1]) { |
foreach ( @haystacks[0..$#haystacks-1] ) { |
||
if ( /<title>(\w+?)<\/title>/ ) { |
if ( /<title>(\w+?)<\/title>/ ) { |
||
($title,$got_text_last) = $1, 0; |
($title,$got_text_last) = $1, 0; |
||
Line 524: | Line 521: | ||
$needles{$title}++ if ( $got_text_last and $title.defined ); |
$needles{$title}++ if ( $got_text_last and $title.defined ); |
||
if ( %needles >= $Target ) { |
if ( %needles >= $Target ) { |
||
print "$_\n" for keys %needles; |
print "$_\n" for sort keys %needles; |
||
exit; |
exit; |
||
} |
} |
||
Line 535: | Line 532: | ||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
⚫ | |||
gratis |
gratis |
||
⚫ | |||
⚫ | |||
gratuit |
gratuit |
||
livre |
livre |
||
⚫ | |||
</pre> |
</pre> |
||