Levenshtein distance/Alignment: Difference between revisions
(→{{header|Perl}}: put everything in one array for a shorter code) |
|||
Line 13: | Line 13: | ||
=={{header|Perl}}== |
=={{header|Perl}}== |
||
<lang perl>use |
<lang perl>use strict; |
||
use warnings; |
|||
use List::Util qw(min); |
|||
sub levenshtein_distance_alignment { |
sub levenshtein_distance_alignment { |
||
my @s = ('^', split //, shift); |
my @s = ('^', split //, shift); |
||
my @t = ('^', split //, shift); |
my @t = ('^', split //, shift); |
||
my @ |
my @A; |
||
$ |
@{$A[$_][0]}{qw(d s t)} = ($_, join('', @s[1 .. $_]), ('~' x $_)) for 0 .. $#s; |
||
$ |
@{$A[0][$_]}{qw(d s t)} = ($_, ('-' x $_), join '', @t[1 .. $_]) for 0 .. $#t; |
||
my (@AS, @AT); |
|||
$AS[$_][0] = join '', @s[1 .. $_] for 0 .. $#s; |
|||
$AS[0][$_] = '-' x $_ for 0 .. $#t; |
|||
$AT[0][$_] = join '', @t[1 .. $_] for 0 .. $#t; |
|||
$AT[$_][0] = '-' x $_ for 0 .. $#s; |
|||
for my $i (1 .. $#s) { |
for my $i (1 .. $#s) { |
||
for my $j (1 .. $#t) { |
for my $j (1 .. $#t) { |
||
if ($s[$i] ne $t[$j]) { |
|||
$A[$i][$j]{d} = 1 + ( |
|||
my $min = min $A[$i-1][$j]{d}, $A[$i][$j-1]{d}, $A[$i-1][$j-1]{d} |
|||
); |
|||
$d[$i][$j] = $d[$i-1][$j-1]; |
|||
@{$A[$i][$j]}{qw(s t)} = |
|||
next; |
|||
$A[$i-1][$j]{d} == $min ? ($A[$i-1][$j]{s}.$s[$i], $A[$i-1][$j]{t}.'-') : |
|||
} |
|||
$A[$i][$j-1]{d} == $min ? ($A[$i][$j-1]{s}.'-', $A[$i][$j-1]{t}.$t[$j]) : |
|||
$d[$i][$j] = 1 + ( |
|||
($A[$i-1][$j-1]{s}.$s[$i], $A[$i-1][$j-1]{t}.$t[$j]); |
|||
} |
|||
if ($d[$i-1][$j] == $min) { |
|||
$AS[$i][$j] = $AS[$i-1][$j] . $s[$i]; |
|||
$AT[$i][$j] = $AT[$i-1][$j] . '-'; |
|||
} |
|||
elsif ($d[$i][$j-1] == $min) { |
|||
$AS[$i][$j] = $AS[$i][$j-1] . '-'; |
|||
$AT[$i][$j] = $AT[$i][$j-1] . $t[$j]; |
|||
} |
|||
else { |
else { |
||
@{$A[$i][$j]}{qw(d s t)} = ( |
|||
$A[$i-1][$j-1]{d}, |
|||
$A[$i-1][$j-1]{s}.$s[$i], |
|||
$A[$i-1][$j-1]{t}.$t[$j] |
|||
); |
|||
} |
} |
||
} |
} |
||
} |
} |
||
return $ |
return @{$A[-1][-1]}{'s', 't'}; |
||
} |
} |
||
print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword"; |
print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword";</lang> |
||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre>ro-settac-o-de |
<pre>ro-settac-o-de |
Revision as of 01:49, 4 May 2013
The Levenshtein distance algorithm returns the number of atomic operations (insertion, deletion or edition) that must be performed on a string in order to obtain an other one, but it does not say anything about the actual operations used or their order.
An alignment is a notation used to describe the operations used to turn a string into an other. At some point in the strings, the minus character ('-') is placed in order to signify that a character must be added at this very place. For instance, an alignment between the words 'place' and 'palace' is:
P-LACE PALACE
For this task, write a function that shows the alignment of two strings for the corresponding levenshtein distance. As an example, use the words "rosettacode" and "raisethysword".
You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language).
Perl
<lang perl>use strict; use warnings;
use List::Util qw(min);
sub levenshtein_distance_alignment {
my @s = ('^', split //, shift); my @t = ('^', split //, shift); my @A; @{$A[$_][0]}{qw(d s t)} = ($_, join(, @s[1 .. $_]), ('~' x $_)) for 0 .. $#s; @{$A[0][$_]}{qw(d s t)} = ($_, ('-' x $_), join , @t[1 .. $_]) for 0 .. $#t; for my $i (1 .. $#s) { for my $j (1 .. $#t) {
if ($s[$i] ne $t[$j]) { $A[$i][$j]{d} = 1 + ( my $min = min $A[$i-1][$j]{d}, $A[$i][$j-1]{d}, $A[$i-1][$j-1]{d} ); @{$A[$i][$j]}{qw(s t)} = $A[$i-1][$j]{d} == $min ? ($A[$i-1][$j]{s}.$s[$i], $A[$i-1][$j]{t}.'-') : $A[$i][$j-1]{d} == $min ? ($A[$i][$j-1]{s}.'-', $A[$i][$j-1]{t}.$t[$j]) : ($A[$i-1][$j-1]{s}.$s[$i], $A[$i-1][$j-1]{t}.$t[$j]); }
else {
@{$A[$i][$j]}{qw(d s t)} = ( $A[$i-1][$j-1]{d}, $A[$i-1][$j-1]{s}.$s[$i], $A[$i-1][$j-1]{t}.$t[$j] );
} } } return @{$A[-1][-1]}{'s', 't'};
}
print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword";</lang>
- Output:
ro-settac-o-de raisethysword-
Perl 6
<lang Perl 6>sub align ( Str $σ, Str $t ) {
my @s = *, $σ.comb; my @t = *, $t.comb; my @A; @A[$_][ 0]<d s t> = $_, @s[1..$_].join, '-' x $_ for ^@s; @A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join for ^@t; for 1 ..^ @s X 1..^ @t -> \i, \j {
if @s[i] ne @t[j] {
@A[i][j]<d> = 1 + my $min =
min @A[i-1][j]<d>, @A[i][j-1]<d>, @A[i-1][j-1]<d>;
@A[i][j] =
@A[i-1][j]<d> == $min ?? (@A[i-1][j] Z~ @s[i], '-') !!
@A[i][j-1]<d> == $min ?? (@A[i][j-1] Z~ '-', @t[j]) !!
(@A[i-1][j-1] Z~ @s[i], @t[j]);
} else {
@A[i][j]<d s t> = @A[i-1][j-1]<d s t> Z~ , @s[i], @t[j];
}
} return @A[*-1][*-1];
}
.say for align |<rosettacode raisethysword>;</lang>
- Output:
ro-settac-o-de raisethysword-