Levenshtein distance/Alignment: Difference between revisions
m (→{{header|Perl 6}}: minor tweak) |
(→{{header|Perl 6}}: putting everything in a single array for shorter code) |
||
Line 68: | Line 68: | ||
my @s = *, $σ.comb; |
my @s = *, $σ.comb; |
||
my @t = *, $t.comb; |
my @t = *, $t.comb; |
||
my @ |
my @A; |
||
@ |
@A[$_][ 0]<d s t> = $_, @s[1..$_].join, '-' x $_ for ^@s; |
||
@ |
@A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join for ^@t; |
||
⚫ | |||
⚫ | |||
my %A; |
|||
%A<s>[$_][ 0] = @s[1..$_].join for ^@s; |
|||
%A<s>[ 0][$_] = '-' x $_ for ^@t; |
|||
%A<t>[ 0][$_] = @t[1..$_].join for ^@t; |
|||
%A<t>[$_][ 0] = '-' x $_ for ^@s; |
|||
⚫ | |||
⚫ | |||
if @s[$i] eq @t[$j] { |
if @s[$i] eq @t[$j] { |
||
# No operation required when eq |
# No operation required when eq |
||
@A[$i][$j]<d s t> = @A[$i-1][$j-1]<d s t> Z~ '', @s[$i], @t[$j]; |
|||
%A<t>[$i][$j] = %A<t>[$i-1][$j-1] ~ @t[$j]; |
|||
@d[$i][$j] = @d[$i-1][$j-1]; |
|||
next; |
next; |
||
} |
} |
||
@ |
@A[$i][$j]<d> = 1 + my $min = |
||
min @ |
min @A[$i-1][$j]<d>, @A[$i][$j-1]<d>, @A[$i-1][$j-1]<d>; |
||
if @ |
if @A[$i-1][$j]<d> == $min { |
||
# Deletion |
# Deletion |
||
@A[$i][$j]<s t> = @A[$i-1][$j]<s t> Z~ @s[$i], '-'; |
|||
%A<t>[$i][$j] = %A<t>[$i-1][$j] ~ '-'; |
|||
} |
} |
||
elsif @ |
elsif @A[$i][$j-1]<d> == $min { |
||
# Insertion |
# Insertion |
||
@A[$i][$j]<s t> = @A[$i][$j-1]<s t> Z~ '-', @t[$j]; |
|||
%A<t>[$i][$j] = %A<t>[$i][$j-1] ~ @t[$j]; |
|||
} |
} |
||
else { |
else { |
||
# Substitution |
# Substitution |
||
@A[$i][$j]<s t> = @A[$i-1][$j-1]<s t> Z~ @s[$i], @t[$j]; |
|||
%A<t>[$i][$j] = %A<t>[$i-1][$j-1] ~ @t[$j]; |
|||
} |
} |
||
} |
} |
||
return |
return @A[*-1][*-1]<s t>; |
||
} |
} |
||
.say for align |<rosettacode raisethysword>; |
.say for align |<rosettacode raisethysword>;</lang> |
||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre>ro-settac-o-de |
<pre>ro-settac-o-de |
Revision as of 13:58, 3 May 2013
The Levenshtein distance algorithm returns the number of atomic operations (insertion, deletion or edition) that must be performed on a string in order to obtain an other one, but it does not say anything about the actual operations used or their order.
An alignment is a notation used to describe the operations used to turn a string into an other. At some point in the strings, the minus character ('-') is placed in order to signify that a character must be added at this very place. For instance, an alignment between the words 'place' and 'palace' is:
P-LACE PALACE
For this task, write a function that shows the alignment of two strings for the corresponding levenshtein distance. As an example, use the words "rosettacode" and "raisethysword".
You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language).
Perl
<lang perl>use List::Util qw(min);
sub levenshtein_distance_alignment {
my @s = ('^', split //, shift); my @t = ('^', split //, shift);
my @d; $d[$_][0] = $_ for 0 .. @s-1; $d[0][$_] = $_ for 0 .. @t-1;
my (@AS, @AT); $AS[$_][0] = join , @s[1 .. $_] for 0 .. @s-1; $AS[0][$_] = '-' x $_ for 0 .. @t-1; $AT[0][$_] = join , @t[1 .. $_] for 0 .. @t-1; $AT[$_][0] = '-' x $_ for 0 .. @s-1;
for my $i (1 .. @s-1) { for my $j (1 .. @t-1) { if ($s[$i] eq $t[$j]) { $AS[$i][$j] = $AS[$i-1][$j-1] . $s[$i]; $AT[$i][$j] = $AT[$i-1][$j-1] . $t[$j]; $d[$i][$j] = $d[$i-1][$j-1]; next; } $d[$i][$j] = 1 + ( my $min = min $d[$i-1][$j], $d[$i][$j-1], $d[$i-1][$j-1] ); if ($d[$i-1][$j] == $min) { $AS[$i][$j] = $AS[$i-1][$j] . $s[$i]; $AT[$i][$j] = $AT[$i-1][$j] . '-'; } elsif ($d[$i][$j-1] == $min) { $AS[$i][$j] = $AS[$i][$j-1] . '-'; $AT[$i][$j] = $AT[$i][$j-1] . $t[$j]; } else { $AS[$i][$j] = $AS[$i-1][$j-1] . $s[$i]; $AT[$i][$j] = $AT[$i-1][$j-1] . $t[$j]; } } } return $AS[-1][-1], $AT[-1][-1];
}
print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword"; </lang>
- Output:
ro-settac-o-de raisethysword-
Perl 6
<lang Perl 6>sub align ( Str $σ, Str $t ) {
my @s = *, $σ.comb; my @t = *, $t.comb; my @A; @A[$_][ 0]<d s t> = $_, @s[1..$_].join, '-' x $_ for ^@s; @A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join for ^@t; for 1 ..^ @s X 1..^ @t -> $i, $j {
if @s[$i] eq @t[$j] {
# No operation required when eq
@A[$i][$j]<d s t> = @A[$i-1][$j-1]<d s t> Z~ , @s[$i], @t[$j];
next;
}
@A[$i][$j]<d> = 1 + my $min =
min @A[$i-1][$j]<d>, @A[$i][$j-1]<d>, @A[$i-1][$j-1]<d>;
if @A[$i-1][$j]<d> == $min {
# Deletion
@A[$i][$j] = @A[$i-1][$j] Z~ @s[$i], '-';
}
elsif @A[$i][$j-1]<d> == $min {
# Insertion
@A[$i][$j] = @A[$i][$j-1] Z~ '-', @t[$j];
}
else {
# Substitution
@A[$i][$j] = @A[$i-1][$j-1] Z~ @s[$i], @t[$j];
}
} return @A[*-1][*-1];
}
.say for align |<rosettacode raisethysword>;</lang>
- Output:
ro-settac-o-de raisethysword-