Levenshtein distance/Alignment: Difference between revisions

From Rosetta Code
Content added Content deleted
m (→‎{{header|Perl 6}}: minor tweak)
(→‎{{header|Perl 6}}: putting everything in a single array for shorter code)
Line 68: Line 68:
my @s = *, $σ.comb;
my @s = *, $σ.comb;
my @t = *, $t.comb;
my @t = *, $t.comb;
 
my @d;
my @A;
@d[$_][ 0] = $_ for ^@s;
@A[$_][ 0]<d s t> = $_, @s[1..$_].join, '-' x $_ for ^@s;
@d[ 0][$_] = $_ for ^@t;
@A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join for ^@t;

for 1 ..^ @s X 1..^ @t -> $i, $j {
my %A;
%A<s>[$_][ 0] = @s[1..$_].join for ^@s;
%A<s>[ 0][$_] = '-' x $_ for ^@t;
%A<t>[ 0][$_] = @t[1..$_].join for ^@t;
%A<t>[$_][ 0] = '-' x $_ for ^@s;
 
for 1..^@s X 1..^@t -> $i, $j {
if @s[$i] eq @t[$j] {
if @s[$i] eq @t[$j] {
# No operation required when eq
# No operation required when eq
%A<s>[$i][$j] = %A<s>[$i-1][$j-1] ~ @s[$i];
@A[$i][$j]<d s t> = @A[$i-1][$j-1]<d s t> Z~ '', @s[$i], @t[$j];
%A<t>[$i][$j] = %A<t>[$i-1][$j-1] ~ @t[$j];
@d[$i][$j] = @d[$i-1][$j-1];
next;
next;
}
}
@d[$i][$j] = 1 + my $min =
@A[$i][$j]<d> = 1 + my $min =
min @d[$i-1][$j], @d[$i][$j-1], @d[$i-1][$j-1];
min @A[$i-1][$j]<d>, @A[$i][$j-1]<d>, @A[$i-1][$j-1]<d>;
if @d[$i-1][$j] == $min {
if @A[$i-1][$j]<d> == $min {
# Deletion
# Deletion
%A<s>[$i][$j] = %A<s>[$i-1][$j] ~ @s[$i];
@A[$i][$j]<s t> = @A[$i-1][$j]<s t> Z~ @s[$i], '-';
%A<t>[$i][$j] = %A<t>[$i-1][$j] ~ '-';
}
}
elsif @d[$i][$j-1] == $min {
elsif @A[$i][$j-1]<d> == $min {
# Insertion
# Insertion
%A<s>[$i][$j] = %A<s>[$i][$j-1] ~ '-';
@A[$i][$j]<s t> = @A[$i][$j-1]<s t> Z~ '-', @t[$j];
%A<t>[$i][$j] = %A<t>[$i][$j-1] ~ @t[$j];
}
}
else {
else {
# Substitution
# Substitution
%A<s>[$i][$j] = %A<s>[$i-1][$j-1] ~ @s[$i];
@A[$i][$j]<s t> = @A[$i-1][$j-1]<s t> Z~ @s[$i], @t[$j];
%A<t>[$i][$j] = %A<t>[$i-1][$j-1] ~ @t[$j];
}
}
}
}
 
return map *[*-1][*-1], %A<s t>;
return @A[*-1][*-1]<s t>;
}
}

.say for align |<rosettacode raisethysword>;
.say for align |<rosettacode raisethysword>;</lang>
</lang>
{{out}}
{{out}}
<pre>ro-settac-o-de
<pre>ro-settac-o-de

Revision as of 13:58, 3 May 2013

Levenshtein distance/Alignment is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

The Levenshtein distance algorithm returns the number of atomic operations (insertion, deletion or edition) that must be performed on a string in order to obtain an other one, but it does not say anything about the actual operations used or their order.

An alignment is a notation used to describe the operations used to turn a string into an other. At some point in the strings, the minus character ('-') is placed in order to signify that a character must be added at this very place. For instance, an alignment between the words 'place' and 'palace' is:

P-LACE
PALACE

For this task, write a function that shows the alignment of two strings for the corresponding levenshtein distance. As an example, use the words "rosettacode" and "raisethysword".

You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language).

Perl

<lang perl>use List::Util qw(min);

sub levenshtein_distance_alignment {

   my @s = ('^', split //, shift);
   my @t = ('^', split //, shift);
   my @d;
   $d[$_][0] = $_ for 0 .. @s-1;
   $d[0][$_] = $_ for 0 .. @t-1;
   my (@AS, @AT);
   $AS[$_][0] = join , @s[1 .. $_] for 0 .. @s-1;
   $AS[0][$_] = '-' x $_ for 0 .. @t-1;
   $AT[0][$_] = join , @t[1 .. $_] for 0 .. @t-1;
   $AT[$_][0] = '-' x $_ for 0 .. @s-1;
   for my $i (1 .. @s-1) {
       for my $j (1 .. @t-1) {
           if ($s[$i] eq $t[$j]) {
               $AS[$i][$j] = $AS[$i-1][$j-1] . $s[$i];
               $AT[$i][$j] = $AT[$i-1][$j-1] . $t[$j];
               $d[$i][$j] = $d[$i-1][$j-1];
               next;
           }
           $d[$i][$j] = 1 + (
               my $min = min $d[$i-1][$j], $d[$i][$j-1], $d[$i-1][$j-1]
           );
           if ($d[$i-1][$j] == $min) {
               $AS[$i][$j] = $AS[$i-1][$j] . $s[$i];
               $AT[$i][$j] = $AT[$i-1][$j] . '-';
           }
           elsif ($d[$i][$j-1] == $min) {
               $AS[$i][$j] = $AS[$i][$j-1] . '-';
               $AT[$i][$j] = $AT[$i][$j-1] . $t[$j];
           }
           else {
               $AS[$i][$j] = $AS[$i-1][$j-1] . $s[$i];
               $AT[$i][$j] = $AT[$i-1][$j-1] . $t[$j];
           }
       }
   }
   return $AS[-1][-1], $AT[-1][-1];

}

print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword"; </lang>

Output:
ro-settac-o-de
raisethysword-

Perl 6

Translation of: Perl

<lang Perl 6>sub align ( Str $σ, Str $t ) {

   my @s = *, $σ.comb;
   my @t = *, $t.comb;
    
   my @A;
   @A[$_][ 0]<d s t> = $_, @s[1..$_].join, '-' x $_ for ^@s;
   @A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join  for ^@t;
    
   for 1 ..^ @s X 1..^ @t -> $i, $j {

if @s[$i] eq @t[$j] { # No operation required when eq @A[$i][$j]<d s t> = @A[$i-1][$j-1]<d s t> Z~ , @s[$i], @t[$j]; next; } @A[$i][$j]<d> = 1 + my $min = min @A[$i-1][$j]<d>, @A[$i][$j-1]<d>, @A[$i-1][$j-1]<d>; if @A[$i-1][$j]<d> == $min { # Deletion @A[$i][$j] = @A[$i-1][$j] Z~ @s[$i], '-'; } elsif @A[$i][$j-1]<d> == $min { # Insertion @A[$i][$j] = @A[$i][$j-1] Z~ '-', @t[$j]; } else { # Substitution @A[$i][$j] = @A[$i-1][$j-1] Z~ @s[$i], @t[$j]; }

   }
    
   return @A[*-1][*-1];

}

.say for align |<rosettacode raisethysword>;</lang>

Output:
ro-settac-o-de
raisethysword-