Levenshtein distance: Difference between revisions

Add bruijn
(Add bruijn)
 
(16 intermediate revisions by 11 users not shown)
Line 927:
levenshtein$(rosettacode,raisethysword)
8</pre>
 
=={{header|Bruijn}}==
{{trans|Haskell}}
Recursive and non-memoized method:
 
<syntaxhighlight lang="bruijn>
:import std/Combinator .
:import std/Char C
:import std/List .
:import std/Math .
 
levenshtein y [[[∅?1 ∀0 (∅?0 ∀1 (0 (1 [[[[go]]]])))]]]
go (C.eq? 3 1) (6 2 0) ++(lmin ((6 2 0) : ((6 5 0) : {}(6 2 4))))
 
:test ((levenshtein "rosettacode" "raisethysword") =? (+8)) ([[1]])
:test ((levenshtein "kitten" "sitting") =? (+3)) ([[1]])
</syntaxhighlight>
 
=={{header|C}}==
Line 1,398 ⟶ 1,415:
((aref rec x y) (aref rec x y))
(t (setf (aref rec x y)
(min (+ (if (char= (char aleven (1- la x)) (char b (- lb y))) 0 1)
(min+ (leven x (1- xy)) y1)
(+ (leven (1- x) (1- y)) (if (char= (levenchar a (- la x)) (char b (1- lb y))) 0 1))))))))
(leven (1- x) (1- y)))))))))
(leven la lb))))
 
Line 1,700 ⟶ 1,716:
 
<pre>rosettacode -> raisethysword = 8</pre>
 
=={{header|EasyLang}}==
{{trans|AWK}}
 
<syntaxhighlight lang=easylang>
func dist s1$ s2$ .
if len s1$ = 0
return len s2$
.
if len s2$ = 0
return len s1$
.
c1$ = substr s1$ 1 1
c2$ = substr s2$ 1 1
s1rest$ = substr s1$ 2 len s1$
s2rest$ = substr s2$ 2 len s2$
#
if c1$ = c2$
return dist s1rest$ s2rest$
.
min = lower dist s1rest$ s2rest$ dist s1$ s2rest$
min = lower min dist s1rest$ s2rest$
return min + 1
.
print dist "kitten" "sitting"
print dist "rosettacode" "raisethysword"
</syntaxhighlight>
 
=={{header|EchoLisp}}==
Line 2,411 ⟶ 2,454:
==> 8
Io> </syntaxhighlight>
 
=={{header|IS-BASIC}}==
<syntaxhighlight lang="is-basic">100 PROGRAM "Levensht.bas"
110 LET S1$="kitten":LET S2$="sitting"
120 PRINT "The Levenshtein distance between """;S1$;""" and """;S2$;""" is:";LEVDIST(S1$,S2$)
130 DEF LEVDIST(S$,T$)
140 LET N=LEN(T$):LET M=LEN(S$)
150 NUMERIC D(0 TO M,0 TO N)
160 FOR I=0 TO M
170 LET D(I,0)=I
180 NEXT
190 FOR J=0 TO N
200 LET D(0,J)=J
210 NEXT
220 FOR J=1 TO N
230 FOR I=1 TO M
240 IF S$(I)=T$(J) THEN
250 LET D(I,J)=D(I-1,J-1)
260 ELSE
270 LET D(I,J)=MIN(D(I-1,J)+1,MIN(D(I,J-1)+1,D(I-1,J-1)+1))
280 END IF
290 NEXT
300 NEXT
310 LET LEVDIST=D(M,N)
320 END DEF</syntaxhighlight>
 
=={{header|J}}==
Line 3,700 ⟶ 3,768:
echo levenshteinDistance("kitten","sitting")
echo levenshteinDistance("rosettacode","raisethysword")</syntaxhighlight>
 
=={{header|Oberon-2}}==
{{trans|Modula-2}}
<syntaxhighlight lang="oberon2">MODULE LevesteinDistance;
 
IMPORT Out,Strings;
PROCEDURE Levestein(s,t:ARRAY OF CHAR):LONGINT;
CONST
maxlen = 15;
VAR
d:ARRAY maxlen,maxlen OF LONGINT;
lens,lent,i,j:LONGINT;
PROCEDURE Min(a,b:LONGINT):LONGINT;
BEGIN
IF a < b THEN RETURN a ELSE RETURN b END
END Min;
BEGIN
lens := Strings.Length(s);
lent := Strings.Length(t);
IF lens = 0 THEN RETURN lent
ELSIF lent = 0 THEN RETURN lens
ELSE
FOR i := 0 TO lens DO d[i,0] := i END;
FOR j := 0 TO lent DO d[0,j] := j END;
FOR i := 1 TO lens DO
FOR j := 1 TO lent DO
IF s[i-1] = t[j-1] THEN
d[i,j] := d[i-1,j-1]
ELSE
d[i,j] := Min((d[i-1,j] + 1),
Min(d[i,j-1] + 1, d[i-1,j-1] + 1));
END
END
END
END;
RETURN d[lens,lent];
END Levestein;
 
PROCEDURE ShowDistance(s,t:ARRAY OF CHAR);
BEGIN
Out.String(s);
Out.String(" -> ");
Out.String(t);
Out.String(": ");
Out.Int(Levestein(s,t),0);
Out.Ln
END ShowDistance;
BEGIN
ShowDistance("kitten", "sitting");
ShowDistance("rosettacode", "raisethysword");
END LevesteinDistance.
</syntaxhighlight>
 
{{out}}
<pre>kitten -> sitting: 3
rosettacode -> raisethysword: 8
</pre>
 
=={{header|Objeck}}==
Line 5,131 ⟶ 5,260:
Levenshtein distance('saturday', 'sunday') == 3
Levenshtein distance('rosettacode', 'raisethysword') == 8</pre>
 
=={{header|Refal}}==
<syntaxhighlight lang="refal">$ENTRY Go {
= <Show ('kitten') ('sitting')>
<Show ('rosettacode') ('raisethysword')>;
};
 
Show {
(e.A) (e.B) = <Prout e.A ' -> ' e.B ': ' <Lev (e.A) (e.B)>>;
};
 
Lev {
(e.A) (), <Lenw e.A>: s.L e.A = s.L;
() (e.B), <Lenw e.B>: s.L e.B = s.L;
(s.C e.A) (s.C e.B) = <Lev (e.A) (e.B)>;
(e.A) (e.B), e.A: s.HA e.LA, e.B: s.HB e.LB =
<+ 1 <Min <Lev (e.LA) (e.B)>
<Lev (e.A) (e.LB)>
<Lev (e.LA) (e.LB)>>>;
}
 
Min {
s.N = s.N;
s.M s.N e.X, <Compare s.M s.N>: {
'-' = <Min s.M e.X>;
s.X = <Min s.N e.X>;
};
};</syntaxhighlight>
{{out}}
<pre>kitten -> sitting: 3
rosettacode -> raisethysword: 8</pre>
 
=={{header|REXX}}==
Line 5,482 ⟶ 5,642:
distance(saturday, sunday) = 3
distance(rosettacode, raisethysword) = 8
</pre>
 
=={{header|RPL}}==
{{works with|HP|28}}
{| class="wikitable"
! RPL code
! Comment
|-
|
≪ DUP2 SIZE SWAP SIZE → a b lb la
≪ '''IF''' la lb * NOT '''THEN''' la lb +
'''ELSE'''
a 2 la SUB b 2 lb SUB DUP2 <span style="color:blue">LEV</span>
'''IF''' a 1 1 SUB b 1 1 SUB == '''THEN'''
ROT ROT DROP2
'''ELSE'''
a ROT <span style="color:blue">LEV</span> ROT b <span style="color:blue">LEV</span>
MIN MIN 1 +
'''END END'''
≫ ≫ '<span style="color:blue">LEV</span>' STO
|
<span style="color:blue">LEV</span> ''( "a" "b" → distance )''
if |a|=0 or [b|=0 then return resp. [b| or |a|
else
put tail(a), tail(b) and lev(tail(a),tail(b)) in stack
if a[1]=b[1} then
clean stack and return lev(tail(a),tail(b))
else
put lev(a,tail(b)) and lev(tail(a),b) in stack
return min of the 3 values in stack and add 1
|}
"kitten" "sitting" <span style="color:blue">LEV</span>
"Summer" "Winter" <span style="color:blue">LEV</span>
"Levenshtein" "Levenshtein" <span style="color:blue">LEV</span>
{{out}}
<pre>
3: 3
2: 4
1: 0
</pre>
 
===Iterative implementation (Wagner-Fischer algorithm)===
 
index of arrays and strings start with 1 in RPL, so the main trick in translating the algorithm given by Wikipedia was to manage the offsets properly. The distance between "rosettacode" and "raisethysword" is within the reach of a calculator (emulated or not), unlike the above recursive approach.
{{works with|HP|48}}
{| class="wikitable"
! RPL code
! Comment
|-
|
DUP2 { } + + ≪ SIZE ≫ DOLIST 1 ADD 0 CON → a b d
≪ 1 a SIZE '''FOR''' h
'd' h 1 + 1 2 →LIST h PUT '''NEXT'''
1 b SIZE '''FOR''' j
'd' 1 j 1 + 2 →LIST j PUT '''NEXT'''
1 b SIZE '''FOR''' j
1 a SIZE '''FOR''' h
a h DUP SUB b j DUP SUB ≠
'd' h j 2 →LIST GET +
'd' h j 1 + 2 →LIST GET 1 +
'd' h 1 + j 2 →LIST GET 1 +
MIN MIN 'd' h 1 + j 1 + 2 →LIST ROT PUT
'''NEXT NEXT'''
'd' DUP SIZE GET
≫ ≫ '<span style="color:blue">LEV2</span>' STO
|
<span style="color:blue">LEV2</span> ''( "a" "b" → distance )''
declare int d[0..m, 0..n] and set each element in d to zero
for h from 1 to m: <span style="color:grey>// h replaces i, which is √-1 in RPL</span>
d[h, 0] := i <span style="color:grey>// RPL array indexes start with 1</span>
for j from 1 to n:
d[0, j] := j
for j from 1 to n:
for h from 1 to m:
substitutionCost := ( s[h] <> t[j] )
d[h, j] := minimum(d[h-1, j-1] + substitutionCost,
d[h-1, j] + 1,
d[h, j-1] + 1)
return d[m, n]
|}
"rosettacode" "raisethysword" <span style="color:blue">LEV2</span>
{{out}}
<pre>
1: 8
</pre>
 
Line 5,806 ⟶ 6,056:
t || return s.len
 
var s1 = s.ftslice(1)
var t1 = t.ftslice(1)
 
s[0] == t[0] ? __FUNC__(s1, t1)
Line 6,102 ⟶ 6,352:
}
 
</syntaxhighlight>
 
=={{header|uBasic/4tH}}==
{{trans|FreeBASIC}}
<syntaxhighlight lang="ubasic-4th">
' Uses the "iterative with two matrix rows" algorithm
' referred to in the Wikipedia article.
 
' create two integer arrays of distances
Dim @u(128) ' previous
Dim @v(128) ' current
 
Print "'kitten' to 'sitting' => ";
Print FUNC(_Levenshtein ("kitten", "sitting"))
 
Print "'rosettacode' to 'raisethysword' => ";
Print FUNC(_Levenshtein ("rosettacode", "raisethysword"))
 
Print "'sleep' to 'fleeting' => ";
Print FUNC(_Levenshtein ("sleep", "fleeting"))
 
End
 
_Levenshtein
Param (2)
Local (3)
 
' degenerate cases
If Comp(a@, b@) = 0 Then Return (0)
If Len(a@) = 0 Then Return (Len(b@))
If Len(b@) = 0 Then Return (Len(a@))
 
' initialize v0
For c@ = 0 To Len(b@)
@u(c@) = c@
Next
 
For c@ = 0 To Len(a@) - 1
' calculate @v() from @u()
@v(0) = c@ + 1
For d@ = 0 To Len(b@) - 1
e@ = IIf(Peek (a@, c@) = Peek (b@, d@), 0, 1)
@v(d@ + 1) = min(@v(d@) + 1, Min(@u(d@ + 1) + 1, @u(d@) + e@))
Next
 
' copy @v() to @u() for next iteration
For d@ = 0 To Len(b@)
@u(d@) = @v(d@)
Next
Next
 
Return (@v(Len(b@)))
</syntaxhighlight>
 
Line 6,329 ⟶ 6,632:
Return Matrix(String1.Length - 1, String2.Length - 1)
End Function</syntaxhighlight>
 
=={{header|V (Vlang)}}==
<syntaxhighlight lang="Zig">
import strings
 
fn main() {
println(strings.levenshtein_distance("kitten", "sitting"))
println(strings.levenshtein_distance("rosettacode", "raisethysword"))
}
</syntaxhighlight>
 
{{out}}
<pre>
3
8
</pre>
 
=={{header|Wren}}==
{{trans|Go}}
<syntaxhighlight lang="ecmascriptwren">var levenshtein = Fn.new { |s, t|
var ls = s.count
var lt = t.count
Line 6,362 ⟶ 6,681:
3
</pre>
 
=={{header|Zig}}==
{{trans|C}}
 
'''Works with:''' 0.11.x, 0.12.0-dev.1381+61861ef39
 
For 0.10.x, replace @min(a, b, c) with std.math.min3(a, b, c).
 
Recursive method without memoization.
 
<syntaxhighlight lang="zig">const std = @import("std");
 
fn levenshtein(s: []const u8, t: []const u8) usize {
// If either string is empty, difference is inserting all chars
// from the other
if (s.len == 0) return t.len;
if (t.len == 0) return s.len;
 
// If last letters are the same, the difference is whatever is
// required to edit the rest of the strings
if (s[s.len - 1] == t[t.len - 1])
return levenshtein(s[0 .. s.len - 1], t[0 .. t.len - 1]);
 
// Else try:
// changing last letter of s to that of t; or
// remove last letter of s; or
// remove last letter of t,
// any of which is 1 edit plus editing the rest of the strings
const a = levenshtein(s[0 .. s.len - 1], t[0 .. t.len - 1]);
const b = levenshtein(s, t[0 .. t.len - 1]);
const c = levenshtein(s[0 .. s.len - 1], t);
 
return @min(a, b, c) + 1;
}
 
pub fn main() std.fs.File.WriteError!void {
const stdout = std.io.getStdOut();
const stdout_w = stdout.writer();
 
const s1 = "rosettacode";
const s2 = "raisethysword";
try stdout_w.print("distance between '{s}' and '{s}': {d}\n", .{ s1, s2, levenshtein(s1, s2) });
return;
}</syntaxhighlight>
 
=={{header|zkl}}==
55

edits