Soundex: Difference between revisions
Line 69: | Line 69: | ||
}</lang> |
}</lang> |
||
=={{header| |
=={{header|Clipper/XBase++}}== |
||
***************************************************************************** |
|||
There is already a soundex method in std.string [http://www.digitalmars.com/d/2.0/phobos/std_string.html#soundex] |
|||
<nowiki> |
|||
<lang d>import std.string; |
|||
FUNCTION Soundex(cWord) |
|||
/* |
|||
void main() |
|||
This is an implementation for Clipper and XBase++ of the standard American Soundex System. |
|||
{ |
|||
Clipper already has a Soundex function which will give the same results as this function. |
|||
char[][] input = ["Euler","Gauss","Hilbert","Knuth","Lloyd","Lukasiewicz","Ellery", |
|||
XBase++ does not. It does come with a C++ interpretation of Soundex. |
|||
"Ghosh", "Heilbronn","Kant","Ladd","Lissajous","Wheaton","Ashcraft", |
|||
"Burroughs","Burrows","O'Hara"]; |
|||
The following is pure Clipper and pure XBase++. |
|||
uint padding = 0; |
|||
*/ |
|||
foreach(inp; input) |
|||
LOCAL cSoundex, i, nLast, cChar, nCode |
|||
if(inp.length > padding) |
|||
padding = inp.length; |
|||
char[] padstr = new char[](padding); |
|||
cWord:=ALLTRIM(UPPER(cWord)) |
|||
foreach(inp; input) |
|||
cSoundex:=LEFT(cWord, 1) // first letter is first char |
|||
{ |
|||
nLast:=-1 |
|||
padstr[] = ' '; |
|||
FOR i:=2 TO LEN(cWord) |
|||
padstr[0..inp.length] = inp; |
|||
cChar:=SUBSTR(cWord, i, 1) // get char |
|||
nCode:=SoundexCode(cChar) // get soundex code for char |
|||
IF nCode=0 // if 0, ignore |
|||
LOOP |
|||
ENDIF |
|||
IF nCode#nLast // if not same code, add to soundex |
|||
nLast:=nCode // and replace the last one |
|||
cSoundex+=STR(nCode, 1) |
|||
ENDIF |
|||
NEXT |
|||
cSoundex:=PADR(cSoundex, 4, "0") |
|||
printf("%.*s -> %.*s\n", padstr, soundex(inp)); |
|||
} |
|||
RETURN(cSoundex) |
|||
}</lang> |
|||
******************************************************************************* |
|||
STATIC FUNCTION SoundexCode(cLetter) |
|||
LOCAL aCodes:={"BFPV", "CGJKQSXZ", "DT", "L", "MN", "R"}, i, nRet:=0 |
|||
FOR i:=1 TO LEN(aCodes) |
|||
IF cLetter $ aCodes[i] |
|||
nRet:=i |
|||
EXIT |
|||
ENDIF |
|||
NEXT |
|||
RETURN(nRet) |
|||
******************************************************************************* |
|||
FUNCTION SoundexDifference(cSound1, cSound2) |
|||
LOCAL nMatch:=0, nLen1, nLen2, i |
|||
nLen1:=LEN(cSound1) |
|||
nLen2:=LEN(cSound2) |
|||
// make the two words the same length. This is a safety catch. They should both be 4 characters long. |
|||
IF nLen1 > nLen2 |
|||
cSound2:=PADR(cSound2, nLen1-nLen2, "0") |
|||
ELSEIF nLen1 < nLen2 |
|||
cSound1:=PADR(cSound1, nLen2-nLen1, "0") |
|||
ENDIF |
|||
// compare the corresponding characters between the two words |
|||
FOR i:=1 TO LEN(cSound1) |
|||
IF SUBSTR(cSound1, i, 1) == SUBSTR(cSound2, i, 1) |
|||
++nMatch |
|||
ENDIF |
|||
NEXT |
|||
RETURN(nMatch) |
|||
******************************************************************************* |
|||
</nowiki> |
|||
=={{header|Forth}}== |
=={{header|Forth}}== |
Revision as of 22:37, 4 November 2010
You are encouraged to solve this task according to the task description, using any language you may know.
Soundex is an algorithm for creating indices for words based on their pronunciation. The goal is for homophones to be encoded to the same representation so that they can be matched despite minor differences in spelling (from the WP article).
C
<lang c>#include <stdio.h>
- include <stdlib.h>
- include <string.h>
char chr(int i) {
return ((0<=i) && (i<=9)) ? '0'+i : '-';
}
char *soundex(const char *word, char *bufr) {
static const char *lset[] = { "", "BFPVbfpv", "CGJKQSXZcgjkqsxz", "DTdt", "Ll","MNmn", "Rr" };
- define LSIZE (sizeof(lset)/sizeof(char *))
const char *ch; char *bp; char prev = '-'; int j;
bp = bufr; ch = word;
while (*ch && (bp-bufr<4)) { for (j=1; j< LSIZE; j++) if (strchr( lset[j], *ch) ) break;
if ((j<LSIZE) && (chr(j) != prev)) *bp++ = prev = chr(j); else if (!strchr("WHwh", *ch)) prev = '7';
if (ch == word) { bp = bufr; *bp++ = toupper(*ch); } ch++; } while (bp-bufr < 4) *bp++ = '0'; *bp = 0; return bufr;
}
int main() {
const char *w, **wp; static const char *testwords[] = { "Euler","Gauss","Hilbert","Knuth","Lloyd","Lukasiewicz","Ellery", "Ghosh", "Heilbronn","Kant","Ladd","Lissajous","Wheaton","Ashcraft", "Burroughs","Burrows","O'Hara", NULL}; char buffer[8];
w= "Soundex"; printf("soundex(%s) = %s\n", w, soundex(w,buffer)); w= "Example"; printf("soundex(%s) = %s\n", w, soundex(w,buffer)); w= "Sownteks"; printf("soundex(%s) = %s\n", w, soundex(w,buffer)); w= "Ekzampul"; printf("soundex(%s) = %s\n", w, soundex(w,buffer));
for (wp=testwords; *wp; wp++ ) printf("soundex(%s) = %s\n", *wp, soundex(*wp, buffer)); return 0;
}</lang>
Clipper/XBase++
FUNCTION Soundex(cWord) /* This is an implementation for Clipper and XBase++ of the standard American Soundex System. Clipper already has a Soundex function which will give the same results as this function. XBase++ does not. It does come with a C++ interpretation of Soundex. The following is pure Clipper and pure XBase++. */ LOCAL cSoundex, i, nLast, cChar, nCode cWord:=ALLTRIM(UPPER(cWord)) cSoundex:=LEFT(cWord, 1) // first letter is first char nLast:=-1 FOR i:=2 TO LEN(cWord) cChar:=SUBSTR(cWord, i, 1) // get char nCode:=SoundexCode(cChar) // get soundex code for char IF nCode=0 // if 0, ignore LOOP ENDIF IF nCode#nLast // if not same code, add to soundex nLast:=nCode // and replace the last one cSoundex+=STR(nCode, 1) ENDIF NEXT cSoundex:=PADR(cSoundex, 4, "0") RETURN(cSoundex) ******************************************************************************* STATIC FUNCTION SoundexCode(cLetter) LOCAL aCodes:={"BFPV", "CGJKQSXZ", "DT", "L", "MN", "R"}, i, nRet:=0 FOR i:=1 TO LEN(aCodes) IF cLetter $ aCodes[i] nRet:=i EXIT ENDIF NEXT RETURN(nRet) ******************************************************************************* FUNCTION SoundexDifference(cSound1, cSound2) LOCAL nMatch:=0, nLen1, nLen2, i nLen1:=LEN(cSound1) nLen2:=LEN(cSound2) // make the two words the same length. This is a safety catch. They should both be 4 characters long. IF nLen1 > nLen2 cSound2:=PADR(cSound2, nLen1-nLen2, "0") ELSEIF nLen1 < nLen2 cSound1:=PADR(cSound1, nLen2-nLen1, "0") ENDIF // compare the corresponding characters between the two words FOR i:=1 TO LEN(cSound1) IF SUBSTR(cSound1, i, 1) == SUBSTR(cSound2, i, 1) ++nMatch ENDIF NEXT RETURN(nMatch) *******************************************************************************
Forth
This implements the US Census rules, where W and H are ignored but, unlike vowels, aren't separators. Further corner cases welcome...
<lang forth>: alpha-table create does> swap 32 or [char] a - 0 max 26 min + 1+ c@ ;
alpha-table soundex-code
," 123 12. 22455 12623 1.2 2 " \ ABCDEFGHIJKLMNOPQRSTUVWXYZ
- soundex ( name len -- pad len )
over c@ pad c! \ First character verbatim pad 1+ 3 [char] 0 fill \ Pad to four characters with zeros 1 pad c@ soundex-code ( count code ) 2swap bounds do i c@ soundex-code ( count code next ) 2dup = if drop else \ runs are ignored dup [char] . = if drop else \ W, H don't separate runs of consonants dup bl = if nip else \ vowels separate consonants but aren't coded nip 2dup swap pad + c! swap 1+ tuck 4 = if leave then then then then loop 2drop pad 4 ;
\ Knuth's test cases s" Euler" soundex cr type \ E460 s" Gauss" soundex cr type \ G200 s" Hilbert" soundex cr type \ H416 s" Knuth" soundex cr type \ K530 s" Lloyd" soundex cr type \ L300 s" Lukasiewicz" soundex cr type \ L222 (W test) s" Ellery" soundex cr type \ E460 s" Ghosh" soundex cr type \ G200 s" Heilbronn" soundex cr type \ H416 s" Kant" soundex cr type \ K530 s" Ladd" soundex cr type \ L300 s" Lissajous" soundex cr type \ L222
s" Wheaton" soundex cr type \ W350 s" Ashcraft" soundex cr type \ A261 (H tests) s" Burroughs" soundex cr type \ B620 s" Burrows" soundex cr type \ B620 (W test) (any Welsh names?) s" O'Hara" soundex cr type \ O600 (punctuation test)</lang>
Haskell
<lang haskell>import Text.PhoneticCode.Soundex import Control.Arrow</lang> Example: <lang haskell>*Main> mapM_ print $ map (id &&& soundexSimple) ["Soundex", "Example", "Sownteks", "Ekzampul"] ("Soundex","S532") ("Example","E251") ("Sownteks","S532") ("Ekzampul","E251")</lang>
Icon and Unicon
Icon
<lang icon>procedure main(arglist) # computes soundex of each argument every write(x := !arglist, " => ",soundex(x)) end
procedure soundex(name)
local dig,i,x static con initial { # construct mapping x[i] => i all else . x := ["bfpv","cgjkqsxz","dt","l","mn","r"] every ( dig := con := "") ||:= repl(i := 1 to *x,*x[i]) do con ||:= x[i] con := map(map(&lcase,con,dig),&lcase,repl(".",*&lcase)) } name := map(name) # lower case name[1] := map(name[1],&lcase,&ucase) # upper case 1st name := map(name,&lcase,con) # map cons every x := !"123456" do while name[find(x||x,name)+:2] := x # kill duplicates while name[upto('.',name)] := "" # kill . return left(name,4,"0")
end</lang>
implements soundex. The above version is an adaptation of that procedure.
Unicon
This Icon solution works in Unicon.
J
Solution <lang j>removeDups =: {.;.1~ (1 , }. ~: }: ) codes =: ;: 'BFPV CGJKQSXZ DT L MN R HW'
soundex =: 3 : 0
if. 0=# k=.toupper y do. '0' return. end. ({.k), ,": ,. 3 {. 0-.~ }. removeDups 7 0:`(I.@:=)`]} , k >:@I.@:(e. &>)"0 _ codes
)</lang> Usage <lang j>names=: 'Lloyd Woolcock Donnell Baragwanath Williams Ashcroft Euler Ellery Gauss Ghosh Hilbert Heilbronn Knuth Kant Ladd Lukasiewicz Lissajous' soundexNames=: 'L300 W422 D540 B625 W452 A226 E460 E460 G200 G200 H416 H416 K530 K530 L300 L222 L222'
soundex &> ;: names
L300 W422 D540 B625 W452 ....</lang> Test <lang j> soundexNames-:(soundex &.>) &. ;: names 1</lang>
Java
<lang java>public static void main(String[] args){
System.out.println(soundex("Soundex")); System.out.println(soundex("Example")); System.out.println(soundex("Sownteks")); System.out.println(soundex("Ekzampul")); }
private static String getCode(char c){
switch(c){ case 'B': case 'F': case 'P': case 'V': return "1"; case 'C': case 'G': case 'J': case 'K': case 'Q': case 'S': case 'X': case 'Z': return "2"; case 'D': case 'T': return "3"; case 'L': return "4"; case 'M': case 'N': return "5"; case 'R': return "6"; default: return ""; }
}
public static String soundex(String s){
String code, previous, soundex; code = s.toUpperCase().charAt(0) + ""; previous = "7"; for(int i = 1;i < s.length();i++){ String current = getCode(s.toUpperCase().charAt(i)); if(current.length() > 0 && !current.equals(previous)){ code = code + current; } previous = current; } soundex = (code + "0000").substring(0, 4); return soundex;
}</lang> Output:
S532 E251 S532 E251
JavaScript
<lang javascript>var soundex = function (s) {
var a = s .substring(1, s.length) .toLowerCase() .split(), r = , codes = { a: , e: , i: , o: , u: , b: 1, f: 1, p: 1, v: 1, c: 2, g: 2, j: 2, k: 2, q: 2, s: 2, x: 2, z: 2, d: 3, t: 3, l: 4, m: 5, n: 5, r: 6 };
r = s[0].toUpperCase() + a .filter(function (v, i, a) { return v !== a[i + 1]; }) .map(function (v, i, a) { return codes[v] }).join();
return (r + '000').slice(0, 4);
};</lang>
OCaml
Here is an implementation:
<lang ocaml>let c2d = function
| 'B' | 'F' | 'P' | 'V' -> "1" | 'C' | 'G' | 'J' | 'K' | 'Q' | 'S' | 'X' | 'Z' -> "2" | 'D' | 'T' -> "3" | 'L' -> "4" | 'M' | 'N' -> "5" | 'R' -> "6" | _ -> ""
let rec dbl acc = function
| [] -> (List.rev acc) | [c] -> List.rev(c::acc) | c1::(c2::_ as tl) -> if c1 = c2 then dbl acc tl else dbl (c1::acc) tl
let pad s =
match String.length s with | 0 -> s ^ "000" | 1 -> s ^ "00" | 2 -> s ^ "0" | 3 -> s | _ -> String.sub s 0 3
let soundex_aux rem =
pad(String.concat "" (dbl [] (List.map c2d rem)))
let soundex s =
let s = String.uppercase s in let cl = ref [] in String.iter (fun c -> cl := c :: !cl) s; match dbl [] (List.rev !cl) with | c::rem -> (String.make 1 c) ^ (soundex_aux rem) | [] -> invalid_arg "soundex"</lang>
Test our implementation:
<lang ocaml>let tests = [
"Soundex", "S532"; "Example", "E251"; "Sownteks", "S532"; "Ekzampul", "E251"; "Euler", "E460"; "Gauss", "G200"; "Hilbert", "H416"; "Knuth", "K530"; "Lloyd", "L300"; "Lukasiewicz", "L222"; "Ellery", "E460"; "Ghosh", "G200"; "Heilbronn", "H416"; "Kant", "K530"; "Ladd", "L300"; "Lissajous", "L222"; "Wheaton", "W350"; "Ashcraft", "A226"; "Burroughs", "B622"; "Burrows", "B620"; "O'Hara", "O600"; ]
let () =
print_endline " Word \t Code Found Status"; List.iter (fun (word, code1) -> let code2 = soundex word in let status = if code1 = code2 then "OK " else "Arg" in Printf.printf " \"%s\" \t %s %s %s\n" word code1 code2 status ) tests</lang>
This test outputs:
Word Code Found Status "Soundex" S532 S532 OK "Example" E251 E251 OK "Sownteks" S532 S532 OK "Ekzampul" E251 E251 OK "Euler" E460 E460 OK "Gauss" G200 G200 OK "Hilbert" H416 H416 OK "Knuth" K530 K530 OK "Lloyd" L300 L300 OK "Lukasiewicz" L222 L222 OK "Ellery" E460 E460 OK "Ghosh" G200 G200 OK "Heilbronn" H416 H416 OK "Kant" K530 K530 OK "Ladd" L300 L300 OK "Lissajous" L222 L222 OK "Wheaton" W350 W350 OK "Ashcraft" A226 A226 OK "Burroughs" B622 B622 OK "Burrows" B620 B620 OK "O'Hara" O600 O600 OK
See Soundex/OCaml for a version that can switch the language (English, French...) with a type which definition is hidden in the interface.
Perl
The Text::Soundex core module supports various soundex algorithms. <lang perl>use Text::Soundex; print soundex("Soundex"), "\n"; # S532 print soundex("Example"), "\n"; # E251 print soundex("Sownteks"), "\n"; # S532 print soundex("Ekzampul"), "\n"; # E251</lang>
PHP
PHP already has a built-in soundex() function: <lang php><?php echo soundex("Soundex"), "\n"; // S532 echo soundex("Example"), "\n"; // E251 echo soundex("Sownteks"), "\n"; // S532 echo soundex("Ekzampul"), "\n"; // E251 ?></lang>
PicoLisp
<lang PicoLisp>(de soundex (Str)
(pack (pad -4 (cons (uppc (char (char Str))) (let Last NIL (extract '((C) (and (setq C (case (uppc C) (`(chop "BFPV") "1") (`(chop "CGJKQSXZ") "2") (("D" "T") "3") ("L" "4") (("M" "N") "5") ("R" "6") ) ) (<> Last C) (setq Last C) ) ) (head 3 (cdr (chop Str))) ) ) ) ) ) )</lang>
Output:
: (mapcar soundex '("Soundex" "Example" "Sownteks" "Ekzampul")) -> ("S500" "E250" "S500" "E200")
PureBasic
<lang PureBasic>Procedure.s getCode(c.s)
Protected getCode.s = "" If FindString("BFPV", c ,1) : getCode = "1" : EndIf If FindString("CGJKQSXZ", c ,1) : getCode = "2" : EndIf If FindString("DT", c ,1) : getCode = "3" : EndIf If "L" = c : getCode = "4" : EndIf If FindString("MN", c ,1) : getCode = "5" : EndIf If "R" = c : getCode = "6" : EndIf If FindString("HW", c ,1) : getCode = "." : EndIf ProcedureReturn getCode
EndProcedure
Procedure.s soundex(word.s)
Protected.s previous.s = "" , code.s , current , soundex Protected.i i word = UCase(word) code = Mid(word,1,1) previous = "" For i = 2 To (Len(word) + 1) current = getCode(Mid(word, i, 1)) If current = "." : Continue : EndIf If Len(current) > 0 And current <> previous code + current EndIf previous = current If Len(code) = 4 Break EndIf Next If Len(code) < 4 code = LSet(code, 4,"0") EndIf ProcedureReturn code
EndProcedure
OpenConsole()
PrintN (soundex("Lukasiewicz")) PrintN("Press any key to exit"): Repeat: Until Inkey() <> ""</lang>
Python
<lang python>from itertools import groupby
def soundex(word):
codes = ("bfpv","cgjkqsxz", "dt", "l", "mn", "r") soundDict = dict((ch, str(ix+1)) for ix,cod in enumerate(codes) for ch in cod) cmap2 = lambda kar: soundDict.get(kar, '9') sdx = .join(cmap2(kar) for kar in word.lower()) sdx2 = word[0].upper() + .join(k for k,g in list(groupby(sdx))[1:] if k!='9') sdx3 = sdx2[0:4].ljust(4,'0') return sdx3
</lang> Example Output <lang Python>>>>print soundex("soundex") S532 >>>print soundex("example") E251 >>>print soundex("ciondecks") C532 >>>print soundex("ekzampul") E251</lang>
Ruby
Courtesy http://snippets.dzone.com/posts/show/4530 <lang ruby>class String
SoundexChars = 'BFPVCGJKQSXZDTLMNR' SoundexNums = '111122222222334556' SoundexCharsEx = '^' + SoundexChars SoundexCharsDel = '^A-Z'
# desc: http://en.wikipedia.org/wiki/Soundex def soundex(census = true) str = self.upcase.delete(SoundexCharsDel) str[0,1] + str[1..-1].delete(SoundexCharsEx). tr(SoundexChars, SoundexNums). squeeze[0 .. (census ? 2 : -1)]. ljust(3, '0') rescue end
def sounds_like(other) self.soundex == other.soundex end
end
%w(Soundex Sownteks Example Ekzampul foo bar).each_slice(2) do |word1, word2|
[word1, word2].each {|word| puts '%-8s -> %s' % [word, word.soundex]}
print "'#{word1}' " print word1.sounds_like(word2) ? "sounds" : "does not sound" print " like '#{word2}'\n"
end</lang>
Soundex -> S532 Sownteks -> S532 'Soundex' sounds like 'Sownteks' Example -> E251 Ekzampul -> E251 'Example' sounds like 'Ekzampul' foo -> F000 bar -> B600 'foo' does not sound like 'bar'
Scala
<lang scala>def soundex(s:String)={
var code=s.head.toUpper.toString var previous=getCode(code.head) for(ch <- s.drop(1); current=getCode(ch.toUpper)){ if (!current.isEmpty && current!=previous) code+=current previous=current } code+="0000" code.slice(0,4)
}
def getCode(c:Char)={
val code=Map("1"->List('B','F','P','V'), "2"->List('C','G','J','K','Q','S','X','Z'), "3"->List('D', 'T'), "4"->List('L'), "5"->List('M', 'N'), "6"->List('R'))
code.find(_._2.exists(_==c)) match { case Some((k,_)) => k case _ => "" }
}</lang>
<lang scala>def main(args: Array[String]): Unit = {
val tests=Map( "Soundex" -> "S532", "Euler" -> "E460", "Gauss" -> "G200", "Hilbert" -> "H416", "Knuth" -> "K530", "Lloyd" -> "L300", "Lukasiewicz" -> "L222", "Ellery" -> "E460", "Ghosh" -> "G200", "Heilbronn" -> "H416", "Kant" -> "K530", "Ladd" -> "L300", "Lissajous" -> "L222", "Wheaton" -> "W350", "Ashcraft" -> "A226", "Burroughs" -> "B622", "Burrows" -> "B620", "O'Hara" -> "O600")
tests.foreach{(v)=> val code=soundex(v._1) val status=if (code==v._2) "OK" else "ERROR" printf("Name: %-20s Code: %s Found: %s - %s\n", v._1, v._2, code, status) }
}</lang>
Scheme
This implements American Soundex as described at [1].
<lang scheme>;; The American Soundex System
- The soundex code consist of the first letter of the name followed
- by three digits. These three digits are determined by dropping the
- letters a, e, i, o, u, h, w and y and adding three digits from the
- remaining letters of the name according to the table below. There
- are only two additional rules. (1) If two or more consecutive
- letters have the same code, they are coded as one letter. (2) If
- there are an insufficient numbers of letters to make the three
- digits, the remaining digits are set to zero.
- Soundex Table
- 1 b,f,p,v
- 2 c,g,j,k,q,s,x,z
- 3 d, t
- 4 l
- 5 m, n
- 6 r
- Examples
- Miller M460
- Peterson P362
- Peters P362
- Auerbach A612
- Uhrbach U612
- Moskowitz M232
- Moskovitz M213
(define (char->soundex c)
(case (char-upcase c) ((#\B #\F #\P #\V) #\1) ((#\C #\G #\J #\K #\Q #\S #\X #\Z) #\2) ((#\D #\T) #\3) ((#\L) #\4) ((#\M #\N) #\5) ((#\R) #\6) (else #\nul)))
(define (collapse-dups lst)
(if (= (length lst) 1) lst (if (equal? (car lst) (cadr lst))
(collapse-dups (cdr lst)) (cons (car lst) (collapse-dups (cdr lst))))))
(define (remove-nul lst)
(filter (lambda (c)
(not (equal? c #\nul))) lst))
(define (force-len n lst)
(cond ((= n 0) '())
((null? lst) (force-len n (list #\0))) (else (cons (car lst) (force-len (- n 1) (cdr lst))))))
(define (soundex s)
(let ((slst (string->list s))) (force-len 4 (cons (char-upcase (car slst))
(remove-nul (collapse-dups (map char->soundex (cdr slst))))))))
(soundex "miller") (soundex "Peterson") (soundex "PETERS") (soundex "auerbach") (soundex "Uhrbach") (soundex "Moskowitz") (soundex "Moskovitz")</lang>
Sample Output
> "M460" > "P362" > "P362" > "A612" > "U612" > "M232" > "M213"
SNOBOL4
US National Archives (NARA) Soundex. Includes the "HW" rule omitted by Knuth and many other implementations.
<lang SNOBOL4>* # Soundex coding
- # ABCDEFGHIJKLMNOPQRSTUVWXYZ
- # 01230127022455012623017202
define('soundex(str)init,ch') :(soundex_end)
soundex sdxmap = '01230127022455012623017202'
str = replace(str,&lcase,&ucase)
sdx1 str notany(&ucase) = :s(sdx1)
init = substr(str,1,1) str = replace(str,&ucase,sdxmap)
sdx2 str len(1) $ ch span(*ch) = ch :s(sdx2)
- # Omit next line for Knuth's simple Soundex
sdx3 str len(1) $ ch ('7' *ch) = ch :s(sdx3)
str len(1) = init
sdx4 str any('07') = :s(sdx4)
str = substr(str,1,4) str = lt(size(str),4) str dupl('0',4 - size(str)) soundex = str :(return)
soundex_end
- # Test and display
test = " Washington Lee Gutierrez Pfister Jackson Tymczak"
+ " Ashcroft Swhgler O'Connor Rhys-Davies" loop test span(' ') break(' ') . name = :f(end)
output = soundex(name) ' ' name :(loop)
end</lang>
Output:
W252 Washington L000 Lee G362 Gutierrez P236 Pfister J250 Jackson T522 Tymczak A261 Ashcroft S460 Swhgler O256 O'Connor
Tcl
contains an implementation of Knuth's soundex algorithm in the soundex
package.
<lang tcl>package require soundex
foreach string {"Soundex" "Example" "Sownteks" "Ekzampul"} {
set soundexCode [soundex::knuth $string] puts "\"$string\" has code $soundexCode"
}</lang> Which produces this output:
"Soundex" has code S532 "Example" has code E251 "Sownteks" has code S532 "Ekzampul" has code E251
VBScript
<lang vbscript>Function getCode(c)
Select Case c Case "B", "F", "P", "V" getCode = "1" Case "C", "G", "J", "K", "Q", "S", "X", "Z" getCode = "2" Case "D", "T" getCode = "3" Case "L" getCode = "4" Case "M", "N" getCode = "5" Case "R" getCode = "6" End Select
End Function
Function soundex(s)
Dim code, previous code = UCase(Mid(s, 1, 1)) previous = 7 For i = 2 to (Len(s) + 1) current = getCode(UCase(Mid(s, i, 1))) If Len(current) > 0 And current <> previous Then code = code & current End If previous = current Next soundex = Mid(code, 1, 4) If Len(code) < 4 Then soundex = soundex & String(4 - Len(code), "0") End If
End Function</lang>