Text completion: Difference between revisions

m
(Text completion en FreeBASIC)
m (→‎{{header|Wren}}: Minor tidy)
 
(6 intermediate revisions by 4 users not shown)
Line 8:
[https://github.com/dwyl/english-words Github Repo]<br>
[https://raw.githubusercontent.com/dwyl/english-words/master/words.txt Raw Text, Save as .txt file]<br>
[https://en.wikipedia.org/wiki/Hamming_distance Hamming Distance]<br>
[https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance Jaro-Winkler Distance]<br>
[https://dev.to/lefebvre/the-soundex-algorithm-5el1 SoundEx Algorithm]<br>
[https://en.wikipedia.org/wiki/Soundex SoundEx Algorithm Wiki]<br>
[http://www.catalysoft.com/articles/StrikeAMatch.html Dice's Coefficient]<br>
[https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient Dice Coefficient Wiki]
;Possible Output:
<pre>
Line 31:
=={{header|C++}}==
{{trans|Julia}}
<syntaxhighlight lang="cpp">#include <algorithm>
#include <fstream>
#include <iostream>
Line 99:
}
return EXIT_SUCCESS;
}</syntaxhighlight>
 
{{out}}
Line 125:
{{Trans|Wren}}
Using '''unixdict.txt'''.
<syntaxhighlight lang="delphi">
program Text_Completion;
 
Line 236:
end.
 
</syntaxhighlight>
 
{{out}}
Line 255:
{{trans|Julia}}
{{works with|Factor|0.99 2020-07-03}}
<syntaxhighlight lang="factor">USING: formatting fry http.client io kernel lcs literals math
math.ranges namespaces prettyprint.config sequences splitting ;
 
Line 273:
dup [ similarity ] [ drop word ] [ word swap lev-dist-of ] tri
"Words at Levenshtein distance of %d (%.1f%% similarity) from %u:\n%u\n\n" printf
] each</syntaxhighlight>
{{out}}
<pre>
Line 292:
=={{header|FreeBASIC}}==
This uses '[http://wiki.puzzlers.org/pub/wordlists/unixdict.txt unixdict]' and the [https://www.rosettacode.org/wiki/Levenshtein_distance#FreeBASIC Levenshtein distance] algorithm to test for similarity.
<syntaxhighlight lang="freebasic">#define MIN(a, b) iif((a) < (b), (a), (b))
 
Dim As String palabra = "complition"
Line 323:
Print
Next n
Sleep</syntaxhighlight>
{{out}}
<pre>Input word: complition
Line 339:
=={{header|Go}}==
{{trans|Wren}}
<syntaxhighlight lang="go">package main
 
import (
Line 402:
fmt.Println()
}
}</syntaxhighlight>
 
{{out}}
Line 420:
=={{header|Java}}==
[https://github.com/dwyl/english-words Github Repo Uses dependencies given].
<syntaxhighlight lang="java">
import java.io.File;
import java.io.IOException;
Line 483:
}
}
</syntaxhighlight>
;Output
<pre>
Line 512:
 
The "debug" statements for showing the number of words under consideration for best Levenshtein matches are retained.
<syntaxhighlight lang="jq">
include "levenshtein-distance" {search: "."}; # https://rosettacode.org/wiki/Levenshtein_distance#jq
 
Line 546:
 
task
</syntaxhighlight>
{{out}}
Invocation: jq -nR -f text-completion.jq unixdict.txt
Line 614:
=={{header|Julia}}==
See https://en.wikipedia.org/wiki/Levenshtein_distance, the number of one character edits to obtain one word from another.
<syntaxhighlight lang="julia">using StringDistances
 
const fname = download("https://www.mit.edu/~ecprice/wordlist.10000", "wordlist10000.txt")
Line 627:
levdistof(n, wrd), "\n")
end
</syntaxhighlight>{{out}}
<pre>
Words at Levenshtein distance of 1 (90% similarity) from "complition":
Line 643:
 
=={{header|Mathematica}}==
<syntaxhighlight lang="mathematica">Module[
{word = "complition"},
Map[
Line 649:
SpellingCorrectionList[word]
]
] // Grid</syntaxhighlight>
{{out}}
<pre>completion 90%
Line 663:
We use the function <code>editDistance</code> from the <code>std/editdistance</code> module to get the Levenshtein distance (computed over Unicode code points).
 
<syntaxhighlight lang="nim">import std/editdistance, sequtils, strformat, strutils
 
let search = "complition"
Line 679:
echo &"Words which are {similarity:4.1f}% similar:"
echo lev[i].join(" ")
echo()</syntaxhighlight>
 
{{out}}
Line 695:
=={{header|Perl}}==
Inspired by Raku Sorenson-Dice implementation (doesn't handle Unicode, but module <code>Text::Dice</code> can).
<syntaxhighlight lang="perl">use strict;
use warnings;
use feature 'say';
use Path::Tiny;
use List::Util '<uniq head'>;
 
# sub bi_gram { (lc shift) =~ /(?<=\K.)./g } ## doesn't work in recent versions of Perl
 
# Return the distinct two-character substrings (bigrams) of the lowercased
# argument, in order of first appearance. Inputs shorter than two characters
# produce no bigrams.
sub bi_gram {
    my $text = lc shift;
    my $last_start = length($text) - 2;    # final offset at which a bigram can begin
    my @pairs;
    push @pairs, substr($text, $_, 2) for 0 .. $last_start;
    return uniq @pairs;
}
 
sub score {
Line 728 ⟶ 733:
push @ranked, sprintf "%.3f $_", $scored{$_} for sort { $scored{$b} <=> $scored{$a} } keys %scored;
say "\n$word:\n" . join("\n", head 10, @ranked);
}</syntaxhighlight>
{{out}}
<pre>complition:
Line 752 ⟶ 757:
=={{header|Phix}}==
uses levenshtein() from [[Levenshtein_distance#Phix]] (reproduced below for your convenience) and the standard unix_dict().
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">function</span> <span style="color: #000000;">levenshtein</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">),</span>
Line 780 ⟶ 785:
<span style="color: #0000FF;">{</span><span style="color: #000000;">n</span><span style="color: #0000FF;">,</span><span style="color: #000000;">100</span><span style="color: #0000FF;">-</span><span style="color: #7060A8;">round</span><span style="color: #0000FF;">(</span><span style="color: #000000;">100</span><span style="color: #0000FF;">*</span><span style="color: #000000;">n</span><span style="color: #0000FF;">/</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)),</span><span style="color: #000000;">word</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">join_by</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">filter</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ln</span><span style="color: #0000FF;">,</span><span style="color: #000000;">n</span><span style="color: #0000FF;">),</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">6</span><span style="color: #0000FF;">)})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</syntaxhighlight>-->
{{out}}
(matches Delphi/Go/Wren)
Line 811 ⟶ 816:
===Hamming distance===
{{Trans|Java}}
<syntaxhighlight lang="raku" line>sub MAIN ( Str $user_word = 'complition', Str $filename = 'words.txt' ) {
my @s1 = $user_word.comb;
my @listed = gather for $filename.IO.lines -> $line {
Line 829 ⟶ 834:
 
say "{.[0].fmt('%.2f')}% {.[1]}" for @listed;
}</syntaxhighlight>
{{out}}
<pre>80.00% compaction
Line 842 ⟶ 847:
Using unixdict.txt from www.puzzlers.org
 
<syntaxhighlight lang="raku" line>sub sorenson ($phrase, %hash) {
my $match = bigram $phrase;
%hash.race.map: { [(2 * ($match ∩ .value) / ($match + .value)).round(.001), .key] }
Line 856 ⟶ 861:
say "\n$w:";
.say for sorenson($w, %hash).grep(*.[0] >= .55).sort({-.[0],~.[1]}).head(10);
}</syntaxhighlight>
 
{{out}}
Line 914 ⟶ 919:
 
No attempt was made to change (by any method) three letters or more.
<syntaxhighlight lang="rexx">/*REXX pgm finds (dictionary) words which can be found in a specified word wheel (grid).*/
parse arg what iFID . /*obtain optional arguments from the CL*/
if what==''|what=="," then what= 'complition' /*Not specified? Then use the default.*/
Line 977 ⟶ 982:
if @.z\==. then $= $ z; if r==1 then call recur
end /*k*/
end /*j*/; return space($)</syntaxhighlight>
{{out|output|text=&nbsp; when using the default inputs:}}
<pre>
Line 1,044 ⟶ 1,049:
(80%) compaction
(80%) complexion
</pre>
 
=={{header|V (Vlang)}}==
{{trans|Go}}
<syntaxhighlight lang="v (vlang)">import os
 
// levenshtein returns the Levenshtein edit distance between `s` and `t`:
// the minimum number of single-byte insertions, deletions and substitutions
// needed to turn `s` into `t`. The strings are compared byte by byte.
fn levenshtein(s string, t string) int {
	// d[i][j] is the distance between the first i bytes of s and the first j bytes of t.
	mut d := [][]int{len: s.len + 1, init: []int{len: t.len + 1}}
	// Reducing a prefix of s to the empty string costs i deletions.
	for i, _ in d {
		d[i][0] = i
	}
	// Building a prefix of t from the empty string costs j insertions.
	// The index form is required here: `for j in d[0]` would yield the stored
	// values (all zero) and leave the whole first row at 0, making any leading
	// characters of t free.
	for j, _ in d[0] {
		d[0][j] = j
	}
	for j := 1; j <= t.len; j++ {
		for i := 1; i <= s.len; i++ {
			if s[i - 1] == t[j - 1] {
				// Matching bytes extend the previous alignment at no cost.
				d[i][j] = d[i - 1][j - 1]
			} else {
				// Cheapest of deletion, insertion and substitution, plus one edit.
				mut min := d[i - 1][j]
				if d[i][j - 1] < min {
					min = d[i][j - 1]
				}
				if d[i - 1][j - 1] < min {
					min = d[i - 1][j - 1]
				}
				d[i][j] = min + 1
			}
		}
	}
	return d[s.len][t.len]
}
// main reads the word list in `unixdict.txt`, groups every word whose
// Levenshtein distance from the search word is below 4 by that distance,
// and prints the groups for distances 1 to 3 together with the
// corresponding similarity percentage.
fn main() {
	search := "complition"
	filename := "unixdict.txt"
	words := os.read_lines(filename) or { panic('FAILED to read file: $filename') }
	// lev[k] collects the words at distance k from the search word (k = 0..3).
	mut lev := [][]string{len: 4}
	for word in words {
		ld := levenshtein(search, word)
		if ld < 4 {
			lev[ld] << word
		}
	}
	println("Input word: $search\n")
	for i in 1 .. 4 {
		length := f64(search.len)
		// Similarity is the share of the search word's bytes left unedited.
		similarity := (length - f64(i)) * 100 / length
		println("Words which are ${similarity:4.1f}% similar:")
		println(lev[i])
		println('')
	}
}</syntaxhighlight>
 
{{out}}
<pre>
Input word: complition
 
Words which are 90.0% similar:
['completion', 'incompletion']
 
Words which are 80.0% similar:
['coalition', 'competition', 'compilation', 'complexion', 'composition', 'decomposition']
 
Words which are 70.0% similar:
['abolition', 'cognition', 'collision', 'combustion', 'commotion', 'companion', 'compassion', 'complain', 'complicity', 'compton', 'compulsion', 'compunction', 'computation', 'condition', 'contrition', 'demolition', 'locomotion', 'postcondition', 'volition']
</pre>
 
Line 1,049 ⟶ 1,121:
{{libheader|Wren-fmt}}
This uses 'unixdict' and the Levenshtein distance algorithm to test for similarity.
<syntaxhighlight lang="wren">import "io" for File
import "./fmt" for Fmt
 
var levenshtein = Fn.new { |s, t|
Line 1,093 ⟶ 1,165:
System.print(lev[i])
System.print()
} </syntaxhighlight>
 
{{out}}
9,476

edits