Textonyms: Difference between revisions
Content added Content deleted
m (→{{header|Phix}}: added syntax colouring, use of unix_dict(), multiple max_idx, and longest words) |
|||
Line 2,238: | Line 2,238: | ||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
<lang Phix> |
<!--<lang Phix>(phixonline)--> |
||
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
|||
digit['a'..'c'] = '2' |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">digit</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">255</span><span style="color: #0000FF;">)</span> |
|||
digit['d'..'f'] = '3' |
|||
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'a'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'c'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'2'</span> |
|||
digit['g'..'i'] = '4' |
|||
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'d'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'f'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'3'</span> |
|||
digit['j'..'l'] = '5' |
|||
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'g'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'i'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'4'</span> |
|||
digit['m'..'o'] = '6' |
|||
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'j'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'l'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'5'</span> |
|||
digit['p'..'s'] = '7' |
|||
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'m'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'o'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'6'</span> |
|||
digit['t'..'v'] = '8' |
|||
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'p'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'s'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'7'</span> |
|||
digit['w'..'z'] = '9' |
|||
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'t'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'v'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'8'</span> |
|||
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'w'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'z'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'9'</span> |
|||
function digits(string word) |
|||
for i=1 to length(word) do |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">digits</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">word</span><span style="color: #0000FF;">)</span> |
|||
integer ch = word[i] |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">keycode</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">))</span> |
|||
if ch<'a' or ch>'z' then return "" end if |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
word[i] = digit[ch] |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">word</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> |
|||
end for |
|||
<span style="color: #7060A8;">assert</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'z'</span><span style="color: #0000FF;">)</span> |
|||
return word |
|||
<span style="color: #000000;">keycode</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">]</span> |
|||
end function |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">keycode</span><span style="color: #0000FF;">,</span><span style="color: #000000;">word</span><span style="color: #0000FF;">}</span> |
|||
sequence words = {}, last="" |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
object word, keycode |
|||
integer keycode_count = 0, textonyms = 0, |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">az</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">word</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">return</span> <span style="color: #7060A8;">min</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)>=</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">and</span> <span style="color: #7060A8;">max</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)<=</span><span style="color: #008000;">'z'</span> <span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
this_count = 0, max_count = 0, max_idx |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">words</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">apply</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">filter</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">unix_dict</span><span style="color: #0000FF;">(),</span><span style="color: #000000;">az</span><span style="color: #0000FF;">),</span><span style="color: #000000;">digits</span><span style="color: #0000FF;">),</span> <span style="color: #000000;">max_idx</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">long_idx</span> |
|||
integer fn = open("demo\\unixdict.txt","r") |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">word</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">keycode</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">last</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span> |
|||
while 1 do |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">keycode_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">textonyms</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> |
|||
word = trim(gets(fn)) |
|||
<span style="color: #000000;">this_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">max_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">longest</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span> |
|||
if atom(word) then exit end if |
|||
keycode = digits(word) |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"There are %d words in unixdict.txt which can be represented by the digit key mapping.\n"</span><span style="color: #0000FF;">,{</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">)})</span> |
|||
if length(keycode) then |
|||
words = append(words, {keycode, word}) |
|||
<span style="color: #000080;font-style:italic;">-- Sort by keycode: while words are ordered we get |
|||
end if |
|||
-- eg {"a","ab","b","ba"} -> {"2","22","2","22"}</span> |
|||
end while |
|||
<span style="color: #000000;">words</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sort</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">deep_copy</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">))</span> |
|||
close(fn) |
|||
printf(1,"There are %d words in unixdict.txt which can be represented by the digit key mapping.\n",{length(words)}) |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #0000FF;">{</span><span style="color: #000000;">keycode</span><span style="color: #0000FF;">,</span><span style="color: #000000;">word</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> |
|||
words = sort(words) |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">keycode</span><span style="color: #0000FF;">=</span><span style="color: #000000;">last</span> <span style="color: #008080;">then</span> |
|||
for i=1 to length(words) do |
|||
<span style="color: #000000;">textonyms</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">this_count</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> |
|||
{keycode,word} = words[i] |
|||
<span style="color: #000000;">this_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
if keycode=last then |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">this_count</span><span style="color: #0000FF;">>=</span><span style="color: #000000;">max_count</span> <span style="color: #008080;">then</span> |
|||
textonyms += this_count=1 |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">this_count</span><span style="color: #0000FF;">></span><span style="color: #000000;">max_count</span> <span style="color: #008080;">then</span> |
|||
this_count += 1 |
|||
<span style="color: #000000;">max_idx</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">i</span><span style="color: #0000FF;">}</span> |
|||
if this_count>max_count then |
|||
<span style="color: #008080;">else</span> |
|||
<span style="color: #000000;">max_idx</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">i</span> |
|||
max_idx = i |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
end if |
|||
<span style="color: #000000;">max_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">this_count</span> |
|||
else |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
keycode_count += 1 |
|||
<span style="color: #008080;">else</span> |
|||
last = keycode |
|||
<span style="color: #000000;">keycode_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
this_count = 1 |
|||
<span style="color: #000000;">last</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">keycode</span> |
|||
end if |
|||
<span style="color: #000000;">this_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span> |
|||
end for |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)>=</span><span style="color: #000000;">longest</span> <span style="color: #008080;">then</span> |
|||
printf(1,"They require %d digit combinations to represent them.\n",{keycode_count}) |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)></span><span style="color: #000000;">longest</span> <span style="color: #008080;">then</span> |
|||
printf(1,"%d digit combinations represent Textonyms.\n",{textonyms}) |
|||
<span style="color: #000000;">long_idx</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">i</span><span style="color: #0000FF;">}</span> |
|||
<span style="color: #008080;">else</span> |
|||
sequence dups = {} |
|||
<span style="color: #000000;">long_idx</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">i</span> |
|||
for i=max_idx-max_count+1 to max_idx do |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
dups = append(dups,words[i][2]) |
|||
<span style="color: #000000;">longest</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)</span> |
|||
end for |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
printf(1,"The maximum number of textonyms for a particular digit key mapping is %d:\n",{max_count}) |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"They require %d digit combinations to represent them.\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">keycode_count</span><span style="color: #0000FF;">})</span> |
|||
printf(1," %s encodes %s\n",{words[max_idx][1],join(dups,"/")})</lang> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%d digit combinations represent Textonyms.\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">textonyms</span><span style="color: #0000FF;">})</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"The maximum number of textonyms for a particular digit key mapping is %d:\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">max_count</span><span style="color: #0000FF;">})</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">max_idx</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">max_idx</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">l</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">-</span><span style="color: #000000;">max_count</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">dups</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">vslice</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">l</span><span style="color: #0000FF;">..</span><span style="color: #000000;">k</span><span style="color: #0000FF;">],</span><span style="color: #000000;">2</span><span style="color: #0000FF;">),</span><span style="color: #008000;">"/"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" %s encodes %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span><span style="color: #000000;">dups</span><span style="color: #0000FF;">})</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"The longest words are %d characters long\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">longest</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Encodings with this length are:\n"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">long_idx</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" %s encodes %s\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">long_idx</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]])</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<!--</lang>--> |
|||
{{Out}} |
{{Out}} |
||
<small>(my unixdict.txt seems to have grown by 4 entries sometime in the past couple of years...)</small> |
|||
<pre> |
<pre> |
||
There are |
There are 24981 words in unixdict.txt which can be represented by the digit key mapping. |
||
They require |
They require 22906 digit combinations to represent them. |
||
1473 digit combinations represent Textonyms. |
1473 digit combinations represent Textonyms. |
||
The maximum number of textonyms for a particular digit key mapping is 9: |
The maximum number of textonyms for a particular digit key mapping is 9: |
||
269 encodes amy/any/bmw/bow/box/boy/cow/cox/coy |
269 encodes amy/any/bmw/bow/box/boy/cow/cox/coy |
||
729 encodes paw/pax/pay/paz/raw/ray/saw/sax/say |
|||
The longest words are 22 characters long |
|||
Encodings with this length are: |
|||
3532876362374256472749 encodes electroencephalography |
|||
</pre> |
</pre> |
||