Jump to content

Knuth-Morris-Pratt string search: Difference between revisions

(Added Wren)
Line 129:
Found <and> at: (one-based indices) [102, 129, 172]
Found <alfalfa> at: (one-based indices) [34, 88]
</pre>
 
=={{header|Phix}}==
<!--<lang Phix>(phixonline)-->
<span style="color: #000080;font-style:italic;">--
-- demo\rosetta\KnuthMorrisPratt.exw
-- =================================
--
-- based on https://www-igm.univ-mlv.fr/~lecroq/string/node8.html#SECTION0080
--</span>
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">preKmp</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">),</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">kmpNext</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">m</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">pat</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">'\0'</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;"><=</span> <span style="color: #000000;">m</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">></span> <span style="color: #000000;">0</span> <span style="color: #008080;">and</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">!=</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]+</span><span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">j</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">==</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">else</span>
<span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">kmpNext</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">bool</span> <span style="color: #000000;">case_insensitive</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">pins</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"`%s` in `%s`"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">shorten</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"characters"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">10</span><span style="color: #0000FF;">)})</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">case_insensitive</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">lower</span><span style="color: #0000FF;">({</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000080;font-style:italic;">/* Preprocessing */</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">kmpNext</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">preKmp</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">/* Searching */</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">cc</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;"><</span> <span style="color: #000000;">n</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">></span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">cc</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">j</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">>=</span> <span style="color: #000000;">m</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #000080;font-style:italic;">/* Output */</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">ccs</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"(%d character comparisons)"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">cc</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">many</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">ordinant</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">))</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Found %s %s at %v %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">pins</span><span style="color: #0000FF;">,</span><span style="color: #000000;">many</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ccs</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">else</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"No %s %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">pins</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ccs</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"GCAGAGAG"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"GCATCGCAGAGAGTATACAGTACG"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"TCTA"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"GCTAGCTCTACGAGTCTA"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"TAATAAA"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"GGCTATAATGCGTA"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"word"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"there would have been a time for such a word"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"needle"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"needle need noodle needle"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">book</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesley"</span><span style="color: #0000FF;">&</span>
<span style="color: #008000;">"DKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeand"</span><span style="color: #0000FF;">&</span>
<span style="color: #008000;">"assemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented"</span>
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"put"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">book</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"and"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">book</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">farm</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with "</span><span style="color: #0000FF;">&</span>
<span style="color: #008000;">"bales of all that alfalfa exchanged for milk."</span>
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"alfalfa"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">farm</span><span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">--KMP("aLfAlfa",farm) -- none
--KMP("aLfAlfa",farm,true) -- as -2</span>
<!--</lang>-->
{{out}}
Significantly higher character comparison counts than [[Boyer-Moore_string_search#Phix]].<br>
Also higher than those on the above (lecroq) link, probably because this carries on for all matches.
<pre>
Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons)
Found `TCTA` in `GCTAGCTCTACGAGTCTA` twice at {7,15} (19 character comparisons)
No `TAATAAA` in `GGCTATAATGCGTA` (16 character comparisons)
Found `word` in `there woul...uch a word (44 characters)` once at {41} (45 character comparisons)
Found `needle` in `needle need noodle needle` twice at {1,20} (27 character comparisons)
Found `put` in `Inhisbooks...epresented (202 characters)` twice at {27,91} (205 character comparisons)
Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons)
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons)
</pre>
 
7,813

edits

Cookies help us deliver our services. By using our services, you agree to our use of cookies.