Boyer-Moore string search: Difference between revisions
Content added Content deleted
SqrtNegInf (talk | contribs) m (→{{header|Perl}}: off-by-one) |
(Emacs lisp support good suffix heuristic) |
||
Line 13: | Line 13: | ||
<syntaxhighlight lang="lisp"> |
<syntaxhighlight lang="lisp"> |
||
(defun bm_compile_pattern (pattern) |
(defun bm_compile_pattern (pattern) |
||
"Compile the pattern to a right most position map" |
"Compile the pattern to a right most position map" |
||
Line 23: | Line 24: | ||
) |
) |
||
) |
) |
||
;; |
|||
(defun bm_make_suffix_table (text) |
|||
(let ((suffix-table (make-vector (length text) -1)) (textLen (length text)) |
|||
(suffix-found nil) |
|||
) |
|||
(cl-loop for pos from (1- textLen) downto 1 do |
|||
(setq suffix-found nil) |
|||
(cl-loop for ptn from (- textLen 2) downto 0 while (not suffix-found) do |
|||
(let ((start1 pos) (end1 (1- textLen)) |
|||
(start2 (- ptn (- (1- textLen) pos))) (end2 ptn) |
|||
(matched 't) |
|||
) |
|||
(if (< start2 0) (setq start2 0)) |
|||
(cl-loop for idx1 from end1 downto start1 and idx2 from end2 downto start2 while matched do |
|||
(if (/= (elt text idx1) (elt text idx2)) |
|||
(setq matched nil)) |
|||
) |
|||
(when matched |
|||
(aset suffix-table pos start2) |
|||
(setq suffix-found 't) ) |
|||
) |
|||
) |
|||
) |
|||
suffix-table |
|||
) |
|||
⚫ | |||
;; |
|||
;; |
|||
(defun bm_substring_search (pattern text) |
(defun bm_substring_search (pattern text) |
||
"Boyer-Moore string search" |
"Boyer-Moore string search" |
||
Line 30: | Line 58: | ||
(startPos 0) |
(startPos 0) |
||
(result nil) |
(result nil) |
||
(rightMap (bm_compile_pattern pattern |
(rightMap (bm_compile_pattern pattern)) |
||
(suffixTable (bm_make_suffix_table pattern))) |
|||
;; Continue this loop when no result and not exceed the text length |
;; Continue this loop when no result and not exceed the text length |
||
(while (and (not result) (<= (+ startPos patLen) txtLen)) |
(while (and (not result) (<= (+ startPos patLen) txtLen)) |
||
(let ((idx patLen) |
(let ((idx patLen) |
||
(suffixSkip 0) |
|||
(badCharSkip 0) |
|||
(skip 0)) |
(skip 0)) |
||
(while (and (= 0 skip) (<= 0 (setq idx (1- idx)))) |
(while (and (= 0 skip) (<= 0 (setq idx (1- idx)))) |
||
(setq suffixSkip 0) |
|||
(setq badCharSkip 0) |
|||
;; skip when the character at position idx is different |
;; skip when the character at position idx is different |
||
(when (/= (elt pattern idx) (elt text (+ startPos idx))) |
(when (/= (elt pattern idx) (elt text (+ startPos idx))) |
||
(when (< idx (1- (length pattern))) |
|||
(setq suffixSkip (aref suffixTable (1+ idx))) ) |
|||
⚫ | |||
;; looking up the right most position in pattern |
;; looking up the right most position in pattern |
||
⚫ | |||
(setq skip (max 1 badCharSkip suffixSkip)) |
|||
) |
) |
||
) |
) |
||
Line 52: | Line 89: | ||
) |
) |
||
) |
) |
||
;; |
|||
(let ((pattern "alfalfa") |
(let ((pattern "alfalfa") |
||
(full_text "Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.")) |
(full_text "Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.")) |
||
⚫ | |||
⚫ | |||
⚫ | |||
) |
) |
||
</syntaxhighlight> |
</syntaxhighlight> |
||
Line 64: | Line 100: | ||
<pre>33</pre> |
<pre>33</pre> |
||
=={{header|J}}== |
=={{header|J}}== |
||
{{trans|Emacs Lisp}} |
{{trans|Emacs Lisp}} |