Ukkonen’s suffix tree construction: Difference between revisions

m
(Added Wren)
m (→‎{{header|Wren}}: Minor tidy)
 
(7 intermediate revisions by 5 users not shown)
Line 1:
[[Category:String algorithms]]
{{draft task}}
Suffix Trees are very useful in numerous string processing and computational biology problems.
 
Line 22:
 
As the task doesn't say whether overlapping sequences are to be counted, I've assumed that they are as this is what the algorithm naturally produces.
<langsyntaxhighlight lang="go">package main
 
import (
Line 272:
fmt.Printf(" (this took %s)\n\n", elapsed)
}
}</langsyntaxhighlight>
 
{{out}}
Line 296:
first 100000 d.p. of Pi is: 041021944
(this took 599.770281ms)
</pre>
 
=={{header|Java}}==
<syntaxhighlight lang="java">
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
 
public final class UkkonenSuffixTree {
 
public static void main(String[] aArgs) throws IOException {
List<Integer> limits = List.of( 1_000, 10_000, 100_000 );
for ( int limit : limits ) {
String contents = Files.readString(Path.of("piDigits.txt"));
String piDigits = contents.substring(0, limit + 1);
final long start = System.currentTimeMillis();
SuffixTree tree = new SuffixTree(piDigits);
Map<String, Set<Integer>> substrings = tree.getLongestRepeatedSubstrings();
final long finish = System.currentTimeMillis();
System.out.println("First " + limit + " digits of pi has longest repeated characters:");
for ( String substring : substrings.keySet() ) {
System.out.print(" '" + substring + "' starting at index ");
for ( Iterator<Integer> iterator = substrings.get(substring).iterator(); iterator.hasNext(); ) {
System.out.print(iterator.next());
if ( iterator.hasNext() ) {
System.out.print(" and ");
}
}
System.out.println();
}
System.out.println("Time taken: " + ( finish - start ) + " milliseconds." + System.lineSeparator());
}
System.out.println("The timings show that the implementation has approximately linear performance.");
}
 
private final static class SuffixTree {
public SuffixTree(String aWord) {
text = Arrays.copyOfRange(aWord.toCharArray(), 0, aWord.length() + 1);
text[aWord.length()] = '\uF123'; // Terminal character
nodes = new Node[2 * aWord.length() + 2];
root = newNode(UNDEFINED, UNDEFINED);
activeNode = root;
for ( char character : text ) {
extendSuffixTree(character);
}
}
public Map<String, Set<Integer>> getLongestRepeatedSubstrings() {
List<Integer> indexes = doTraversal();
String word = String.valueOf(text).substring(0, text.length - 1);
Map<String, Set<Integer>> result = new HashMap<String, Set<Integer>>();
if ( indexes.get(0) > 0 ) {
for ( int i = 1; i < indexes.size(); i++ ) {
String substring = word.substring(indexes.get(i), indexes.get(i) + indexes.get(0));
result.computeIfAbsent(substring, k -> new TreeSet<Integer>()).add(indexes.get(i));
}
}
return result;
}
// PRIVATE //
private void extendSuffixTree(char aCharacter) {
needParentLink = UNDEFINED;
remainder++;
while ( remainder > 0 ) {
if ( activeLength == 0 ) {
activeEdge = textIndex;
}
if ( ! nodes[activeNode].children.containsKey(text[activeEdge]) ) {
final int leaf = newNode(textIndex, LEAF_NODE);
nodes[activeNode].children.put(text[activeEdge], leaf);
addSuffixLink(activeNode);
} else {
final int next = nodes[activeNode].children.get(text[activeEdge]);
if ( walkDown(next) ) {
continue;
}
if ( text[nodes[next].start + activeLength] == aCharacter ) {
activeLength++;
addSuffixLink(activeNode);
break;
}
final int split = newNode(nodes[next].start, nodes[next].start + activeLength);
nodes[activeNode].children.put(text[activeEdge], split);
final int leaf = newNode(textIndex, LEAF_NODE);
nodes[split].children.put(aCharacter, leaf);
nodes[next].start += activeLength;
nodes[split].children.put(text[nodes[next].start], next);
addSuffixLink(split);
}
remainder--;
if ( activeNode == root && activeLength > 0 ) {
activeLength--;
activeEdge = textIndex - remainder + 1;
} else {
activeNode = ( nodes[activeNode].parentLink > 0 ) ? nodes[activeNode].parentLink : root;
}
}
textIndex++;
}
private boolean walkDown(int aNode) {
if ( activeLength >= nodes[aNode].edgeLength() ) {
activeEdge += nodes[aNode].edgeLength();
activeLength -= nodes[aNode].edgeLength();
activeNode = aNode;
return true;
}
return false;
}
private void addSuffixLink(int aNode) {
if ( needParentLink != UNDEFINED ) {
nodes[needParentLink].parentLink = aNode;
}
needParentLink = aNode;
}
private int newNode(int aStart, int aEnd) {
Node node = new Node(aStart, aEnd);
node.leafIndex = ( aEnd == LEAF_NODE ) ? leafIndexGenerator++ : UNDEFINED;
nodes[currentNode] = node;
return currentNode++;
}
private List<Integer> doTraversal() {
List<Integer> indexes = new ArrayList<Integer>();
indexes.add(UNDEFINED);
return traversal(indexes, nodes[root], 0);
}
private List<Integer> traversal(List<Integer> aIndexes, Node aNode, int aHeight) {
if ( aNode.leafIndex == UNDEFINED ) {
for ( int index : aNode.children.values() ) {
Node child = nodes[index];
traversal(aIndexes, child, aHeight + child.edgeLength());
}
} else if ( aIndexes.get(0) < aHeight - aNode.edgeLength() ) {
aIndexes.clear();
aIndexes.add(aHeight - aNode.edgeLength());
aIndexes.add(aNode.leafIndex);
} else if ( aIndexes.get(0) == aHeight - aNode.edgeLength() ) {
aIndexes.add(aNode.leafIndex);
}
return aIndexes;
}
private final class Node {
public Node(int aStart, int aEnd) {
start = aStart;
end = aEnd;
}
public int edgeLength() {
return Math.min(end, textIndex + 1) - start;
}
private int start, end, parentLink, leafIndex;
private Map<Character, Integer> children = new HashMap<Character, Integer>();
}
private Node[] nodes;
private char[] text;
private int activeNode, activeLength, activeEdge;
private int root, textIndex, currentNode, needParentLink, remainder, leafIndexGenerator;
private static final int UNDEFINED = -1;
private static final int LEAF_NODE = Integer.MAX_VALUE;
}
 
}
</syntaxhighlight>
{{ out }}
<pre>
First 1000 digits of pi has longest repeated characters:
'82534' starting at index 87 and 902
'33446' starting at index 214 and 830
'60943' starting at index 396 and 550
'23846' starting at index 15 and 578
'93751' starting at index 44 and 940
'99999' starting at index 761 and 762
'42019' starting at index 700 and 993
Time taken: 13 milliseconds.
 
First 10000 digits of pi has longest repeated characters:
'7111369' starting at index 3501 and 6114
'8530614' starting at index 4166 and 4600
Time taken: 8 milliseconds.
 
First 100000 digits of pi has longest repeated characters:
'041021944' starting at index 21760 and 75869
'201890888' starting at index 30626 and 33843
'134926158' starting at index 69596 and 86281
Time taken: 100 milliseconds.
 
The timings show that the implementation has approximately linear performance.
</pre>
 
Line 301 ⟶ 532:
{{trans|Go}}
Uses array indices instead of the Go version's node pointers.
<langsyntaxhighlight lang="julia">const oo = typemax(Int)
 
"""The suffix-tree's node."""
Line 479 ⟶ 710:
 
testsuffixtree()
</langsyntaxhighlight>{{out}}
<pre>
Longest Repeated Substring in:
Line 501 ⟶ 732:
first 1000000 d.p. of π: 756130190263
6.008879 seconds (35.25 M allocations: 4.152 GiB, 23.20% gc time)
</pre>
 
=={{header|Nim}}==
{{trans|Julia}}
{{libheader|Nim-Integers}}
<syntaxhighlight lang="Nim">import std/[sequtils, strformat, strutils, tables]
 
const ∞ = int.high
 
type
 
# Suffix-tree node.
Node = ref object
children: Table[char, int]
first: int
last: int
suffixLink: int
suffixIndex: int
 
# Ukkonen suffix-tree.
SuffixTree = object
nodes: seq[Node]
text: string
root: int
position: int
currentNode: int
needSuffixLink: int
remainder: int
activeNode: int
activeLength: int
activeEdge: int
 
 
func newNode(): Node =
Node(first: -1, last: ∞, suffixLink: -1, suffixIndex: -1)
 
func newNode(first, last: int): Node =
Node(first: first, last: last, suffixLink: 0, suffixIndex: -1)
 
func newNode(st: var SuffixTree; first, last: int): int =
inc st.currentNode
st.nodes[st.currentNode] = newNode(first, last)
result = st.currentNode
 
 
func activeEdgeChar(st: SuffixTree): char {.inline.} = st.text[st.activeedge]
 
 
func edgeLength(st: SuffixTree; n: Node): int =
min(n.last, st.position + 1) - n.first
 
 
func addSuffixLink(st: var SuffixTree; nodenum: int) =
if st.needSuffixLink >= 0:
st.nodes[st.needSuffixLink].suffixLink = nodenum
st.needSuffixLink = nodenum
 
func walkdown(st: var SuffixTree; currNode: int): bool =
let length = st.edgeLength(st.nodes[currNode])
if st.activeLength < length: return false
inc st.activeEdge, length
dec st.activeLength, length
st.activeNode = currnode
result = true
 
 
func extendSuffixTree(st: var SuffixTree; pos: int) =
st.position = pos
st.needSuffixLink = -1
inc st.remainder
while st.remainder > 0:
if st.activeLength == 0: st.activeEdge = st.position
if st.activeEdgeChar() notin st.nodes[st.activeNode].children:
let nodeNum = st.newNode(st.position, ∞)
st.nodes[st.activeNode].children[st.activeEdgeChar()] = nodeNum
st.addSuffixLink(st.activeNode)
else:
let next = st.nodes[st.activeNode].children[st.activeEdgeChar()]
if st.walkdown(next): continue
if st.text[st.nodes[next].first + st.activeLength] == st.text[pos]:
st.addSuffixLink(st.activeNode)
inc st.activeLength
break
let split = st.newNode(st.nodes[next].first, st.nodes[next].first + st.activeLength)
st.nodes[st.activeNode].children[st.activeEdgeChar()] = split
let nodeNum = st.newNode(st.position, ∞)
st.nodes[split].children[st.text[pos]] = nodeNum
st.nodes[next].first += st.activelength
st.nodes[split].children[st.text[st.nodes[next].first]] = next
st.addSuffixLink(split)
 
dec st.remainder
if st.activeNode == st.root and st.activeLength > 0:
dec st.activelength
st.activeEdge = st.position - st.remainder + 1
elif st.activeNode != st.root:
st.activeNode = st.nodes[st.activeNode].suffixLink
 
 
proc setSuffixIndexByDFS(st: var SuffixTree; node: Node; labelHeight: Natural; verbose = false) =
if verbose and node.first >= 0:
echo st.text[node.first..<min(node.last, st.text.len)]
var isLeaf = true
for ichild in node.children.values:
let child = st.nodes[ichild]
if verbose and isLeaf and node.first >= 0:
echo &" [{node.suffixindex}]"
isLeaf = false
st.setSuffixIndexbyDFS(child, labelHeight + st.edgeLength(child))
if isleaf:
node.suffixindex = st.text.len - labelHeight
if verbose: echo &" [{node.suffixindex}]"
 
proc initSuffixTree(str: string): SuffixTree =
var nodes = newSeqWith(str.len * 2, newNode())
result = SuffixTree(nodes: nodes, text: str, position: -1, currentNode: -1,
needSuffixLink: -1, remainder: 0, activeLength: 1, activeEdge: 0)
result.root = result.newNode(0, 0)
result.activeNode = result.root
for i in 0..result.text.high:
result.extendSuffixTree(i)
result.setSuffixIndexByDFS(result.nodes[result.root], 0)
 
 
func doTraversal(st: SuffixTree): (int, seq[int]) =
var maxHeight = 0
var substringStartIndices = @[-1]
 
func traversal(node: Node; labelHeight: int) =
if node.suffixIndex == -1:
for ichild in node.children.values:
let child = st.nodes[ichild]
traversal(child, labelHeight + st.edgeLength(child))
elif maxHeight < labelHeight - st.edgeLength(node):
maxHeight = labelHeight - st.edgeLength(node)
substringStartIndices = @[node.suffixIndex]
elif maxHeight == labelHeight - st.edgelength(node):
substringStartIndices.add node.suffixIndex
 
traversal(st.nodes[st.root], 0)
result = (maxHeight, move(substringStartIndices))
 
 
proc getLongestRepeatedSubstring(st: SuffixTree; label = ""; printResult = true): string =
let (length, starts) = st.dotraversal()
result = if length == 0: ""
else: starts.mapIt(st.text[it..it+length-1]).deduplicate().join(" (or) ")
if printResult:
stdout.write " ", if label.len == 0: join(st.text) else: label, ": "
echo if length == 0: "No repeated substring." else: result
 
 
 
const Tests = ["CAAAABAAAABD$",
"GEEKSFORGEEKS$",
"AAAAAAAAAA$",
"ABCDEFG$",
"ABABABA$",
"ATCGATCGA$",
"banana$",
"abcpqrabpqpq$",
"pqrpqpqabab$"]
 
echo "Longest Repeated Substring in:\n"
for test in Tests:
var st = initSuffixTree(test)
discard st.getLongestRepeatedSubstring()
echo()
 
 
#############################################################################
# Pi calculation.
 
import std/[monotimes, times]
import integers
 
func isr(term, guess: Integer): Integer =
var term = term
result = guess
let value = term * result
while true:
if abs(term - result) <= 1:
break
result = (result + term) shr 1
term = value div result
 
func calcAgm(lam, gm: Integer; z: var Integer; ep: Integer): Integer =
var am: Integer
var lam = lam
var gm = gm
var n = 1
while true:
am = (lam + gm) shr 1
gm = isr(lam, gm)
let v = am - lam
let zi = v * v * n
if zi < ep:
break
z -= zi
inc n, n
lam = am
result = am
 
func bip(exp: int; man = 1): Integer {.inline.} = man * 10^exp
 
func calculatePi(): string =
const Digits = 4_000_000
let am = bip(Digits)
let gm = isqrt(bip(Digits + Digits - 1, 5))
var z = bip(Digits + Digits - 2, 25)
let agm = calcAGM(am, gm, z, bip(Digits + 1))
let pi = agm * agm * bip(Digits - 2) div z
result = $pi
 
let sπ = calculatePi()
for number in [1000, 10000, 100000, 1000000]:
let text = sπ[2..<number] & '$'
let start = getMonoTime()
var st = initSuffixTree(text)
discard st.getlongestrepeatedsubstring(&"first {number} d.p. of π")
echo &" → Temps: {(getMonoTime() - start).inMicroseconds} µs"
 
</syntaxhighlight>
 
{{out}}
<pre>Longest Repeated Substring in:
 
CAAAABAAAABD$: AAAAB
GEEKSFORGEEKS$: GEEKS
AAAAAAAAAA$: AAAAAAAAA
ABCDEFG$: No repeated substring.
ABABABA$: ABABA
ATCGATCGA$: ATCGA
banana$: ana
abcpqrabpqpq$: ab (or) pq
pqrpqpqabab$: ab (or) pq
 
first 1000 d.p. of π: 60943 (or) 42019 (or) 82534 (or) 33446 (or) 93751 (or) 99999 (or) 23846
→ Temps: 780 µs
first 10000 d.p. of π: 7111369 (or) 8530614
→ Temps: 14726 µs
first 100000 d.p. of π: 134926158 (or) 041021944 (or) 201890888
→ Temps: 178656 µs
first 1000000 d.p. of π: 756130190263
→ Temps: 1919290 µs
</pre>
 
=={{header|Phix}}==
{{trans|Go}}
<!--<syntaxhighlight lang="phix">(phixonline)-->
<lang Phix>-- demo/rosetta/Ukkonens_Suffix_Tree.exw
<span style="color: #000080;font-style:italic;">-- demo/rosetta/Ukkonens_Suffix_Tree.exw</span>
integer maxChar = 'z'
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">maxChar</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'z'</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">suffixLinks</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">starts</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">endIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">suffixIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">leaves</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">v</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">)</span>
sequence children = {},
<span style="color: #000000;">leaves</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">leaves</span><span style="color: #0000FF;">,</span><span style="color: #000000;">v</span><span style="color: #0000FF;">)</span>
suffixLinks = {},
<span style="color: #008080;">return</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">leaves</span><span style="color: #0000FF;">)</span>
starts = {},
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
ends = {},
suffixIndices = {}
string text
atom pSplitEnd,
pRootEnd,
pLeafEnd = allocate_word(0,true)
integer root = NULL,
lastNewNode,
activeNode,
activeEdge = -1,
activeLength = 0,
remainingSuffixCount = 0,
size = -1
<span style="color: #004080;">string</span> <span style="color: #000000;">text</span>
function newNode(integer start, atom pFinish, bool bKids=false)
<span style="color: #004080;">integer</span> <span style="color: #000000;">splitEndIdx</span><span style="color: #0000FF;">,</span>
children = append(children,iff(bKids?repeat(NULL,maxChar):0))
<span style="color: #000000;">rootEndIdx</span><span style="color: #0000FF;">,</span>
suffixLinks = append(suffixLinks,root)
<span style="color: #000000;">leafEndIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(),</span>
starts = append(starts,start)
<span style="color: #000000;">root</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span><span style="color: #0000FF;">,</span>
ends = append(ends,pFinish)
<span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">,</span>
suffixIndices = append(suffixIndices,-1)
<span style="color: #000000;">activeNode</span><span style="color: #0000FF;">,</span>
return length(children)
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span>
end function
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
function edgeLength(integer n)
<span style="color: #000000;">size</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span>
if n == root then
return 0
<span style="color: #008080;">function</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">start</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">finishIdx</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">bool</span> <span style="color: #000000;">bKids</span><span style="color: #0000FF;">=</span><span style="color: #004600;">false</span><span style="color: #0000FF;">)</span>
end if
<span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">,</span><span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">bKids</span><span style="color: #0000FF;">?</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">,</span><span style="color: #000000;">maxChar</span><span style="color: #0000FF;">):</span><span style="color: #000000;">0</span><span style="color: #0000FF;">))</span>
return peekns(ends[n]) - starts[n] + 1
<span style="color: #000000;">suffixLinks</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">,</span><span style="color: #000000;">root</span><span style="color: #0000FF;">)</span>
end function
<span style="color: #000000;">starts</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">endIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">endIndices</span><span style="color: #0000FF;">,</span><span style="color: #000000;">finishIdx</span><span style="color: #0000FF;">)</span>
function walkDown(integer currNode)
<span style="color: #000000;">suffixIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">suffixIndices</span><span style="color: #0000FF;">,-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
integer l = edgeLength(currNode)
<span style="color: #008080;">return</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">)</span>
if activeLength >= l then
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
activeEdge += l
activeLength -= l
<span style="color: #008080;">function</span> <span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">)</span>
activeNode = currNode
<span style="color: #008080;">return</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;">==</span><span style="color: #000000;">root</span><span style="color: #0000FF;">?</span><span style="color: #000000;">0</span><span style="color: #0000FF;">:</span><span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">endIndices</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]]</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
return true
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
end if
return false
<span style="color: #008080;">function</span> <span style="color: #000000;">walkDown</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">currNode</span><span style="color: #0000FF;">)</span>
end function
<span style="color: #004080;">integer</span> <span style="color: #000000;">l</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">currNode</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">>=</span> <span style="color: #000000;">l</span> <span style="color: #008080;">then</span>
procedure extendSuffixTree(integer pos)
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">l</span>
poken(pLeafEnd,pos)
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">l</span>
remainingSuffixCount += 1
<span style="color: #000000;">activeNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">currNode</span>
lastNewNode = NULL
<span style="color: #008080;">return</span> <span style="color: #004600;">true</span>
while remainingSuffixCount > 0 do
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
if activeLength == 0 then
<span style="color: #008080;">return</span> <span style="color: #004600;">false</span>
activeEdge = pos
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
end if
integer ta = text[activeEdge]
<span style="color: #008080;">procedure</span> <span style="color: #000000;">extendSuffixTree</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">pos</span><span style="color: #0000FF;">)</span>
object ca = children[activeNode]
<span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">leafEndIdx</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">pos</span>
integer next = iff(ca=0?NULL:ca[ta])
<span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
if next == null then
<span style="color: #000000;">lastNewNode</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span>
if ca=0 then
<span style="color: #008080;">while</span> <span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">></span> <span style="color: #000000;">0</span> <span style="color: #008080;">do</span>
children[activeNode] = repeat(NULL,maxChar)
<span style="color: #008080;">if</span> <span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">==</span> <span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
end if
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">pos</span>
children[activeNode][ta] = newNode(pos, pLeafEnd)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
if lastNewNode != NULL then
<span style="color: #004080;">integer</span> <span style="color: #000000;">ta</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeEdge</span><span style="color: #0000FF;">]</span>
suffixLinks[lastNewNode] = activeNode
<span style="color: #004080;">bool</span> <span style="color: #000000;">ca0</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">0</span>
lastNewNode = NULL
<span style="color: #004080;">integer</span> <span style="color: #000000;">next</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ca0</span><span style="color: #0000FF;">?</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">:</span><span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">ta</span><span style="color: #0000FF;">])</span>
end if
<span style="color: #008080;">if</span> <span style="color: #000000;">next</span><span style="color: #0000FF;">==</span><span style="color: #004600;">null</span> <span style="color: #008080;">then</span>
else
<span style="color: #008080;">if</span> <span style="color: #000000;">ca0</span> <span style="color: #008080;">then</span>
if walkDown(next) then
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">,</span><span style="color: #000000;">maxChar</span><span style="color: #0000FF;">)</span>
continue
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">ta</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pos</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">leafEndIdx</span><span style="color: #0000FF;">)</span>
integer tp = text[pos]
<span style="color: #008080;">if</span> <span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
if text[starts[next]+activeLength] == tp then
<span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">activeNode</span>
if lastNewNode != NULL and activeNode != root then
<span style="color: suffixLinks[#000000;">lastNewNode]</span> <span style="color: #0000FF;">=</span> <span style="color: activeNode#004600;">NULL</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
lastNewNode = NULL
<span style="color: end if#008080;">else</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">walkDown</span><span style="color: #0000FF;">(</span><span style="color: #000000;">next</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
activeLength += 1
exit <span style="color: #008080;">continue</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #004080;">integer</span> <span style="color: #000000;">tp</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">pos</span><span style="color: #0000FF;">]</span>
integer temp = starts[next] + activeLength - 1
<span style="color: #008080;">if</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]+</span><span style="color: #000000;">activeLength</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">==</span> <span style="color: #000000;">tp</span> <span style="color: #008080;">then</span>
pSplitEnd = allocate_word(temp,true)
<span style="color: #008080;">if</span> <span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">and</span> <span style="color: #000000;">activeNode</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">root</span> <span style="color: #008080;">then</span>
integer splitnode = newNode(starts[next], pSplitEnd,true)
<span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">activeNode</span>
ta = text[activeEdge]
<span style="color: #000000;">lastNewNode</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span>
children[activeNode][ta] = splitnode
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
children[splitnode][tp] = newNode(pos, pLeafEnd)
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
starts[next] += activeLength
<span style="color: #008080;">exit</span>
children[splitnode][text[starts[next]]] = next
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
if lastNewNode != NULL then
<span style="color: #004080;">integer</span> <span style="color: #000000;">temp</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">1</span>
suffixLinks[lastNewNode] = splitnode
<span style="color: #000000;">splitEndIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">temp</span><span style="color: #0000FF;">)</span>
end if
<span style="color: #004080;">integer</span> <span style="color: #000000;">splitnode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">splitEndIdx</span><span style="color: #0000FF;">,</span> <span style="color: #004600;">true</span><span style="color: #0000FF;">)</span>
lastNewNode = splitnode
<span style="color: #000000;">ta</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeEdge</span><span style="color: #0000FF;">]</span>
end if
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">ta</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">splitnode</span>
remainingSuffixCount -= 1
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">splitnode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">tp</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pos</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">leafEndIdx</span><span style="color: #0000FF;">)</span>
if activeNode == root and activeLength > 0 then
<span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">activeLength</span>
activeLength -= 1
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">splitnode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]]]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">next</span>
activeEdge = pos - remainingSuffixCount + 1
<span style="color: #008080;">if</span> <span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
elsif activeNode != root then
<span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">splitnode</span>
activeNode = suffixLinks[activeNode]
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #000000;">lastNewNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">splitnode</span>
end while
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end procedure
<span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">activeNode</span><span style="color: #0000FF;">==</span><span style="color: #000000;">root</span> <span style="color: #008080;">and</span> <span style="color: #000000;">activeLength</span><span style="color: #0000FF;">></span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
procedure setSuffixIndexByDFS(integer n, integer labelHeight)
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span>
if n!=NULL then
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">pos</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">1</span>
-- Uncomment the 3 lines below to print suffix tree
<span style="color: #008080;">elsif</span> <span style="color: #000000;">activeNode</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">root</span> <span style="color: #008080;">then</span>
--if starts[n]!=-1 then
<span style="color: #000000;">activeNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">]</span>
-- printf(1,text[starts[n]..peekns(ends[n])])
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
--end if
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
if children[n]=0 then
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
suffixIndices[n] = size - labelHeight
-- Uncomment line below to print suffix index
<span style="color: #008080;">procedure</span> <span style="color: #000000;">setSuffixIndexByDFS</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">)</span>
--printf(1," [%d]\n", suffixIndices[n])
<span style="color: #008080;">if</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
else
<span style="color: #008080;">if</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
bool leaf = true
<span style="color: #000000;">suffixIndices</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">size</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">labelHeight</span>
for i=1 to maxChar do
<span style="color: #008080;">else</span>
integer nci = children[n][i]
<span style="color: #004080;">bool</span> <span style="color: #000000;">leaf</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
if nci != NULL then
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">maxChar</span> <span style="color: #008080;">do</span>
-- Uncomment the 3 lines below to print suffix index
<span style="color: #004080;">integer</span> <span style="color: #000000;">nci</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
--if leaf and starts[n] != -1 then
<span style="color: #008080;">if</span> <span style="color: #000000;">nci</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
-- printf(1," [%d]\n", suffixIndices[n])
<span style="color: #000000;">leaf</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span>
--end if
<span style="color: #000000;">setSuffixIndexByDFS</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">+</span><span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">))</span>
leaf = false
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
setSuffixIndexByDFS(nci, labelHeight+edgeLength(nci))
<span style="color: #008080;">end</span> <span style="color: if#008080;">for</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">leaf</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> <span style="color: #000080;font-style:italic;">-- (sanity check)</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
if leaf then ?9/0 end if -- (sanity check)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
end if
end procedure
<span style="color: #008080;">procedure</span> <span style="color: #000000;">buildSuffixTree</span><span style="color: #0000FF;">()</span>
<span style="color: #000000;">size</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">)</span>
procedure buildSuffixTree()
<span style="color: #000000;">rootEndIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
size = length(text)
<span style="color: #000000;">root</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">rootEndIdx</span><span style="color: #0000FF;">)</span>
pRootEnd = allocate_word(-1,true)
<span style="color: #000000;">activeNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">root</span>
root = newNode(-1, pRootEnd)
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">size</span> <span style="color: #008080;">do</span>
activeNode = root
<span style="color: #000000;">extendSuffixTree</span><span style="color: #0000FF;">(</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span>
for i=1 to size do
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
extendSuffixTree(i)
<span style="color: #004080;">integer</span> <span style="color: #000000;">labelHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
end for
<span style="color: #000000;">setSuffixIndexByDFS</span><span style="color: #0000FF;">(</span><span style="color: #000000;">root</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">)</span>
integer labelHeight = 0
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
setSuffixIndexByDFS(root, labelHeight)
end procedure
<span style="color: #008080;">procedure</span> <span style="color: #000000;">doTraversal</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
procedure doTraversal(integer n, integer labelHeight, atom pMaxHeight, pSubstringStartIndex)
<span style="color: #004080;">integer</span> <span style="color: #000000;">nsi</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">suffixIndices</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">newHeight</span>
if n!=NULL then
<span style="color: #008080;">if</span> <span style="color: #000000;">nsi</span> <span style="color: #0000FF;">==</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
integer nsi = suffixIndices[n], newHeight
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">maxChar</span> <span style="color: #008080;">do</span>
if nsi == -1 then
<span style="color: #004080;">integer</span> <span style="color: #000000;">nci</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
for i=1 to maxChar do
<span style="color: #008080;">if</span> <span style="color: #000000;">nci</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
integer nci = children[n][i]
<span style="color: #000000;">newHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">+</span><span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">)</span>
if nci != NULL then
<span style="color: #000000;">doTraversal</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">newHeight</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">)</span>
newHeight = labelHeight+edgeLength(nci)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
doTraversal(nci, newHeight, pMaxHeight, pSubstringStartIndex)
<span style="color: #008080;">end</span> <span style="color: if#008080;">for</span>
<span style="color: #008080;">elsif</span> <span style="color: #000000;">nsi</span> <span style="color: #0000FF;">></span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
end for
<span style="color: #000000;">newHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">-</span><span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;">)</span>
elsif nsi > -1 then
<span style="color: #008080;">if</span> <span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">]<</span><span style="color: #000000;">newHeight</span> <span style="color: #008080;">then</span>
newHeight = labelHeight-edgeLength(n)
<span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newHeight</span>
if peekns(pMaxHeight)<newHeight then
<span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">nsi</span>
poken(pMaxHeight,newHeight)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
poken(pSubstringStartIndex,nsi)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
end if
end procedure
<span style="color: #008080;">function</span> <span style="color: #000000;">getLongestRepeatedSubstring</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">maxHeightIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(),</span>
procedure getLongestRepeatedSubstring(string s)
<span style="color: #000000;">substringStartIndex</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">()</span>
atom pMaxHeight = allocate_word(0,true),
<span style="color: #000000;">doTraversal</span><span style="color: #0000FF;">(</span><span style="color: #000000;">root</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">)</span>
pSubstringStartIndex = allocate_word(0,true)
<span style="color: #004080;">integer</span> <span style="color: #000000;">maxHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">],</span>
doTraversal(root, 0, pMaxHeight, pSubstringStartIndex)
<span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">]</span>
integer maxHeight = peekns(pMaxHeight),
<span style="color: #004080;">string</span> <span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">maxHeight</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">?</span><span style="color: #008000;">"No repeated substring"</span>
start = peekns(pSubstringStartIndex)
<span style="color: #0000FF;">:</span><span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">start</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">start</span><span style="color: #0000FF;">+</span><span style="color: #000000;">maxHeight</span><span style="color: #0000FF;">])</span>
-- Uncomment line below to print maxHeight and substringStartIndex
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span>
--printf(1,"maxHeight %d, substringStartIndex %d\n", {maxHeight, start})
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
string t = iff(maxHeight=0?"No repeated substring"
:text[start+1..start+maxHeight])
<span style="color: #008080;">constant</span> <span style="color: #000000;">tests</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"CAAAABAAAABD$"</span><span style="color: #0000FF;">,</span>
printf(1," %s is: %s\n", {iff(s=""?text:s),t})
<span style="color: #008000;">"GEEKSFORGEEKS$"</span><span style="color: #0000FF;">,</span>
end procedure
<span style="color: #008000;">"AAAAAAAAAA$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"ABCDEFG$"</span><span style="color: #0000FF;">,</span>
constant tests = {"GEEKSFORGEEKS$",
<span style="AAAAAAAAAAcolor: #008000;">"ABABABA$"</span><span style="color: #0000FF;">,</span>
<span style="ABCDEFGcolor: #008000;">"ATCGATCGA$"</span><span style="color: #0000FF;">,</span>
<span style="ABABABAcolor: #008000;">"banana$"</span><span style="color: #0000FF;">,</span>
<span style="ATCGATCGAcolor: #008000;">"abcpqrabpqpq$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"pqrpqpqabab$"</span><span style="color: #0000FF;">}</span>
"banana$",
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Longest Repeated Substring in:\n"</span><span style="color: #0000FF;">)</span>
"abcpqrabpqpq$",
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
"pqrpqpqabab$"}
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
printf(1,"Longest Repeated Substring in:\n")
<span style="color: #000000;">buildSuffixTree</span><span style="color: #0000FF;">()</span>
for i=1 to length(tests) do
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" %s is: %s\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">text</span><span style="color: #0000FF;">,</span><span style="color: #000000;">getLongestRepeatedSubstring</span><span style="color: #0000FF;">()})</span>
text = tests[i]
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
buildSuffixTree()
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">)</span>
getLongestRepeatedSubstring("")
end for
<span style="color: #008080;">include</span> <span style="color: #004080;">mpfr</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
printf(1,"\n")
<span style="color: #004080;">string</span> <span style="color: #000000;">piStr</span>
<span style="color: #008080;">if</span> <span style="color: #7060A8;">platform</span><span style="color: #0000FF;">()=</span><span style="color: #004600;">JS</span> <span style="color: #008080;">then</span>
-- Sadly gmp crashes when I try 100,000 dp, but I suspect it would be fine with more than my paltry 4GB ram
<span style="color: #004080;">mpfr</span> <span style="color: #000000;">pi</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">mpfr_init</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,-</span><span style="color: #000000;">1001</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (set precision to 1,000 dp, plus the "3.")</span>
include mpfr.e
<span style="color: #7060A8;">mpfr_const_pi</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pi</span><span style="color: #0000FF;">)</span>
mpfr pi = mpfr_init(0,-10001) -- (set precision to 10,000 dp, plus the "3.")
<span style="color: #000000;">piStr</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">mpfr_get_fixed</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pi</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1000</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (all we can really manage under pwa/p2js)</span>
mpfr_const_pi(pi)
<span style="color: #008080;">else</span>
string piStr = mpfr_sprintf("%.10000Rf", pi), s="Pi"
<span style="color: #000080;font-style:italic;">-- gmp crashes when I try 100,000 dp, so just get from file</span>
piStr = piStr[3..$] -- discard leading "3."
<span style="color: #000000;">piStr</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">get_text</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`E:\downloads\misc\arm\pi-10million.txt`</span><span style="color: #0000FF;">)</span>
maxChar = '9'
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
for i=3 to 6 do
<span style="color: #000000;">piStr</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">piStr</span><span style="color: #0000FF;">[</span><span style="color: #000000;">3</span><span style="color: #0000FF;">..$]</span> <span style="color: #000080;font-style:italic;">-- discard leading "3."</span>
atom t0 = time()
<span style="color: #000000;">maxChar</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'9'</span>
integer n = power(10,i)
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">3</span> <span style="color: #008080;">to</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">platform</span><span style="color: #0000FF;">()=</span><span style="color: #004600;">JS</span><span style="color: #0000FF;">?</span><span style="color: #000000;">3</span><span style="color: #0000FF;">:</span><span style="color: #000000;">6</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
if i>=5 then
<span style="color: #004080;">atom</span> <span style="color: #000000;">t0</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">time</span><span style="color: #0000FF;">()</span>
text = repeat('0',n)&"$"
<span style="color: #004080;">integer</span> <span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">power</span><span style="color: #0000FF;">(</span><span style="color: #000000;">10</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span>
for c=1 to n do
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">piStr</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">&</span> <span style="color: #008000;">"$"</span>
text[c] = rand(10)+'0'-1
<span style="color: #000000;">buildSuffixTree</span><span style="color: #0000FF;">()</span>
end for
<span style="color: #004080;">string</span> <span style="color: #000000;">r</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">getLongestRepeatedSubstring</span><span style="color: #0000FF;">(),</span>
s = "a made up number"
<span style="color: #000000;">e</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">elapsed</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">time</span><span style="color: #0000FF;">()-</span><span style="color: #000000;">t0</span><span style="color: #0000FF;">)</span>
else
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" first %,d d.p. of Pi is: %s (%s)\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">n</span><span style="color: #0000FF;">,</span><span style="color: #000000;">r</span><span style="color: #0000FF;">,</span><span style="color: #000000;">e</span><span style="color: #0000FF;">})</span>
text = piStr[1..n] & "$"
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
end if
<!--</syntaxhighlight>-->
buildSuffixTree()
getLongestRepeatedSubstring(sprintf("first %,d d.p. of %s", {n,s}))
printf(1," (this took %gs)\n\n", {time()-t0})
end for</lang>
{{out}}
<pre>
Longest Repeated Substring in:
CAAAABAAAABD$ is: AAAAB
GEEKSFORGEEKS$ is: GEEKS
AAAAAAAAAA$ is: AAAAAAAAA
Line 733 ⟶ 1,204:
pqrpqpqabab$ is: ab
 
first 1,000 d.p. of Pi is: 23846 (0s)
first 10,000 d.p. of Pi is: 7111369 (0.0s)
(this took 0s)
first 100,000 d.p. of Pi is: 041021944 (0.3s)
 
first 101,000,000 d.p. of Pi is: 7111369756130190263 (3.2s)
(this took 0.047s)
 
first 100,000 d.p. of a made up number is: 022105155
(this took 0.422s)
 
first 1,000,000 d.p. of a made up number is: 09014346795
(this took 4.734s)
</pre>
Note that mpfr_const_pi() struggles to generate more than 1,000 digits of pi under pwa/p2js [and will continue to do so unless someone graciously donates a decent/fast Chudnovsky method in pure Phix or JavaScript...]
 
=={{header|Wren}}==
Line 754 ⟶ 1,219:
 
Having done that, the timings for extracting the longest repeated sequence of digits are reasonably quick and fairly linear as expected.
<langsyntaxhighlight ecmascriptlang="wren">import "./big" for BigRat
import "./dynamic" for Struct
import "./trait" for ByRef
import "io" for File
 
Line 949 ⟶ 1,414:
var elapsed = (System.clock - start) * 1000
System.print(" (this took %(elapsed) ms)\n")
}</langsyntaxhighlight>
 
{{out}}
9,477

edits