Suffix tree: Difference between revisions

Rename Perl 6 -> Raku, alphabetize, minor clean-up
(Rename Perl 6 -> Raku, alphabetize, minor clean-up)
Line 20:
The computation time for an efficient algorithm should be <math>O(n)</math>, but such an algorithm might be difficult to implement. An easier, <math>O(n^2)</math> algorithm is accepted.
 
=={{header|C++ sharp|C#}}==
{{trans|D}}
<lang cpp>#include <functional>
#include <iostream>
#include <vector>
 
struct Node {
std::string sub = ""; // a substring of the input string
std::vector<int> ch; // vector of child nodes
 
Node() {
// empty
}
 
Node(const std::string& sub, std::initializer_list<int> children) : sub(sub) {
ch.insert(ch.end(), children);
}
};
 
struct SuffixTree {
std::vector<Node> nodes;
 
SuffixTree(const std::string& str) {
nodes.push_back(Node{});
for (size_t i = 0; i < str.length(); i++) {
addSuffix(str.substr(i));
}
}
 
void visualize() {
if (nodes.size() == 0) {
std::cout << "<empty>\n";
return;
}
 
std::function<void(int, const std::string&)> f;
f = [&](int n, const std::string & pre) {
auto children = nodes[n].ch;
if (children.size() == 0) {
std::cout << "- " << nodes[n].sub << '\n';
return;
}
std::cout << "+ " << nodes[n].sub << '\n';
 
auto it = std::begin(children);
if (it != std::end(children)) do {
if (std::next(it) == std::end(children)) break;
std::cout << pre << "+-";
f(*it, pre + "| ");
it = std::next(it);
} while (true);
 
std::cout << pre << "+-";
f(children[children.size() - 1], pre + " ");
};
 
f(0, "");
}
 
private:
void addSuffix(const std::string & suf) {
int n = 0;
size_t i = 0;
while (i < suf.length()) {
char b = suf[i];
int x2 = 0;
int n2;
while (true) {
auto children = nodes[n].ch;
if (x2 == children.size()) {
// no matching child, remainder of suf becomes new node
n2 = nodes.size();
nodes.push_back(Node(suf.substr(i), {}));
nodes[n].ch.push_back(n2);
return;
}
n2 = children[x2];
if (nodes[n2].sub[0] == b) {
break;
}
x2++;
}
// find prefix of remaining suffix in common with child
auto sub2 = nodes[n2].sub;
size_t j = 0;
while (j < sub2.size()) {
if (suf[i + j] != sub2[j]) {
// split n2
auto n3 = n2;
// new node for the part in common
n2 = nodes.size();
nodes.push_back(Node(sub2.substr(0, j), { n3 }));
nodes[n3].sub = sub2.substr(j); // old node loses the part in common
nodes[n].ch[x2] = n2;
break; // continue down the tree
}
j++;
}
i += j; // advance past part in common
n = n2; // continue down the tree
}
}
};
 
int main() {
SuffixTree("banana$").visualize();
}</lang>
{{out}}
<pre>+
+-- banana$
+-+ a
| +-+ na
| | +-- na$
| | +-- $
| +-- $
+-+ na
| +-- na$
| +-- $
+-- $</pre>
 
=={{header|C#|C sharp}}==
{{trans|C++}}
<lang csharp>using System;
Line 251 ⟶ 131:
}
}
}</lang>
{{out}}
<pre>+
+-- banana$
+-+ a
| +-+ na
| | +-- na$
| | +-- $
| +-- $
+-+ na
| +-- na$
| +-- $
+-- $</pre>
 
=={{header|C++}}==
{{trans|D}}
<lang cpp>#include <functional>
#include <iostream>
#include <vector>
 
struct Node {
std::string sub = ""; // a substring of the input string
std::vector<int> ch; // vector of child nodes
 
Node() {
// empty
}
 
Node(const std::string& sub, std::initializer_list<int> children) : sub(sub) {
ch.insert(ch.end(), children);
}
};
 
struct SuffixTree {
std::vector<Node> nodes;
 
SuffixTree(const std::string& str) {
nodes.push_back(Node{});
for (size_t i = 0; i < str.length(); i++) {
addSuffix(str.substr(i));
}
}
 
void visualize() {
if (nodes.size() == 0) {
std::cout << "<empty>\n";
return;
}
 
std::function<void(int, const std::string&)> f;
f = [&](int n, const std::string & pre) {
auto children = nodes[n].ch;
if (children.size() == 0) {
std::cout << "- " << nodes[n].sub << '\n';
return;
}
std::cout << "+ " << nodes[n].sub << '\n';
 
auto it = std::begin(children);
if (it != std::end(children)) do {
if (std::next(it) == std::end(children)) break;
std::cout << pre << "+-";
f(*it, pre + "| ");
it = std::next(it);
} while (true);
 
std::cout << pre << "+-";
f(children[children.size() - 1], pre + " ");
};
 
f(0, "");
}
 
private:
void addSuffix(const std::string & suf) {
int n = 0;
size_t i = 0;
while (i < suf.length()) {
char b = suf[i];
int x2 = 0;
int n2;
while (true) {
auto children = nodes[n].ch;
if (x2 == children.size()) {
// no matching child, remainder of suf becomes new node
n2 = nodes.size();
nodes.push_back(Node(suf.substr(i), {}));
nodes[n].ch.push_back(n2);
return;
}
n2 = children[x2];
if (nodes[n2].sub[0] == b) {
break;
}
x2++;
}
// find prefix of remaining suffix in common with child
auto sub2 = nodes[n2].sub;
size_t j = 0;
while (j < sub2.size()) {
if (suf[i + j] != sub2[j]) {
// split n2
auto n3 = n2;
// new node for the part in common
n2 = nodes.size();
nodes.push_back(Node(sub2.substr(0, j), { n3 }));
nodes[n3].sub = sub2.substr(j); // old node loses the part in common
nodes[n].ch[x2] = n2;
break; // continue down the tree
}
j++;
}
i += j; // advance past part in common
n = n2; // continue down the tree
}
}
};
 
int main() {
SuffixTree("banana$").visualize();
}</lang>
{{out}}
Line 801:
}
};</pre>
 
=={{header|Perl 6}}==
{{Works with|Rakudo|2018.04}}
Here is quite a naive algorithm, probably <math>O(n^2)</math>.
 
The display code is a variant of the [[visualize_a_tree#Perl6|visualize a tree]] task code.
 
<lang perl6>multi suffix-tree(Str $str) { suffix-tree flat map &flip, [\~] $str.flip.comb }
multi suffix-tree(@a) {
hash
@a == 0 ?? () !!
@a == 1 ?? ( @a[0] => [] ) !!
gather for @a.classify(*.substr(0, 1)) {
my $subtree = suffix-tree(grep *.chars, map *.substr(1), .value[]);
if $subtree == 1 {
my $pair = $subtree.pick;
take .key ~ $pair.key => $pair.value;
} else {
take .key => $subtree;
}
}
}
 
my $tree = root => suffix-tree 'banana$';
 
.say for visualize-tree $tree, *.key, *.value.List;
 
sub visualize-tree($tree, &label, &children,
:$indent = '',
:@mid = ('├─', '│ '),
:@end = ('└─', ' '),
) {
sub visit($node, *@pre) {
gather {
take @pre[0] ~ $node.&label;
my @children = sort $node.&children;
my $end = @children.end;
for @children.kv -> $_, $child {
when $end { take visit($child, (@pre[1] X~ @end)) }
default { take visit($child, (@pre[1] X~ @mid)) }
}
}
}
flat visit($tree, $indent xx 2);
}</lang>
 
{{out}}
<pre>root
├─$
├─a
│ ├─$
│ └─na
│ ├─$
│ └─na$
├─banana$
└─na
├─$
└─na$</pre>
 
=={{header|Phix}}==
Line 1,062 ⟶ 1,004:
| `-- na$
`-- banana$</pre>
 
=={{header|Raku}}==
(formerly Perl 6)
{{Works with|Rakudo|2018.04}}
Here is quite a naive algorithm, probably <math>O(n^2)</math>.
 
The display code is a variant of the [[visualize_a_tree#Perl6|visualize a tree]] task code.
 
<lang perl6>multi suffix-tree(Str $str) { suffix-tree flat map &flip, [\~] $str.flip.comb }
multi suffix-tree(@a) {
hash
@a == 0 ?? () !!
@a == 1 ?? ( @a[0] => [] ) !!
gather for @a.classify(*.substr(0, 1)) {
my $subtree = suffix-tree(grep *.chars, map *.substr(1), .value[]);
if $subtree == 1 {
my $pair = $subtree.pick;
take .key ~ $pair.key => $pair.value;
} else {
take .key => $subtree;
}
}
}
 
my $tree = root => suffix-tree 'banana$';
 
.say for visualize-tree $tree, *.key, *.value.List;
 
sub visualize-tree($tree, &label, &children,
:$indent = '',
:@mid = ('├─', '│ '),
:@end = ('└─', ' '),
) {
sub visit($node, *@pre) {
gather {
take @pre[0] ~ $node.&label;
my @children = sort $node.&children;
my $end = @children.end;
for @children.kv -> $_, $child {
when $end { take visit($child, (@pre[1] X~ @end)) }
default { take visit($child, (@pre[1] X~ @mid)) }
}
}
}
flat visit($tree, $indent xx 2);
}</lang>
 
{{out}}
<pre>root
├─$
├─a
│ ├─$
│ └─na
│ ├─$
│ └─na$
├─banana$
└─na
├─$
└─na$</pre>
 
=={{header|Sidef}}==
10,333

edits