Factorize string into Lyndon words: Difference between revisions
(Add Rust implementation) |
(New post.) |
||
(28 intermediate revisions by 11 users not shown) | |||
Line 1: | Line 1: | ||
{{task}} |
|||
{{task}}Given a finite alphabet, we can lexicographically order all strings in the alphabet. If two strings have different lengths, then pad the shorter string on the right with the smallest letter. For example, we have 01 > 001, but 01 = 010. As we see, this order is a total preorder, but not a total order, since it identifies different strings. |
|||
Given a finite alphabet, we can lexicographically order all strings in the alphabet. If two strings have different lengths, then pad the shorter string on the right with the smallest letter. For example, we have 01 > 001, but 01 = 010. As we see, this order is a total preorder, but not a total order, since it identifies different strings. |
|||
A [[wp:Lyndon_word|Lyndon word]] is a non-empty string that is ''strictly'' lower in lexicographic order than all its circular rotations. In particular, if a string is equal to a circular rotation, then it is not a Lyndon word. For example, since 0101 = 0101 (rotation by 2), it is not a Lyndon word. |
|||
A [[Lyndon word]] is a non-empty string that is ''strictly'' lower in lexicographic order than all its circular rotations. In particular, if a string is equal to a circular rotation, then it is not a Lyndon word. For example, since 0101 = 0101 (rotation by 2), it is not a Lyndon word. |
|||
The first Lyndon words on the binary alphabet {0, 1} are: |
|||
The first Lyndon words on the binary alphabet {0, 1} are:<blockquote> |
|||
: 0, 1, 01, 001, 011, 0001, 0011, 0111, 00001, 00011, 00101, 00111, 01011, 01111, ... |
: 0, 1, 01, 001, 011, 0001, 0011, 0111, 00001, 00011, 00101, 00111, 01011, 01111, ... |
||
</blockquote>Some basic properties: |
|||
Some basic properties: |
|||
* The only Lyndon word that ends with 0 is 0. |
* The only Lyndon word that ends with 0 is 0. |
||
Line 14: | Line 18: | ||
The Chen–Fox–Lyndon theorem states that any string is uniquely factorizable into a sequence of Lyndon words non-decreasing in lexicographic order. Duval's algorithm computes this in O(length of input) time and and O(1) space.<ref>Duval, Jean-Pierre (1983), "Factorizing words over an ordered alphabet", ''Journal of Algorithms'', '''4''' (4): 363–381, doi:10.1016/0196-6774(83)90017-2</ref> See <ref name=":0">https://cp-algorithms.com/string/lyndon_factorization.html</ref> for a description of Duval's algorithm. |
The Chen–Fox–Lyndon theorem states that any string is uniquely factorizable into a sequence of Lyndon words non-decreasing in lexicographic order. Duval's algorithm computes this in O(length of input) time and and O(1) space.<ref>Duval, Jean-Pierre (1983), "Factorizing words over an ordered alphabet", ''Journal of Algorithms'', '''4''' (4): 363–381, doi:10.1016/0196-6774(83)90017-2</ref> See <ref name=":0">https://cp-algorithms.com/string/lyndon_factorization.html</ref> for a description of Duval's algorithm. |
||
=={{header|ALGOL 68}}== |
|||
== [[C]] == |
|||
{{Trans|Python|with multiline output as with a number of other samples}} |
|||
Copied from <ref name=":0" />, under Creative Commons Attribution Share Alike 4.0 International License.<syntaxhighlight lang="c"> |
|||
<syntaxhighlight lang="algol68"> |
|||
BEGIN # Factorize a string into Lyndon words - translated of the Python sample # |
|||
PROC chen fox lyndon factorization = ( STRING s )[]STRING: |
|||
BEGIN |
|||
INT n = UPB s; |
|||
INT i := LWB s; |
|||
FLEX[ 1 : 100 ]STRING factorization; |
|||
INT f max := LWB factorization; |
|||
WHILE i <= n DO |
|||
INT j := i + 1, k := i; |
|||
WHILE IF j <= n THEN s[ k ] <= s[ j ] ELSE FALSE FI DO |
|||
IF s[ k ] < s[ j ] THEN |
|||
k := i |
|||
ELSE |
|||
k +:= 1 |
|||
FI; |
|||
j +:= 1 |
|||
OD; |
|||
WHILE i <= k DO |
|||
f max +:= 1; |
|||
IF f max > UPB factorization THEN |
|||
[ 1 : UPB factorization + 20 ]STRING new factorization; |
|||
new factorization[ 1 : UPB factorization ] := factorization; |
|||
factorization := new factorization |
|||
FI; |
|||
factorization[ f max ] := s[ i : i + j - k - 1 ]; |
|||
i +:= j - k |
|||
OD |
|||
OD; |
|||
STRING check := ""; |
|||
FOR i TO f max DO check +:= factorization[ i ] OD; |
|||
IF check /= s THEN print( ( "Incorrect factorization", newline ) ); stop FI; |
|||
factorization[ 1 : f max ] |
|||
END # chen fox lyndon factorization # ; |
|||
# Example use with Thue-Morse sequence # |
|||
STRING m := "0"; |
|||
TO 7 DO |
|||
STRING m0 = m; |
|||
FOR i FROM LWB m TO UPB m DO |
|||
IF m[ i ] = "0" THEN m[ i ] := "1" ELIF m[ i ] = "1" THEN m[ i ] := "0" FI |
|||
OD; |
|||
m0 +=: m |
|||
OD; |
|||
[]STRING m factors = chen fox lyndon factorization( m ); |
|||
FOR i FROM LWB m factors TO UPB m factors DO |
|||
print( ( m factors[ i ], newline ) ) |
|||
OD |
|||
END |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
011 |
|||
01 |
|||
0011 |
|||
00101101 |
|||
0010110011010011 |
|||
00101100110100101101001100101101 |
|||
001011001101001011010011001011001101001100101101 |
|||
001011001101 |
|||
001 |
|||
</pre> |
|||
== {{header|C++}} == |
|||
Copied from <ref name=":0" />, under Creative Commons Attribution Share Alike 4.0 International License and converted to a runnable C++ program.<syntaxhighlight lang="c++">#include <iostream> |
|||
#include <vector> |
|||
#include <string> |
|||
#include <algorithm> |
|||
using namespace std; |
|||
vector<string> duval(string const& s) { |
vector<string> duval(string const& s) { |
||
int n = s.size(); |
int n = s.size(); |
||
Line 36: | Line 114: | ||
return factorization; |
return factorization; |
||
} |
} |
||
int main() { |
|||
// Thue-Morse example |
|||
string m = "0"; |
|||
for (int i = 0; i < 7; ++i) { |
|||
string m0 = m; |
|||
replace(m.begin(), m.end(), '0', 'a'); |
|||
replace(m.begin(), m.end(), '1', '0'); |
|||
replace(m.begin(), m.end(), 'a', '1'); |
|||
m = m0 + m; |
|||
} |
|||
for (string s : duval(m)) cout << s << endl; |
|||
return 0; |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
011 |
|||
01 |
|||
0011 |
|||
00101101 |
|||
0010110011010011 |
|||
00101100110100101101001100101101 |
|||
001011001101001011010011001011001101001100101101 |
|||
001011001101 |
|||
001 |
|||
</pre> |
|||
=={{header|EasyLang}}== |
|||
{{trans|Julia}} |
|||
<syntaxhighlight> |
|||
proc lyndonfact s$ . . |
|||
n = len s$ |
|||
s$[] = strchars s$ |
|||
i = 1 |
|||
while i <= n |
|||
j = i + 1 |
|||
k = i |
|||
while j <= n and strcode s$[k] <= strcode s$[j] |
|||
if strcode s$[k] < strcode s$[j] |
|||
k = i |
|||
else |
|||
k += 1 |
|||
. |
|||
j += 1 |
|||
. |
|||
while i <= k |
|||
print substr s$ i (j - k) |
|||
i += j - k |
|||
. |
|||
. |
|||
. |
|||
m$ = "0" |
|||
for i to 7 |
|||
for c$ in strchars m$ |
|||
if c$ = "0" |
|||
m$ &= "1" |
|||
else |
|||
m$ &= "0" |
|||
. |
|||
. |
|||
. |
|||
lyndonfact m$ |
|||
</syntaxhighlight> |
</syntaxhighlight> |
||
== |
=={{header|Java}}== |
||
<syntaxhighlight lang="java"> |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
public class FactorizeStringIntoLyndonWords { |
|||
public static void main(String[] args) { |
|||
// Create a 128 character Thue-Morse word |
|||
String thueMorse = "0"; |
|||
for ( int i = 0; i < 7; i++ ) { |
|||
String thueMorseCopy = thueMorse; |
|||
thueMorse = thueMorse.replace("0", "a"); |
|||
thueMorse = thueMorse.replace("1", "0"); |
|||
thueMorse = thueMorse.replace("a", "1"); |
|||
thueMorse = thueMorseCopy + thueMorse; |
|||
} |
|||
System.out.println("The Thue-Morse word to be factorised:"); |
|||
System.out.println(thueMorse); |
|||
System.out.println(); |
|||
System.out.println("The factors are:"); |
|||
for ( String factor : duval(thueMorse) ) { |
|||
System.out.println(factor); |
|||
} |
|||
} |
|||
// Duval's algorithm |
|||
private static List<String> duval(String text) { |
|||
List<String> factorisation = new ArrayList<String>(); |
|||
int i = 0; |
|||
while ( i < text.length() ) { |
|||
int j = i + 1; |
|||
int k = i; |
|||
while ( j < text.length() && text.charAt(k) <= text.charAt(j) ) { |
|||
if ( text.charAt(k) < text.charAt(j) ) { |
|||
k = i; |
|||
} else { |
|||
k += 1; |
|||
} |
|||
j += 1; |
|||
} |
|||
while ( i <= k ) { |
|||
factorisation.addLast(text.substring(i, i + j - k)); |
|||
i += j - k; |
|||
} |
|||
} |
|||
return factorisation; |
|||
} |
|||
} |
|||
</syntaxhighlight> |
|||
<pre> |
|||
The Thue-Morse word to be factorised: |
|||
01101001100101101001011001101001100101100110100101101001100101101001011001101001011010011001011001101001100101101001011001101001 |
|||
The factors are: |
|||
011 |
|||
01 |
|||
0011 |
|||
00101101 |
|||
0010110011010011 |
|||
00101100110100101101001100101101 |
|||
001011001101001011010011001011001101001100101101 |
|||
001011001101 |
|||
</pre> |
|||
=={{header|jq}}== |
|||
{{Works with|jq}} |
|||
'''Works with gojq, the Go implementation of jq''' |
|||
'''Adapted from [[#Wren|Wren]]''' |
|||
<syntaxhighlight lang="jq"> |
|||
def duval: |
|||
. as $s |
|||
| def a($i): $s[$i:$i+1]; |
|||
length as $n |
|||
| { i: 0, factorization: []} |
|||
| until (.i >= $n; |
|||
.j = .i + 1 |
|||
| .k = .i |
|||
| until (.j >= $n or a(.k) > a(.j); |
|||
if a(.k) < a(.j) then .k = .i else .k += 1 end |
|||
| .j += 1 ) |
|||
| until (.i > .k; |
|||
.factorization += [$s[.i: .i+.j-.k]] |
|||
| .i += .j - .k ) ) |
|||
| .factorization ; |
|||
# Thue-Morse example |
|||
def m: |
|||
reduce range(0; 7) as $i ( "0"; |
|||
. as $m0 |
|||
| gsub("0"; "a") |
|||
| gsub("1"; "0") |
|||
| gsub("a"; "1") |
|||
| $m0 + .) ; |
|||
m | duval | join("\n") |
|||
</syntaxhighlight> |
|||
{{output}} |
|||
As for [[#Wren|Wren]]. |
|||
=={{header|Julia}}== |
|||
{{trans|Phix}}{{trans|Python}} |
|||
<syntaxhighlight lang="Julia">function chenfoxlyndonfactorization(s) |
|||
n = length(s) |
|||
i = 1 |
|||
factorization = String[] |
|||
while i <= n |
|||
j = i + 1 |
|||
k = i |
|||
while j <= n && s[k] <= s[j] |
|||
if s[k] < s[j] |
|||
k = i |
|||
else |
|||
k += 1 |
|||
end |
|||
j += 1 |
|||
end |
|||
while i <= k |
|||
push!(factorization, s[i:i+j-k-1]) |
|||
i += j - k |
|||
end |
|||
end |
|||
@assert s == prod(factorization) |
|||
return factorization |
|||
end |
|||
let m = "0" |
|||
for i in 1:7 |
|||
m *= replace(m, '0' => '1', '1' => '0') |
|||
end |
|||
println(chenfoxlyndonfactorization(m)) |
|||
end |
|||
</syntaxhighlight>{{out}} |
|||
<pre> |
|||
["011", "01", "0011", "00101101", "0010110011010011", "00101100110100101101001100101101", "001011001101001011010011001011001101001100101101", "001011001101", "001"] |
|||
</pre> |
|||
=={{header|MATLAB}}== |
|||
{{trans|Python}} |
|||
<syntaxhighlight lang="MATLAB"> |
|||
clear all;close all;clc; |
|||
m = '0'; |
|||
for i = 1:7 |
|||
m0 = m; |
|||
m = strrep(m, '0', 'a'); |
|||
m = strrep(m, '1', '0'); |
|||
m = strrep(m, 'a', '1'); |
|||
m = strcat(m0, m); |
|||
end |
|||
factorization = chenFoxLyndonFactorization(m); |
|||
for index=1:length(factorization) |
|||
disp(factorization(index)); |
|||
end |
|||
function factorization = chenFoxLyndonFactorization(s) |
|||
n = length(s); |
|||
i = 1; |
|||
factorization = {}; |
|||
while i <= n |
|||
j = i + 1; |
|||
k = i; |
|||
while j <= n && s(k) <= s(j) |
|||
if s(k) < s(j) |
|||
k = i; |
|||
else |
|||
k = k + 1; |
|||
end |
|||
j = j + 1; |
|||
end |
|||
while i <= k |
|||
factorization{end+1} = s(i:i + j - k - 1); |
|||
i = i + j - k; |
|||
end |
|||
end |
|||
assert(strcmp(join(factorization, ''), s)); |
|||
end |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
{'011'} |
|||
{'01'} |
|||
{'0011'} |
|||
{'00101101'} |
|||
{'0010110011010011'} |
|||
{'00101100110100101101001100101101'} |
|||
{'001011001101001011010011001011001101001100101101'} |
|||
{'001011001101'} |
|||
{'001'} |
|||
</pre> |
|||
=={{header|Phix}}== |
|||
{{trans|Python}} |
|||
<syntaxhighlight lang="Phix"> |
|||
with javascript_semantics |
|||
function chen_fox_lyndon_factorization(string s) |
|||
integer n = length(s), i = 1 |
|||
sequence factorization = {} |
|||
while i <= n do |
|||
integer j = i + 1, k = i |
|||
while j <= n and s[k] <= s[j] do |
|||
if s[k] < s[j] then |
|||
k = i |
|||
else |
|||
k += 1 |
|||
end if |
|||
j += 1 |
|||
end while |
|||
while i <= k do |
|||
factorization &= {s[i..i+j-k-1]} |
|||
i += j - k |
|||
end while |
|||
end while |
|||
assert(join(factorization,"") == s) |
|||
return factorization |
|||
end function |
|||
-- Example use with Thue-Morse sequence |
|||
string m = "0" |
|||
for i=1 to 7 do |
|||
m &= sq_sub('0'+'1',m) |
|||
end for |
|||
?chen_fox_lyndon_factorization(m) |
|||
-- alt: |
|||
printf(1,"\n%s\n",join(chen_fox_lyndon_factorization(m),"\n")) |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
{"011","01","0011","00101101","0010110011010011","00101100110100101101001100101101","001011001101001011010011001011001101001100101101","001011001101","001"} |
|||
011 |
|||
01 |
|||
0011 |
|||
00101101 |
|||
0010110011010011 |
|||
00101100110100101101001100101101 |
|||
001011001101001011010011001011001101001100101101 |
|||
001011001101 |
|||
001 |
|||
</pre> |
|||
== {{header|Python}} == |
|||
Duval's algorithm:<syntaxhighlight lang="python3"> |
Duval's algorithm:<syntaxhighlight lang="python3"> |
||
def chen_fox_lyndon_factorization(s): |
def chen_fox_lyndon_factorization(s): |
||
Line 71: | Line 472: | ||
</syntaxhighlight> |
</syntaxhighlight> |
||
=={{header|Raku}}== |
|||
{{trans|Julia}} |
|||
<syntaxhighlight lang="raku" line># 20240213 Raku programming solution |
|||
sub chenfoxlyndonfactorization(Str $s) { |
|||
my ($n, $i, @factorization) = $s.chars, 1; |
|||
while $i <= $n { |
|||
my ($j, $k) = $i X+ (1, 0); |
|||
while $j <= $n && substr($s, $k-1, 1) <= substr($s, $j-1, 1) { |
|||
substr($s, $k-1, 1) < substr($s, $j++ -1, 1) ?? ( $k = $i ) !! $k++; |
|||
} |
|||
while $i <= $k { |
|||
@factorization.push: substr($s, $i-1, $j-$k); |
|||
$i += $j - $k |
|||
} |
|||
} |
|||
die unless $s eq [~] @factorization; |
|||
return @factorization |
|||
} |
|||
my $m = "0"; |
|||
for ^7 { $m ~= $m.trans('0' => '1', '1' => '0') } |
|||
say chenfoxlyndonfactorization($m);</syntaxhighlight> |
|||
{{out}} Same as Julia example. |
|||
You may [https://ato.pxeger.com/run?1=fVJNToNAFE5ccoqvDSlMoA2sNGJt7-DGxGhCKYRpy6AzQ7Q27UXcdKGX8jS-4SeWxshiwrz3_b3J-_iU8bo6Hr8qnY2vvi_mqlogyVORlW-brViWIosTXUr-HmteCvdOS9iKYWcBKLZwbeHD5j7mPRzDlGCTJI-l8hFGBv2a801KWNxQTzQKnciKRNY1iePegxv6CFjUIlriqiWORqCQSkvXVoY2JnDITPO0vGrLnQ19f7L6JM9D25jN4BKsicQwGNDF87pI-360Zqb1qVn_PSbPlcqve17cGFFMmjv6pZGUNzWzjknv1Kw-ljxFJTapUvS6SF_wcHg8c6q1ZKorKc461t6y6LHtgmYaBsPIykqJp0vsTOlApsVEy1go1wkcTG_hhI5vjvo_cBhFUPH2v-WwCxY1i9TuU7dXPw Attempt This Online!] |
|||
=={{header|RPL}}== |
|||
Based on Duval's algorithm. |
|||
<code>THUEM</code> is defined at [[Thue-Morse#RPL|Thue-Morse]]. |
|||
{{works with|HP|48G}} |
|||
« DUP SIZE { } → s n res |
|||
« 1 |
|||
'''WHILE''' DUP n ≤ '''REPEAT''' |
|||
DUP 1 + OVER |
|||
'''WHILE''' s OVER DUP SUB s 4 PICK DUP SUB DUP2 ≤ 5 PICK n ≤ AND '''REPEAT''' |
|||
'''IF''' < '''THEN''' DROP OVER '''ELSE''' 1 + '''END''' |
|||
SWAP 1 + SWAP |
|||
'''END''' DROP2 |
|||
SWAP OVER - SWAP ROT |
|||
'''WHILE''' DUP2 ≥ '''REPEAT''' |
|||
'res' s 3 PICK DUP 7 PICK + 1 - SUB STO+ |
|||
3 PICK + |
|||
'''END''' ROT ROT DROP2 |
|||
'''END''' |
|||
DROP res |
|||
» » '<span style="color:blue">LYNDON</span>' STO <span style="color:grey">''@ ( "string" → { "f1".."fn" } )''</span> |
|||
« 2 7 ^ <span style="color:blue">THUEM</span> « →STR + » STREAM |
|||
<span style="color:blue">LYNDON</span> |
|||
» '<span style="color:blue">TASK</span>' STO |
|||
{{out}} |
|||
<pre> |
|||
1: { "011" "01" "0011" "00101101" "0010110011010011" "00101100110100101101001100101101" "001011001101001011010011001011001101001100101101" "001011001101" "001" } |
|||
</pre> |
|||
'''Using local variables''' |
|||
The version below favors local variables to the stack, which makes it a little bit slower - but easier to read. |
|||
« DUP SIZE 1 → s n ii |
|||
« { } |
|||
'''WHILE''' ii n ≤ '''REPEAT''' |
|||
ii DUP 1 + → k j |
|||
« '''WHILE''' s k DUP SUB s j DUP SUB DUP2 ≤ ii n ≤ AND '''REPEAT''' |
|||
< ii k 1 + IFTE 'k' STO |
|||
'j' 1 STO+ |
|||
» |
|||
'j' k STO- |
|||
'''WHILE''' ii k ≤ '''REPEAT''' |
|||
s ii DUP j + 1 - SUB + |
|||
'ii' j STO+ |
|||
'''END''' |
|||
» |
|||
'''END''' |
|||
» » '<span style="color:blue">LYNDON</span>' STO <span style="color:grey">''@ ( "string" → { "f1".."fn" } )''</span> |
|||
=={{header|Rust}}== |
=={{header|Rust}}== |
||
Line 114: | Line 588: | ||
<pre> |
<pre> |
||
["011", "01", "0011", "00101101", "0010110011010011", "00101100110100101101001100101101", "001011001101001011010011001011001101001100101101", "001011001101", "001"] |
["011", "01", "0011", "00101101", "0010110011010011", "00101100110100101101001100101101", "001011001101001011010011001011001101001100101101", "001011001101", "001"] |
||
</pre> |
|||
=={{header|Scala}}== |
|||
{{trans|Python}} |
|||
<syntaxhighlight lang="Scala"> |
|||
object ChenFoxLyndonFactorization extends App { |
|||
def chenFoxLyndonFactorization(s: String): List[String] = { |
|||
val n = s.length |
|||
var i = 0 |
|||
var factorization = List[String]() |
|||
while (i < n) { |
|||
var j = i + 1 |
|||
var k = i |
|||
while (j < n && s.charAt(k) <= s.charAt(j)) { |
|||
if (s.charAt(k) < s.charAt(j)) { |
|||
k = i |
|||
} else { |
|||
k += 1 |
|||
} |
|||
j += 1 |
|||
} |
|||
while (i <= k) { |
|||
factorization = factorization :+ s.substring(i, i + j - k) |
|||
i += j - k |
|||
} |
|||
} |
|||
assert(s == factorization.mkString) |
|||
factorization |
|||
} |
|||
var m = "0" |
|||
for (i <- 0 until 7) { |
|||
val m0 = m |
|||
m = m.replace('0', 'a').replace('1', '0').replace('a', '1') |
|||
m = m0 + m |
|||
} |
|||
println(chenFoxLyndonFactorization(m)) |
|||
} |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
List(011, 01, 0011, 00101101, 0010110011010011, 00101100110100101101001100101101, 001011001101001011010011001011001101001100101101, 001011001101, 001) |
|||
</pre> |
|||
=={{header|SETL}}== |
|||
{{trans|C++}} |
|||
<syntaxhighlight lang="setl">program lyndon_factorization; |
|||
tm := "01101001100101101001011001101001100101100110" |
|||
+ "10010110100110010110100101100110100101101001" |
|||
+ "1001011001101001100101101001011001101001"; |
|||
loop for part in duval(tm) do |
|||
print(part); |
|||
end loop; |
|||
proc duval(s); |
|||
i := 1; |
|||
fact := []; |
|||
loop while i <= #s do |
|||
j := i + 1; |
|||
k := i; |
|||
loop while j <= #s and s(k) <= s(j) do |
|||
k := if s(k) < s(j) then i else k+1 end; |
|||
j +:= 1; |
|||
end loop; |
|||
loop while i <= k do |
|||
fact with:= s(i..i+j-k-1); |
|||
i +:= j-k; |
|||
end loop; |
|||
end loop; |
|||
return fact; |
|||
end proc; |
|||
end program;</syntaxhighlight> |
|||
{{out}} |
|||
<pre>011 |
|||
01 |
|||
0011 |
|||
00101101 |
|||
0010110011010011 |
|||
00101100110100101101001100101101 |
|||
001011001101001011010011001011001101001100101101 |
|||
001011001101 |
|||
001</pre> |
|||
=={{header|Wren}}== |
|||
{{trans|Python}} |
|||
{{libheader|Wren-str}} |
|||
<syntaxhighlight lang="wren">import "./str" for Str |
|||
var duval = Fn.new { |s| |
|||
var n = s.count |
|||
var i = 0 |
|||
var factorization = [] |
|||
while (i < n) { |
|||
var j = i + 1 |
|||
var k = i |
|||
while (j < n && Str.le(s[k], s[j])) { |
|||
if (Str.lt(s[k], s[j])) k = i else k = k + 1 |
|||
j = j + 1 |
|||
} |
|||
while (i <= k) { |
|||
factorization.add(s[i...i+j-k]) |
|||
i = i + j - k |
|||
} |
|||
} |
|||
return factorization |
|||
} |
|||
// Thue-Morse example |
|||
var m = "0" |
|||
for (i in 0..6) { |
|||
var m0 = m |
|||
m = m.replace("0", "a") |
|||
m = m.replace("1", "0") |
|||
m = m.replace("a", "1") |
|||
m = m0 + m |
|||
} |
|||
System.print(duval.call(m).join("\n"))></syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
011 |
|||
01 |
|||
0011 |
|||
00101101 |
|||
0010110011010011 |
|||
00101100110100101101001100101101 |
|||
001011001101001011010011001011001101001100101101 |
|||
001011001101 |
|||
001 |
|||
</pre> |
</pre> |
Latest revision as of 19:33, 28 February 2024
You are encouraged to solve this task according to the task description, using any language you may know.
Given a finite alphabet, we can lexicographically order all strings in the alphabet. If two strings have different lengths, then pad the shorter string on the right with the smallest letter. For example, we have 01 > 001, but 01 = 010. As we see, this order is a total preorder, but not a total order, since it identifies different strings.
A Lyndon word is a non-empty string that is strictly lower in lexicographic order than all its circular rotations. In particular, if a string is equal to a circular rotation, then it is not a Lyndon word. For example, since 0101 = 0101 (rotation by 2), it is not a Lyndon word.
The first Lyndon words on the binary alphabet {0, 1} are:
- 0, 1, 01, 001, 011, 0001, 0011, 0111, 00001, 00011, 00101, 00111, 01011, 01111, ...
Some basic properties:
- The only Lyndon word that ends with 0 is 0.
- Proof. If s0 is a Lyndon word, and s is not empty, then s0 < 0s. If s contains 1 somewhere, then s0 > 0s. Therefore s has only 0. But then s0 = 0s, contradiction.
- The lexicographic order is a total order on the Lyndon words.
- Proof. For, the only way for two different strings s, s' to have the same lexicographic ordering is for one of them to pad to the other. We can assume that s00...0 = s'. If that is so, then s00...0 is a Lyndon word that ends with 0, so it is just 0, and so s is a Lyndon word that is also empty, contradiction.
The Chen–Fox–Lyndon theorem states that any string is uniquely factorizable into a sequence of Lyndon words non-decreasing in lexicographic order. Duval's algorithm computes this in O(length of input) time and and O(1) space.[1] See [2] for a description of Duval's algorithm.
ALGOL 68
BEGIN # Factorize a string into Lyndon words - translated of the Python sample #
PROC chen fox lyndon factorization = ( STRING s )[]STRING:
BEGIN
INT n = UPB s;
INT i := LWB s;
FLEX[ 1 : 100 ]STRING factorization;
INT f max := LWB factorization;
WHILE i <= n DO
INT j := i + 1, k := i;
WHILE IF j <= n THEN s[ k ] <= s[ j ] ELSE FALSE FI DO
IF s[ k ] < s[ j ] THEN
k := i
ELSE
k +:= 1
FI;
j +:= 1
OD;
WHILE i <= k DO
f max +:= 1;
IF f max > UPB factorization THEN
[ 1 : UPB factorization + 20 ]STRING new factorization;
new factorization[ 1 : UPB factorization ] := factorization;
factorization := new factorization
FI;
factorization[ f max ] := s[ i : i + j - k - 1 ];
i +:= j - k
OD
OD;
STRING check := "";
FOR i TO f max DO check +:= factorization[ i ] OD;
IF check /= s THEN print( ( "Incorrect factorization", newline ) ); stop FI;
factorization[ 1 : f max ]
END # chen fox lyndon factorization # ;
# Example use with Thue-Morse sequence #
STRING m := "0";
TO 7 DO
STRING m0 = m;
FOR i FROM LWB m TO UPB m DO
IF m[ i ] = "0" THEN m[ i ] := "1" ELIF m[ i ] = "1" THEN m[ i ] := "0" FI
OD;
m0 +=: m
OD;
[]STRING m factors = chen fox lyndon factorization( m );
FOR i FROM LWB m factors TO UPB m factors DO
print( ( m factors[ i ], newline ) )
OD
END
- Output:
011 01 0011 00101101 0010110011010011 00101100110100101101001100101101 001011001101001011010011001011001101001100101101 001011001101 001
C++
Copied from [2], under Creative Commons Attribution Share Alike 4.0 International License and converted to a runnable C++ program.
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;
vector<string> duval(string const& s) {
int n = s.size();
int i = 0;
vector<string> factorization;
while (i < n) {
int j = i + 1, k = i;
while (j < n && s[k] <= s[j]) {
if (s[k] < s[j])
k = i;
else
k++;
j++;
}
while (i <= k) {
factorization.push_back(s.substr(i, j - k));
i += j - k;
}
}
return factorization;
}
int main() {
// Thue-Morse example
string m = "0";
for (int i = 0; i < 7; ++i) {
string m0 = m;
replace(m.begin(), m.end(), '0', 'a');
replace(m.begin(), m.end(), '1', '0');
replace(m.begin(), m.end(), 'a', '1');
m = m0 + m;
}
for (string s : duval(m)) cout << s << endl;
return 0;
}
- Output:
011 01 0011 00101101 0010110011010011 00101100110100101101001100101101 001011001101001011010011001011001101001100101101 001011001101 001
EasyLang
proc lyndonfact s$ . .
n = len s$
s$[] = strchars s$
i = 1
while i <= n
j = i + 1
k = i
while j <= n and strcode s$[k] <= strcode s$[j]
if strcode s$[k] < strcode s$[j]
k = i
else
k += 1
.
j += 1
.
while i <= k
print substr s$ i (j - k)
i += j - k
.
.
.
m$ = "0"
for i to 7
for c$ in strchars m$
if c$ = "0"
m$ &= "1"
else
m$ &= "0"
.
.
.
lyndonfact m$
Java
import java.util.ArrayList;
import java.util.List;
public class FactorizeStringIntoLyndonWords {
public static void main(String[] args) {
// Create a 128 character Thue-Morse word
String thueMorse = "0";
for ( int i = 0; i < 7; i++ ) {
String thueMorseCopy = thueMorse;
thueMorse = thueMorse.replace("0", "a");
thueMorse = thueMorse.replace("1", "0");
thueMorse = thueMorse.replace("a", "1");
thueMorse = thueMorseCopy + thueMorse;
}
System.out.println("The Thue-Morse word to be factorised:");
System.out.println(thueMorse);
System.out.println();
System.out.println("The factors are:");
for ( String factor : duval(thueMorse) ) {
System.out.println(factor);
}
}
// Duval's algorithm
private static List<String> duval(String text) {
List<String> factorisation = new ArrayList<String>();
int i = 0;
while ( i < text.length() ) {
int j = i + 1;
int k = i;
while ( j < text.length() && text.charAt(k) <= text.charAt(j) ) {
if ( text.charAt(k) < text.charAt(j) ) {
k = i;
} else {
k += 1;
}
j += 1;
}
while ( i <= k ) {
factorisation.addLast(text.substring(i, i + j - k));
i += j - k;
}
}
return factorisation;
}
}
The Thue-Morse word to be factorised: 01101001100101101001011001101001100101100110100101101001100101101001011001101001011010011001011001101001100101101001011001101001 The factors are: 011 01 0011 00101101 0010110011010011 00101100110100101101001100101101 001011001101001011010011001011001101001100101101 001011001101
jq
Works with gojq, the Go implementation of jq
Adapted from Wren
def duval:
. as $s
| def a($i): $s[$i:$i+1];
length as $n
| { i: 0, factorization: []}
| until (.i >= $n;
.j = .i + 1
| .k = .i
| until (.j >= $n or a(.k) > a(.j);
if a(.k) < a(.j) then .k = .i else .k += 1 end
| .j += 1 )
| until (.i > .k;
.factorization += [$s[.i: .i+.j-.k]]
| .i += .j - .k ) )
| .factorization ;
# Thue-Morse example
def m:
reduce range(0; 7) as $i ( "0";
. as $m0
| gsub("0"; "a")
| gsub("1"; "0")
| gsub("a"; "1")
| $m0 + .) ;
m | duval | join("\n")
- Output:
As for Wren.
Julia
function chenfoxlyndonfactorization(s)
n = length(s)
i = 1
factorization = String[]
while i <= n
j = i + 1
k = i
while j <= n && s[k] <= s[j]
if s[k] < s[j]
k = i
else
k += 1
end
j += 1
end
while i <= k
push!(factorization, s[i:i+j-k-1])
i += j - k
end
end
@assert s == prod(factorization)
return factorization
end
let m = "0"
for i in 1:7
m *= replace(m, '0' => '1', '1' => '0')
end
println(chenfoxlyndonfactorization(m))
end
- Output:
["011", "01", "0011", "00101101", "0010110011010011", "00101100110100101101001100101101", "001011001101001011010011001011001101001100101101", "001011001101", "001"]
MATLAB
clear all;close all;clc;
m = '0';
for i = 1:7
m0 = m;
m = strrep(m, '0', 'a');
m = strrep(m, '1', '0');
m = strrep(m, 'a', '1');
m = strcat(m0, m);
end
factorization = chenFoxLyndonFactorization(m);
for index=1:length(factorization)
disp(factorization(index));
end
function factorization = chenFoxLyndonFactorization(s)
n = length(s);
i = 1;
factorization = {};
while i <= n
j = i + 1;
k = i;
while j <= n && s(k) <= s(j)
if s(k) < s(j)
k = i;
else
k = k + 1;
end
j = j + 1;
end
while i <= k
factorization{end+1} = s(i:i + j - k - 1);
i = i + j - k;
end
end
assert(strcmp(join(factorization, ''), s));
end
- Output:
{'011'} {'01'} {'0011'} {'00101101'} {'0010110011010011'} {'00101100110100101101001100101101'} {'001011001101001011010011001011001101001100101101'} {'001011001101'} {'001'}
Phix
with javascript_semantics
function chen_fox_lyndon_factorization(string s)
integer n = length(s), i = 1
sequence factorization = {}
while i <= n do
integer j = i + 1, k = i
while j <= n and s[k] <= s[j] do
if s[k] < s[j] then
k = i
else
k += 1
end if
j += 1
end while
while i <= k do
factorization &= {s[i..i+j-k-1]}
i += j - k
end while
end while
assert(join(factorization,"") == s)
return factorization
end function
-- Example use with Thue-Morse sequence
string m = "0"
for i=1 to 7 do
m &= sq_sub('0'+'1',m)
end for
?chen_fox_lyndon_factorization(m)
-- alt:
printf(1,"\n%s\n",join(chen_fox_lyndon_factorization(m),"\n"))
- Output:
{"011","01","0011","00101101","0010110011010011","00101100110100101101001100101101","001011001101001011010011001011001101001100101101","001011001101","001"} 011 01 0011 00101101 0010110011010011 00101100110100101101001100101101 001011001101001011010011001011001101001100101101 001011001101 001
Python
Duval's algorithm:
def chen_fox_lyndon_factorization(s):
n = len(s)
i = 0
factorization = []
while i < n:
j, k = i + 1, i
while j < n and s[k] <= s[j]:
if s[k] < s[j]:
k = i
else:
k += 1
j += 1
while i <= k:
factorization.append(s[i:i + j - k])
i += j - k
assert ''.join(factorization) == s
return factorization
Example use with Thue-Morse sequence
m='0'
for i in range(0,7):
m0=m
m=m.replace('0','a')
m=m.replace('1','0')
m=m.replace('a','1')
m=m0+m
print(chen_fox_lyndon_factorization(m))
Output:
['011', '01', '0011', '00101101', '0010110011010011', '00101100110100101101001100101101', '001011001101001011010011001011001101001100101101', '001011001101', '001']
Raku
# 20240213 Raku programming solution
sub chenfoxlyndonfactorization(Str $s) {
my ($n, $i, @factorization) = $s.chars, 1;
while $i <= $n {
my ($j, $k) = $i X+ (1, 0);
while $j <= $n && substr($s, $k-1, 1) <= substr($s, $j-1, 1) {
substr($s, $k-1, 1) < substr($s, $j++ -1, 1) ?? ( $k = $i ) !! $k++;
}
while $i <= $k {
@factorization.push: substr($s, $i-1, $j-$k);
$i += $j - $k
}
}
die unless $s eq [~] @factorization;
return @factorization
}
my $m = "0";
for ^7 { $m ~= $m.trans('0' => '1', '1' => '0') }
say chenfoxlyndonfactorization($m);
- Output:
Same as Julia example.
You may Attempt This Online!
RPL
Based on Duval's algorithm.
THUEM
is defined at Thue-Morse.
« DUP SIZE { } → s n res « 1 WHILE DUP n ≤ REPEAT DUP 1 + OVER WHILE s OVER DUP SUB s 4 PICK DUP SUB DUP2 ≤ 5 PICK n ≤ AND REPEAT IF < THEN DROP OVER ELSE 1 + END SWAP 1 + SWAP END DROP2 SWAP OVER - SWAP ROT WHILE DUP2 ≥ REPEAT 'res' s 3 PICK DUP 7 PICK + 1 - SUB STO+ 3 PICK + END ROT ROT DROP2 END DROP res » » 'LYNDON' STO @ ( "string" → { "f1".."fn" } ) « 2 7 ^ THUEM « →STR + » STREAM LYNDON » 'TASK' STO
- Output:
1: { "011" "01" "0011" "00101101" "0010110011010011" "00101100110100101101001100101101" "001011001101001011010011001011001101001100101101" "001011001101" "001" }
Using local variables
The version below favors local variables to the stack, which makes it a little bit slower - but easier to read.
« DUP SIZE 1 → s n ii « { } WHILE ii n ≤ REPEAT ii DUP 1 + → k j « WHILE s k DUP SUB s j DUP SUB DUP2 ≤ ii n ≤ AND REPEAT < ii k 1 + IFTE 'k' STO 'j' 1 STO+ » 'j' k STO- WHILE ii k ≤ REPEAT s ii DUP j + 1 - SUB + 'ii' j STO+ END » END » » 'LYNDON' STO @ ( "string" → { "f1".."fn" } )
Rust
fn chen_fox_lyndon_factorization(s: &str) -> Vec<String> {
let n = s.len();
let mut i = 0;
let mut factorization = Vec::new();
while i < n {
let (mut j, mut k) = (i + 1, i);
while j < n && s.as_bytes()[k] <= s.as_bytes()[j] {
if s.as_bytes()[k] < s.as_bytes()[j] {
k = i;
} else {
k += 1;
}
j += 1;
}
while i <= k {
factorization.push(s[i..i + j - k].to_string());
i += j - k;
}
}
factorization
}
fn main() {
let mut m = String::from("0");
for _ in 0..7 {
let m0 = m.clone();
m = m.replace("0", "a");
m = m.replace("1", "0");
m = m.replace("a", "1");
m = m0 + &m;
}
let factorization = chen_fox_lyndon_factorization(&m);
println!("{:?}", factorization);
}
- Output:
["011", "01", "0011", "00101101", "0010110011010011", "00101100110100101101001100101101", "001011001101001011010011001011001101001100101101", "001011001101", "001"]
Scala
object ChenFoxLyndonFactorization extends App {
def chenFoxLyndonFactorization(s: String): List[String] = {
val n = s.length
var i = 0
var factorization = List[String]()
while (i < n) {
var j = i + 1
var k = i
while (j < n && s.charAt(k) <= s.charAt(j)) {
if (s.charAt(k) < s.charAt(j)) {
k = i
} else {
k += 1
}
j += 1
}
while (i <= k) {
factorization = factorization :+ s.substring(i, i + j - k)
i += j - k
}
}
assert(s == factorization.mkString)
factorization
}
var m = "0"
for (i <- 0 until 7) {
val m0 = m
m = m.replace('0', 'a').replace('1', '0').replace('a', '1')
m = m0 + m
}
println(chenFoxLyndonFactorization(m))
}
- Output:
List(011, 01, 0011, 00101101, 0010110011010011, 00101100110100101101001100101101, 001011001101001011010011001011001101001100101101, 001011001101, 001)
SETL
program lyndon_factorization;
tm := "01101001100101101001011001101001100101100110"
+ "10010110100110010110100101100110100101101001"
+ "1001011001101001100101101001011001101001";
loop for part in duval(tm) do
print(part);
end loop;
proc duval(s);
i := 1;
fact := [];
loop while i <= #s do
j := i + 1;
k := i;
loop while j <= #s and s(k) <= s(j) do
k := if s(k) < s(j) then i else k+1 end;
j +:= 1;
end loop;
loop while i <= k do
fact with:= s(i..i+j-k-1);
i +:= j-k;
end loop;
end loop;
return fact;
end proc;
end program;
- Output:
011 01 0011 00101101 0010110011010011 00101100110100101101001100101101 001011001101001011010011001011001101001100101101 001011001101 001
Wren
import "./str" for Str
var duval = Fn.new { |s|
var n = s.count
var i = 0
var factorization = []
while (i < n) {
var j = i + 1
var k = i
while (j < n && Str.le(s[k], s[j])) {
if (Str.lt(s[k], s[j])) k = i else k = k + 1
j = j + 1
}
while (i <= k) {
factorization.add(s[i...i+j-k])
i = i + j - k
}
}
return factorization
}
// Thue-Morse example
var m = "0"
for (i in 0..6) {
var m0 = m
m = m.replace("0", "a")
m = m.replace("1", "0")
m = m.replace("a", "1")
m = m0 + m
}
System.print(duval.call(m).join("\n"))>
- Output:
011 01 0011 00101101 0010110011010011 00101100110100101101001100101101 001011001101001011010011001011001101001100101101 001011001101 001
- ↑ Duval, Jean-Pierre (1983), "Factorizing words over an ordered alphabet", Journal of Algorithms, 4 (4): 363–381, doi:10.1016/0196-6774(83)90017-2
- ↑ 2.0 2.1 https://cp-algorithms.com/string/lyndon_factorization.html