Word break problem: Difference between revisions

m
m (→‎{{header|Wren}}: Minor tidy)
 
(6 intermediate revisions by 4 users not shown)
Line 9:
{{trans|D}}
 
<langsyntaxhighlight lang="11l">T Node
String val
[String] parsed
Line 44:
 
d = [‘abc’, ‘a’, ‘ac’, ‘b’, ‘c’, ‘cb’, ‘d’]
process(d, [‘abcd’, ‘abbc’, ‘abcbcd’, ‘acdbc’, ‘abcdd’])</langsyntaxhighlight>
 
{{out}}
Line 78:
 
=={{header|Aime}}==
<langsyntaxhighlight lang="aime">integer
wordbreak(record dict, text phrase, integer p, list words)
{
Line 133:
 
return 0;
}</langsyntaxhighlight>
{{Out}}
<pre>abcd: a b cd
Line 140:
acdbc: a cd bc
abcdd: can't break</pre>
 
=={{header|ALGOL 68}}==
{{Trans|Nim}}
<syntaxhighlight lang="algol68"># utility functions: #
 
PROC includes = ([]STRING row, STRING s) BOOL:
# whether row includes s #
BEGIN
FOR i TO UPB row DO IF s = row[i] THEN true FI OD;
FALSE EXIT
true: TRUE
END;
 
PRIO & = 6;
OP & = (STRING s, []STRING row) []STRING:
# returns row with s prepended to it #
BEGIN
[UPB row + 1]STRING result;
result[2 : UPB result] := row;
result[1] := s;
result
END;
 
PROC add = (REF FLEX[]FLEX[]STRING rows, []STRING row) VOID:
# adds row to the end of rows (in place) #
BEGIN
[UPB rows+1]FLEX[0]STRING tmp;
tmp[:UPB rows] := rows;
tmp[UPB tmp] := row;
rows := tmp
END;
 
# actual solution: #
 
PROC word breaks = ([]STRING dict, STRING word) [][]STRING:
# build recursively the list of breaks for a word, using the given dictionary #
BEGIN
FLEX[0]FLEX[0]STRING result;
FOR last TO UPB word - 1 DO
STRING part1 := word[1 : last];
IF includes(dict, part1) THEN
STRING part2 := word[last + 1 : ];
IF includes(dict, part2) THEN add(result, (part1, part2)) FI;
[][]STRING sub results = word breaks(dict, part2);
FOR i FROM LWB sub results TO UPB sub results DO
add(result, part1 & sub results[i])
OD
FI
OD;
result
END;
 
PROC break word = ([]STRING dict, STRING word) VOID:
# find the ways to break a word and display the result #
BEGIN
print((word, ":", newline));
[][]STRING word seqs = word breaks(dict, word);
IF UPB word seqs = 0 THEN
print((" <no break possible>", newline))
ELSE
FOR i FROM LWB word seqs TO UPB word seqs DO
[]STRING seq = word seqs[i];
print(" ");
FOR j FROM LWB seq TO UPB seq DO
print((" ", seq[j]))
OD;
print(newline)
OD
FI
END;
 
[]STRING dict = ("a", "bc", "abc", "cd", "b");
 
[]STRING words = ("abcd", "abbc", "abcbcd", "acdbc", "abcdd");
 
FOR i TO UPB words DO
break word(dict, words[i])
OD</syntaxhighlight>
 
{{out}}
<pre>abcd:
a b cd
abbc:
a b bc
abcbcd:
a bc b cd
abc b cd
acdbc:
a cd bc
abcdd:
<no break possible>
</pre>
 
=={{header|C++}}==
{{trans|Rust}}
<langsyntaxhighlight lang="cpp">#include <algorithm>
#include <iostream>
#include <optional>
Line 229 ⟶ 321:
return 0;
}
</syntaxhighlight>
</lang>
 
{{out}}
Line 238 ⟶ 330:
=={{header|D}}==
{{trans|Java}}
<langsyntaxhighlight lang="d">import std.algorithm;
import std.range;
import std.stdio;
Line 297 ⟶ 389:
parsed = p;
}
}</langsyntaxhighlight>
{{out}}
<pre>String = aab, Dictionary = ["a", "aa", "b", "ab", "aab"]. Solutions = 4
Line 330 ⟶ 422:
{{libheader| System.Generics.Collections}}
{{Trans|Go}}
<syntaxhighlight lang="delphi">
<lang Delphi>
program Word_break_problem;
 
Line 418 ⟶ 510:
 
Readln;
end.</langsyntaxhighlight>
=={{header|Go}}==
<langsyntaxhighlight lang="go">package main
 
import (
Line 471 ⟶ 563:
}
}
}</langsyntaxhighlight>
{{out}}
<pre>
Line 483 ⟶ 575:
=={{header|Haskell}}==
{{Trans|Javascript}}
<langsyntaxhighlight lang="haskell">import Data.List (isPrefixOf, intercalate)
import Data.Tree (Tree(..))
 
Line 516 ⟶ 608:
 
main :: IO ()
main = (putStrLn . unlines) $ wordBreaks ws <$> texts</langsyntaxhighlight>
{{Out}}
<pre>abcd:
Line 537 ⟶ 629:
With such short sentences we can find the partition sets, then check that all are words.
 
<syntaxhighlight lang="j">
<lang J>
all_partitions=: <@(<;.1)"1 _~ (1,.[:#:[:i.2^<:@:#) NB. all_partitions 'abcd'
word_break=: ([ #~ 0 = [: #&>@:] -.L:_1 _)~ all_partitions
main=: (] , (;:inv L:_1@:word_break >))"_ 0 boxopen
</syntaxhighlight>
</lang>
 
<pre>
Line 595 ⟶ 687:
=={{header|Java}}==
Accept string to be parsed, and dictionary of words, as method arguments.
<syntaxhighlight lang="java">
<lang Java>
import java.util.ArrayList;
import java.util.Arrays;
Line 665 ⟶ 757:
 
}
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 700 ⟶ 792:
Composing a solution from generic functions.
{{Trans|Python}}
<langsyntaxhighlight lang="javascript">(() => {
'use strict';
 
Line 811 ⟶ 903:
// MAIN ---
return main();
})();</langsyntaxhighlight>
{{Out}}
<pre>abcd:
Line 842 ⟶ 934:
used to determine the number of possible parses of the string "totalreturn".
 
<syntaxhighlight lang="jq">
<lang jq>
def words: ["a", "bc", "abc", "cd", "b"];
def strings: ["abcd", "abbc", "abcbcd", "acdbc", "abcdd"];
Line 888 ⟶ 980:
demo1, " ", demo2, "", demo3
 
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 908 ⟶ 1,000:
 
=={{header|Julia}}==
Some extra loops to record and print all solutions.<langsyntaxhighlight Julialang="julia">
words = ["a", "bc", "abc", "cd", "b"]
strings = ["abcd", "abbc", "abcbcd", "acdbc", "abcdd"]
Line 935 ⟶ 1,027:
end
 
wordbreak()</langsyntaxhighlight>
{{output}}<pre>
1 Solution(s) for abcd:
Line 950 ⟶ 1,042:
=={{header|Kotlin}}==
I've downloaded the free dictionary at http://www.puzzlers.org/pub/wordlists/unixdict.txt for this task. All single letters from 'a' to 'z' are considered to be words by this dictionary but 'bc' and 'cd' which I'd have expected to be present are not.
<langsyntaxhighlight lang="scala">// version 1.1.3
 
import java.io.File
Line 997 ⟶ 1,089:
println()
}
}</langsyntaxhighlight>
 
{{out}}
Line 1,023 ⟶ 1,115:
 
=={{header|Lua}}==
<langsyntaxhighlight lang="lua">-- a specialized dict format is used to minimize the
-- possible candidates for this particalur problem
function genDict(ws)
Line 1,088 ⟶ 1,180:
for i=1,#test do
print(test[i],wordbreak(test[i]))
end</langsyntaxhighlight>
{{out}}
<pre>abcd a b cd
Line 1,099 ⟶ 1,191:
We provide two ways to initialize the dictionary: either explicitly by providing the list of words or by providing a file name from which extract the words. In the second case, it is possible to specify the minimal length of the words to keep. This is useful mainly to eliminate all the one letter words we get with “unixdict.txt”.
 
<langsyntaxhighlight Nimlang="nim">import sequtils, sets, strutils
 
type
Line 1,146 ⟶ 1,238:
dict = initDict("unixdict.txt", 2)
dict.breakWord("because")
dict.breakWord("software")</langsyntaxhighlight>
 
{{out}}
Line 1,174 ⟶ 1,266:
 
=={{header|Perl}}==
<langsyntaxhighlight lang="perl">use strict;
use warnings;
 
Line 1,186 ⟶ 1,278:
@matches = $word =~ /^ ($one_of) ($one_of)? ($one_of)? ($one_of)? $/x; # sub-optimal: limited number of matches
return join(' ', grep {$_} @matches) || "(not possible)";
}</langsyntaxhighlight>
{{out}}
<pre>a: a
Line 1,203 ⟶ 1,295:
=={{header|Phix}}==
The distributed version also contains a few experiments with applying a real dictionary, largely unsuccessful.
<!--<langsyntaxhighlight Phixlang="phix">(phixonline)-->
<span style="color: #000080;font-style:italic;">--
-- demo\rosetta\Word_break_problem.exw
Line 1,316 ⟶ 1,408:
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">()</span>
<!--</langsyntaxhighlight>-->
{{out}}
<pre>
Line 1,332 ⟶ 1,424:
===Non-determinism using member/2===
<code>s/2</code> checks all possible combinations. It uses <code>member/2</code> to select some element from Dict and backtracks if no solution is found.
<langsyntaxhighlight Picatlang="picat">main =>
Tests = [["aab", ["a", "aa", "b", "ab", "aab"]],
["abba", ["a", "aa", "b", "ab", "aab"]],
Line 1,344 ⟶ 1,436:
foreach([String,Dict] in Tests)
println([string=String,dict=Dict]),
All = findall(S, S = s3s(Dict,String)),
println(All),
nl
Line 1,356 ⟶ 1,448:
S := S ++ [D]
end,
S.flatten==String.</langsyntaxhighlight>
 
{{out}}
Line 1,385 ⟶ 1,477:
===Non-determinism using member/2 and append/3===
<code>s2/2</code> is more efficient. It uses <code>append/3</code> to extract the found prefix from the string.
<langsyntaxhighlight Picatlang="picat">s2(Dict,String) = S =>
String2 = copy_term(String),
S = [],
Line 1,400 ⟶ 1,492:
S := S ++ [D]
end,
S.flatten==String.</langsyntaxhighlight>
 
===Using recursion===
<code>s3/2</code> use the same idea as <code>s2/2</code> but use recursion. Neater and more efficient.
<langsyntaxhighlight Picatlang="picat">s3(Dict,String) = S =>
s3(Dict,String,S).
 
Line 1,411 ⟶ 1,503:
member(E,Dict),
append(E,String2,String),
s3(Dict,String2,S).</langsyntaxhighlight>
 
===Some tests===
Here is the test engine.
<langsyntaxhighlight Picatlang="picat">main =>
garbage_collect(300_000_000),
_ = random2(),
Line 1,435 ⟶ 1,527:
end,
println(len=All.len),
nl.</langsyntaxhighlight>
 
As can be seen by this summary, <code>s3/2</code> is the most efficient of these versions. <code>s/2</code> is too slow but for the simplest examples.
Line 1,476 ⟶ 1,568:
 
=={{header|PicoLisp}}==
<langsyntaxhighlight PicoLisplang="picolisp">(setq *Dict (quote "a" "bc" "abc" "cd" "b"))
(setq *Dict2
(quote
Line 1,516 ⟶ 1,608:
(println (word "samsungandmango" *Dict2))
(println (word "samsungandmangok" *Dict2))
(println (word "ksamsungandmango" *Dict2))</langsyntaxhighlight>
{{out}}
<pre>
Line 1,538 ⟶ 1,630:
 
{{Works with|Python|3.7}}
<langsyntaxhighlight lang="python">'''Parsing a string for word breaks'''
 
from itertools import (chain)
Line 1,640 ⟶ 1,732:
# MAIN ---
if __name__ == '__main__':
main()</langsyntaxhighlight>
{{Out}}
<pre>abcd:
Line 1,657 ⟶ 1,749:
This returns all the possible splits (and null list if none is possible). Who's to say which is the best?
 
<langsyntaxhighlight lang="racket">#lang racket
 
(define render-phrases pretty-print)
Line 1,688 ⟶ 1,780:
(render-phrases (word-splits "samsungandmango" dict-2))
(render-phrases (word-splits "samsungandmangok" dict-2))
(render-phrases (word-splits "ksamsungandmango" dict-2)))</langsyntaxhighlight>
{{out}}
<pre>'("a b cd")
Line 1,710 ⟶ 1,802:
This implementation does not necessarily find ''every'' combination, it returns the one with the longest matching tokens.
 
<syntaxhighlight lang="raku" perl6line>my @words = <a bc abc cd b>;
my $regex = @words.join('|');
 
put "$_: ", word-break($_) for <abcd abbc abcbcd acdbc abcdd>;
 
sub word-break (Str $word) { ($word ~~ / ^ (<$regex>)+ $ /)[0] // "Not possible" }</langsyntaxhighlight>
{{out}}
<pre>abcd: a b cd
Line 1,725 ⟶ 1,817:
=={{header|REXX}}==
This REXX version allows the words to be tested (and the dictionary words) to be specified on the command line.
<langsyntaxhighlight lang="rexx">/*REXX program breaks up a word (or string) into a list of words from a dictionary.*/
parse arg a '/' x; a=space(a); x=space(x) /*get optional args; elide extra blanks*/
if a=='' | a=="," then a= 'abcd abbc abcbcd acdbc abcdd' /*Not specififed? Use default*/
Line 1,759 ⟶ 1,851:
exit /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
fw: parse arg z,L; do k=L by -1 for L; ?=left(z,k); if @.? then leave; end; return k</langsyntaxhighlight>
{{out|output|text=&nbsp; when using the default inputs:}}
<pre>
Line 1,772 ⟶ 1,864:
 
=={{header|Ring}}==
<langsyntaxhighlight lang="ring">
# Project : Word break problem
 
Line 1,837 ⟶ 1,929:
next
next
</syntaxhighlight>
</lang>
Output:
<pre>
Line 1,844 ⟶ 1,936:
abcbcd = abc b cd
acdbc = a cd bc
</pre>
 
=={{header|Ruby}}==
 
<syntaxhighlight lang="ruby">
def split_text_with_dict(text, dict, splited=[])
solutions = []
dict.each do |word|
if text.start_with? word
new_text = text.delete_prefix word
new_splited = splited.dup<< word
if new_text.empty?
solutions << new_splited
else
sols = split_text_with_dict(new_text, dict, new_splited)
sols.each { |s| solutions << s }
end
end
end
return solutions
end
</syntaxhighlight>
 
{{out}}
<pre>
dict = ["a", "abc", "b", "bc", "cd"]
 
abcd => a b cd
abbc => a b bc
abcbcd
=> a bc b cd
=> abc b cd
acdbc => a cd bc
abcdd => No solution
 
</pre>
 
=={{header|Rust}}==
===Dynamic programming===
<langsyntaxhighlight lang="rust">use std::collections::HashSet;
fn create_string(s: &str, v: Vec<Option<usize>>) -> String {
let mut idx = s.len();
Line 1,893 ⟶ 2,020:
set.insert("b");
println!("{:?}", word_break("abcd", set).unwrap());
}</langsyntaxhighlight>
{{output}}
<pre>"a b cd"</pre>
Line 1,900 ⟶ 2,027:
===First solution===
Finds all possible solutions recursively, using a trie representation of the dictionary:
<langsyntaxhighlight lang="scala">case class TrieNode(isWord: Boolean, children: Map[Char, TrieNode]) {
def add(s: String): TrieNode = s match {
case "" => copy(isWord = true)
Line 1,933 ⟶ 2,060:
def wordBreak(s: String, dict: TrieNode): List[List[String]] = {
wordBreakRec(s, dict, dict, "")
}</langsyntaxhighlight>
Calling it with some example strings:
<langsyntaxhighlight lang="scala">val dict = buildTrie("a", "bc", "abc", "cd", "b")
val testCases = List("abcd", "abbc", "abcbcd", "acdbc", "abcdd")
for (s <- testCases) {
Line 1,943 ⟶ 2,070:
println("\t" + words.mkString(" "))
}
}</langsyntaxhighlight>
{{out}}
<pre>abcd has 1 solution(s):
Line 1,957 ⟶ 2,084:
===Combined solution===
{{Out}}Best seen running in your browser either by [https://scalafiddle.io/sf/49YwsD5/1 ScalaFiddle (ES aka JavaScript, non JVM)] or [https://scastie.scala-lang.org/L47OzgmjSkOnQ5wfrZ5snQ Scastie (remote JVM)].
<langsyntaxhighlight Scalalang="scala">object WordBreak extends App {
val dict = buildTrie("a", "bc", "abc", "cd", "b")
lazy val empty = TrieNode(isWord = false, Map.empty) // lazy or in a companion object
Line 2,006 ⟶ 2,133:
})
 
}</langsyntaxhighlight>
 
=={{header|Seed7}}==
<langsyntaxhighlight lang="seed7">$ include "seed7_05.s7i";
 
const func boolean: wordBreak (in string: stri, in array string: words, in string: resultList) is func
Line 2,042 ⟶ 2,169:
end if;
end for;
end func;</langsyntaxhighlight>
 
{{out}}
Line 2,056 ⟶ 2,183:
=={{header|Sidef}}==
{{trans|zkl}}
<langsyntaxhighlight lang="ruby">func word_break (str, words) {
 
var r = ->(str, arr=[]) {
Line 2,078 ⟶ 2,205:
for str in (strs) {
printf("%11s: %s\n", str, word_break(str, words) \\ '(not possible)')
}</langsyntaxhighlight>
{{out}}
<pre>
Line 2,095 ⟶ 2,222:
{{trans|Rust}}
 
<langsyntaxhighlight lang="swift">infix operator ??= : AssignmentPrecedence
 
@inlinable
Line 2,166 ⟶ 2,293:
print("\(test):")
print(" \(wordBreak(str: test, dict: words) ?? "did not parse with given words")")
}</langsyntaxhighlight>
 
{{out}}
Line 2,183 ⟶ 2,310:
=={{header|Tailspin}}==
Does a depth-first search (after generating all possibilities on the current level). We could stop looking further after one is found, just add a condition to do nothing if a done-flag is set.
<langsyntaxhighlight lang="tailspin">
templates wordBreaks&{dict:}
composer starts&{with:}
Line 2,208 ⟶ 2,335:
'ababab' -> wordBreaks&{dict: ['a', 'ab', 'bab']} -> !OUT::write
'abcbab' -> wordBreaks&{dict: ['a', 'ab', 'bab']} -> !OUT::write
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 2,222 ⟶ 2,349:
=={{header|Visual Basic .NET}}==
{{trans|Java}}
<langsyntaxhighlight lang="vbnet">Module Module1
 
Structure Node
Line 2,294 ⟶ 2,421:
End Sub
 
End Module</langsyntaxhighlight>
{{out}}
<pre>String = aab, Dictionary = System.Collections.Generic.List`1[System.String]. Solutions = 4
Line 2,327 ⟶ 2,454:
{{trans|Go}}
{{libheader|Wren-fmt}}
<langsyntaxhighlight ecmascriptlang="wren">import "./fmt" for Fmt
 
class Prefix {
Line 2,366 ⟶ 2,493:
System.print("can't break")
}
}</langsyntaxhighlight>
 
{{out}}
Line 2,378 ⟶ 2,505:
 
=={{header|zkl}}==
<langsyntaxhighlight lang="zkl">fcn wordBreak(str,words){ // words is string of space seperated words
words=words.split(" "); // to list of words
r:=fcn(str,words,sink){ // recursive search, easy to collect answer
Line 2,392 ⟶ 2,519:
if(False==r) return("not possible");
r.reverse().concat(" ")
}</langsyntaxhighlight>
<langsyntaxhighlight lang="zkl">foreach text in (T("abcd","abbc","abcbcd","acdbc","abcdd")){
println(text,": ",wordBreak(text,"a bc abc cd b"))
}</langsyntaxhighlight>
{{out}}
<pre>
9,476

edits