Word break problem: Difference between revisions

Content added Content deleted

Inline

Revision as of 06:58, 27 May 2022

Task: Given an input string and a dictionary of words, segment the input string into a space-separated sequence of dictionary words if possible.

Other tasks related to string operations:

Metrics

Counting

Remove/replace

Anagrams/Derangements/shuffling

Find/Search/Determine

Formatting

Song lyrics/poems/Mad Libs/phrases

Tokenize

Sequences

11l

Translation of: D

<lang 11l>T Node

  String val
  [String] parsed
  F (val, [String] parsed = [String]())
     .val = val
     .parsed = copy(parsed)

F word_break(s, dictionary)

  String matches
  V queue = Deque([Node(s)])
  L !queue.empty
     V node = queue.pop_left()
     I node.val.empty
        matches [+]= node.parsed
     E
        L(word) dictionary
           I node.val.starts_with(word)
              V val_new = node.val[word.len .< node.val.len]
              V parsed_new = copy(node.parsed)
              parsed_new [+]= word
              queue [+]= Node(val_new, parsed_new)
  R matches

F process(d, test_strings)

  L(test_string) test_strings
     V matches = word_break(test_string, d)
     print((‘String = ’test_string‘, Dictionary = ’String(d)‘.  Solutions =’)‘ ’matches.len)
     L(match) matches
        print(‘  Word Break = ’match)
     print()

V d = [‘a’, ‘aa’, ‘b’, ‘ab’, ‘aab’] process(d, [‘aab’, ‘aa b’])

d = [‘abc’, ‘a’, ‘ac’, ‘b’, ‘c’, ‘cb’, ‘d’] process(d, [‘abcd’, ‘abbc’, ‘abcbcd’, ‘acdbc’, ‘abcdd’])</lang>

Output:

String = aab, Dictionary = [a, aa, b, ab, aab].  Solutions = 4
  Word Break = [aab]
  Word Break = [a, ab]
  Word Break = [aa, b]
  Word Break = [a, a, b]

String = aa b, Dictionary = [a, aa, b, ab, aab].  Solutions = 0

String = abcd, Dictionary = [abc, a, ac, b, c, cb, d].  Solutions = 2
  Word Break = [abc, d]
  Word Break = [a, b, c, d]

String = abbc, Dictionary = [abc, a, ac, b, c, cb, d].  Solutions = 1
  Word Break = [a, b, b, c]

String = abcbcd, Dictionary = [abc, a, ac, b, c, cb, d].  Solutions = 3
  Word Break = [abc, b, c, d]
  Word Break = [a, b, cb, c, d]
  Word Break = [a, b, c, b, c, d]

String = acdbc, Dictionary = [abc, a, ac, b, c, cb, d].  Solutions = 2
  Word Break = [ac, d, b, c]
  Word Break = [a, c, d, b, c]

String = abcdd, Dictionary = [abc, a, ac, b, c, cb, d].  Solutions = 2
  Word Break = [abc, d, d]
  Word Break = [a, b, c, d, d]

Aime

<lang aime>integer wordbreak(record dict, text phrase, integer p, list words) {

   integer complete, n;
   text s;

   complete = 0;
   if (rsk_lower(dict, phrase, s)) {
       if (s == phrase) {
           words.append(s);
           complete = 1;
       } else {
           do {
               n = 0;
               while (phrase[n] == s[n]) {
                   n += 1;
               }
               if (!n) {
                   break;
               }
               words.append(cut(s, 0, n));
               complete = wordbreak(dict, project(phrase, n), p + 1, words);
               if (complete) {
                   break;
               }
               words.delete(-1);
           } while (rsk_less(dict, s, s));
       }
   }

   if (!p) {
       o_(phrase, ":");
       if (complete) {
           words.ucall(o_, 1, " ");
       } else {
           o_(" can't break");
       }
       o_newline();

       words.clear;
   }

   complete;

}

integer main(void) {

   record dict;

   dict.fit("a", 0, "bc", 0, "abc", 0, "cd", 0, "b", 0);
   list("abcd", "abbc", "abcbcd", "acdbc", "abcdd").ucall(wordbreak, 1, dict, 0, list());

   return 0;

}</lang>

Output:

abcd: a b cd
abbc: a b bc
abcbcd: abc b cd
acdbc: a cd bc
abcdd: can't break

C++

Translation of: Rust

<lang cpp>#include <algorithm>

include <iostream>
include <optional>
include <set>
include <string>
include <string_view>
include <vector>

struct string_comparator {

   using is_transparent = void;
   bool operator()(const std::string& lhs, const std::string& rhs) const {
       return lhs < rhs;
   }
   bool operator()(const std::string& lhs, const std::string_view& rhs) const {
       return lhs < rhs;
   }
   bool operator()(const std::string_view& lhs, const std::string& rhs) const {
       return lhs < rhs;
   }

};

using dictionary = std::set<std::string, string_comparator>;

template <typename iterator, typename separator> std::string join(iterator begin, iterator end, separator sep) {

   std::string result;
   if (begin != end) {
       result += *begin++;
       for (; begin != end; ++begin) {
           result += sep;
           result += *begin;
       }
   }
   return result;

}

auto create_string(const std::string_view& s,

                  const std::vector<std::optional<size_t>>& v) {
   auto idx = s.size();
   std::vector<std::string_view> sv;
   while (v[idx].has_value()) {
       size_t prev = v[idx].value();
       sv.push_back(s.substr(prev, idx - prev));
       idx = prev;
   }
   std::reverse(sv.begin(), sv.end());
   return join(sv.begin(), sv.end(), ' ');

}

std::optional<std::string> word_break(const std::string_view& str,

                                     const dictionary& dict) {
   auto size = str.size() + 1;
   std::vector<std::optional<size_t>> possible(size);
   auto check_word = [&dict, &str](size_t i, size_t j)
           -> std::optional<size_t> {
       if (dict.find(str.substr(i, j - i)) != dict.end())
           return i;
       return std::nullopt;
   };
   for (size_t i = 1; i < size; ++i) {
       if (!possible[i].has_value())
           possible[i] = check_word(0, i);
       if (possible[i].has_value()) {
           for (size_t j = i + 1; j < size; ++j) {
               if (!possible[j].has_value())
                   possible[j] = check_word(i, j);
           }
           if (possible[str.size()].has_value())
               return create_string(str, possible);
       }
   }
   return std::nullopt;

}

int main(int argc, char** argv) {

   dictionary dict;
   dict.insert("a");
   dict.insert("bc");
   dict.insert("abc");
   dict.insert("cd");
   dict.insert("b");
   auto result = word_break("abcd", dict);
   if (result.has_value())
       std::cout << result.value() << '\n';
   return 0;

} </lang>

Output:

a b cd

D

Translation of: Java

<lang d>import std.algorithm; import std.range; import std.stdio;

void main() {

   string[] dict = ["a", "aa", "b", "ab", "aab"];
   process(dict, ["aab", "aa b"]);

   dict = ["abc", "a", "ac", "b", "c", "cb", "d"];
   process(dict, ["abcd", "abbc", "abcbcd", "acdbc", "abcdd"]);

}

void process(string[] dict, string[] testStrings) {

   foreach (testString; testStrings) {
       auto matches = wordBreak(testString, dict);
       writeln("String = ", testString, ", Dictionary = ", dict, ".  Solutions = ", matches.length);
       foreach (match; matches) {
           writeln("  Word Break = ", match);
       }
       writeln();
   }

}

string[][] wordBreak(string s, string[] dictionary) {

   string[][] matches;
   Node[] queue = [Node(s)];
   while (!queue.empty) {
       auto node = queue.front;
       queue.popFront;
       // Check if fully parsed
       if (node.val.length == 0) {
           matches ~= node.parsed;
       } else {
           foreach (word; dictionary) {
               // Check for match
               if (node.val.startsWith(word)) {
                   auto valNew = node.val[word.length .. node.val.length];
                   auto parsedNew = node.parsed.dup;
                   parsedNew ~= word;
                   queue ~= Node(valNew, parsedNew);
               }
           }
       }
   }
   return matches;

}

struct Node {

   string val;
   string[] parsed;

   this(string initial) {
       val = initial;
   }

   this(string s, string[] p) {
       val = s;
       parsed = p;
   }

}</lang>

Output:

String = aab, Dictionary = ["a", "aa", "b", "ab", "aab"].  Solutions = 4
  Word Break = ["aab"]
  Word Break = ["a", "ab"]
  Word Break = ["aa", "b"]
  Word Break = ["a", "a", "b"]

String = aa b, Dictionary = ["a", "aa", "b", "ab", "aab"].  Solutions = 0

String = abcd, Dictionary = ["abc", "a", "ac", "b", "c", "cb", "d"].  Solutions = 2
  Word Break = ["abc", "d"]
  Word Break = ["a", "b", "c", "d"]

String = abbc, Dictionary = ["abc", "a", "ac", "b", "c", "cb", "d"].  Solutions = 1
  Word Break = ["a", "b", "b", "c"]

String = abcbcd, Dictionary = ["abc", "a", "ac", "b", "c", "cb", "d"].  Solutions = 3
  Word Break = ["abc", "b", "c", "d"]
  Word Break = ["a", "b", "cb", "c", "d"]
  Word Break = ["a", "b", "c", "b", "c", "d"]

String = acdbc, Dictionary = ["abc", "a", "ac", "b", "c", "cb", "d"].  Solutions = 2
  Word Break = ["ac", "d", "b", "c"]
  Word Break = ["a", "c", "d", "b", "c"]

String = abcdd, Dictionary = ["abc", "a", "ac", "b", "c", "cb", "d"].  Solutions = 2
  Word Break = ["abc", "d", "d"]
  Word Break = ["a", "b", "c", "d", "d"]

Go

<lang go>package main

import (

   "fmt"
   "strings"

)

type dict map[string]bool

func newDict(words ...string) dict {

   d := dict{}
   for _, w := range words {
       d[w] = true
   }
   return d

}

func (d dict) wordBreak(s string) (broken []string, ok bool) {

   if s == "" {
       return nil, true
   }
   type prefix struct {
       length int
       broken []string
   }
   bp := []prefixTemplate:0, nil
   for end := 1; end <= len(s); end++ {
       for i := len(bp) - 1; i >= 0; i-- {
           w := s[bp[i].length:end]
           if d[w] {
               b := append(bp[i].broken, w)
               if end == len(s) {
                   return b, true
               }
               bp = append(bp, prefix{end, b})
               break
           }
       }
   }
   return nil, false

}

func main() {

   d := newDict("a", "bc", "abc", "cd", "b")
   for _, s := range []string{"abcd", "abbc", "abcbcd", "acdbc", "abcdd"} {
       if b, ok := d.wordBreak(s); ok {
           fmt.Printf("%s: %s\n", s, strings.Join(b, " "))
       } else {
           fmt.Println("can't break")
       }
   }

}</lang>

Output:

abcd: a b cd
abbc: a b bc
abcbcd: a bc b cd
acdbc: a cd bc
can't break

Haskell

Translation of: Javascript

<lang haskell>import Data.List (isPrefixOf, intercalate) import Data.Tree (Tree(..))

wordBreaks :: [String] -> String -> String wordBreaks ws = (++) <*> (":\n" ++) . report . fmap go . tokenTrees ws

 where
   go t
     | null (subForest t) = [rootLabel t]
     | otherwise = subForest t >>= ((:) (rootLabel t) . go)
   report xs
     | null xs = "\tNot parseable with these words"
     | otherwise = unlines $ ('\t' :) . intercalate " -> " <$> xs

tokenTrees :: [String] -> String -> [Tree String] tokenTrees ws = go

 where
   go s
     | s `elem` ws = [Node s []]
     | otherwise = ws >>= next s
   next s w
     | w `isPrefixOf` s = parse w (go (drop (length w) s))
     | otherwise = []
   parse w xs
     | null xs = []
     | otherwise = [Node w xs]

TEST ---------------------------

ws, texts :: [String] ws = words "a bc abc cd b"

texts = words "abcd abbc abcbcd acdbc abcdd"

main :: IO () main = (putStrLn . unlines) $ wordBreaks ws <$> texts</lang>

Output:

abcd:
    a -> b -> cd

abbc:
    a -> b -> bc

abcbcd:
    a -> bc -> b -> cd
    abc -> b -> cd

acdbc:
    a -> cd -> bc

abcdd:
    Not parseable with these words

J

With such short sentences we can find the partition sets, then check that all are words.

<lang J> all_partitions=: <@(<;.1)"1 _~ (1,.[:#:[:i.2^<:@:#) NB. all_partitions 'abcd' word_break=: ([ #~ 0 = [: #&>@:] -.L:_1 _)~ all_partitions main=: (] , (;:inv L:_1@:word_break >))"_ 0 boxopen </lang>

   NB. demonstrate partitions of four integers
   all_partitions i. 4
┌─────────┬─────────┬─────────┬─────────┬─────────┬─────────┬─────────┬─────────┐
│┌───────┐│┌─────┬─┐│┌───┬───┐│┌───┬─┬─┐│┌─┬─────┐│┌─┬───┬─┐│┌─┬─┬───┐│┌─┬─┬─┬─┐│
││0 1 2 3│││0 1 2│3│││0 1│2 3│││0 1│2│3│││0│1 2 3│││0│1 2│3│││0│1│2 3│││0│1│2│3││
│└───────┘│└─────┴─┘│└───┴───┘│└───┴─┴─┘│└─┴─────┘│└─┴───┴─┘│└─┴─┴───┘│└─┴─┴─┴─┘│
└─────────┴─────────┴─────────┴─────────┴─────────┴─────────┴─────────┴─────────┘


   NB. demonstrate word_break
   NB.   (,L:_1]0;1 2;0 1 2;2 3;1)
   NB. ┌─┬───┬─────┬───┬─┐
   NB. │0│1 2│0 1 2│2 3│1│
   NB. └─┴───┴─────┴───┴─┘
   (,L:_1]0;1 2;0 1 2;2 3;1) word_break i. 4
┌─────────┐
│┌─┬─┬───┐│
││0│1│2 3││
│└─┴─┴───┘│
└─────────┘


   NB. save and display the dictionary
   [dictionary=: ;: 'a bc abc cd b'
┌─┬──┬───┬──┬─┐
│a│bc│abc│cd│b│
└─┴──┴───┴──┴─┘

   NB. demonstrate main
   dictionary main 'abc'
┌───┬───┬────┐
│abc│abc│a bc│
└───┴───┴────┘

   NB. solution
   dictionary main ;: 'abcd abbc abcbcd acdbc abcdd'
┌──────┬────────┬─────────┐
│abcd  │a b cd  │         │
├──────┼────────┼─────────┤
│abbc  │a b bc  │         │
├──────┼────────┼─────────┤
│abcbcd│abc b cd│a bc b cd│
├──────┼────────┼─────────┤
│acdbc │a cd bc │         │
├──────┼────────┼─────────┤
│abcdd │        │         │
└──────┴────────┴─────────┘

Java

Accept string to be parsed, and dictionary of words, as method arguments. <lang Java> import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedList; import java.util.List; import java.util.Queue;

public class WordBreak {

   public static void main(String[] args) {
       List<String> dict = Arrays.asList("a", "aa", "b", "ab", "aab");
       for ( String testString : Arrays.asList("aab", "aa b") ) {
           List<List<String>> matches = wordBreak(testString, dict);
           System.out.printf("String = %s, Dictionary = %s.  Solutions = %d:%n", testString, dict, matches.size());
           for ( List<String> match : matches ) {
               System.out.printf(" Word Break = %s%n", match);
           }
           System.out.printf("%n");
       }
       dict = Arrays.asList("abc", "a", "ac", "b", "c", "cb", "d");
       for ( String testString : Arrays.asList("abcd", "abbc", "abcbcd", "acdbc", "abcdd") ) {
           List<List<String>> matches = wordBreak(testString, dict);
           System.out.printf("String = %s, Dictionary = %s.  Solutions = %d:%n", testString, dict, matches.size());
           for ( List<String> match : matches ) {
               System.out.printf(" Word Break = %s%n", match);
           }
           System.out.printf("%n");
       }
   }
   
   private static List<List<String>> wordBreak(String s, List<String> dictionary) {
       List<List<String>> matches = new ArrayList<>();
       Queue<Node> queue = new LinkedList<>();
       queue.add(new Node(s));
       while ( ! queue.isEmpty() ) {
           Node node = queue.remove();
           //  Check if fully parsed
           if ( node.val.length() == 0 ) {
               matches.add(node.parsed);
           }
           else {
               for ( String word : dictionary ) {
                   //  Check for match
                   if ( node.val.startsWith(word) ) {
                       String valNew = node.val.substring(word.length(), node.val.length());
                       List<String> parsedNew = new ArrayList<>();
                       parsedNew.addAll(node.parsed);
                       parsedNew.add(word);
                       queue.add(new Node(valNew, parsedNew));
                   }
               }
           }
       }
       return matches;
   }
   
   private static class Node {
       private String val;  //  Current unmatched string
       private List<String> parsed;  //  Matches in dictionary
       public Node(String initial) {
           val = initial;
           parsed = new ArrayList<>();
       }
       public Node(String s, List<String> p) {
           val = s;
           parsed = p;
       }
   }

} </lang>

Output:

String = aab, Dictionary = [a, aa, b, ab, aab].  Solutions = 4:
 Word Break = [aab]
 Word Break = [a, ab]
 Word Break = [aa, b]
 Word Break = [a, a, b]

String = aa b, Dictionary = [a, aa, b, ab, aab].  Solutions = 0:

String = abcd, Dictionary = [abc, a, ac, b, c, cb, d].  Solutions = 2:
 Word Break = [abc, d]
 Word Break = [a, b, c, d]

String = abbc, Dictionary = [abc, a, ac, b, c, cb, d].  Solutions = 1:
 Word Break = [a, b, b, c]

String = abcbcd, Dictionary = [abc, a, ac, b, c, cb, d].  Solutions = 3:
 Word Break = [abc, b, c, d]
 Word Break = [a, b, cb, c, d]
 Word Break = [a, b, c, b, c, d]

String = acdbc, Dictionary = [abc, a, ac, b, c, cb, d].  Solutions = 2:
 Word Break = [ac, d, b, c]
 Word Break = [a, c, d, b, c]

String = abcdd, Dictionary = [abc, a, ac, b, c, cb, d].  Solutions = 2:
 Word Break = [abc, d, d]
 Word Break = [a, b, c, d, d]

JavaScript

Composing a solution from generic functions.

Translation of: Python

<lang javascript>(() => {

   'use strict';

   const main = () => {
       const
           wds = words('a bc abc cd b'),
           texts = words('abcd abbc abcbcd acdbc abcdd');

       return unlines(
           map(wordBreaks(wds),
               texts
           )
       );
   };

   // WORD BREAKS ----------------------------------------

   // tokenTrees :: [String] -> String -> [Tree String]
   const tokenTrees = (wds, s) => {
       const go = s =>
           wds.includes(s) ? (
               [Node(s, [])]
           ) : bindList(wds, next(s));
       const next = s => w =>
           s.startsWith(w) ? (
               parse(w, go(s.slice(w.length)))
           ) : [];
       const parse = (w, xs) =>
           0 < xs.length ? [Node(w, xs)] : xs;
       return go(s);
   };

   // wordBreaks :: [String] -> String -> String
   const wordBreaks = wds => s => {
       const
           // go :: Tree a -> [a]
           go = t => isNull(t.nest) ? [
               t.root
           ] : bindList(
               t.nest,
               compose(cons(t.root), go),
           ),
           parses = map(go, tokenTrees(wds, s));
       return `${s}:\n` + (
           0 < parses.length ? unlines(
               map(x => '\t' + intercalateS(' -> ', x),
                   parses
               )
           ) : '\t(Not parseable with these words)'
       );
   };

   // GENERIC FUNCTIONS ----------------------------------

   // Node :: a -> [Tree a] -> Tree a
   const Node = (v, xs) => ({
       type: 'Node',
       root: v,
       nest: xs || []
   });

   // bindList (>>=) :: [a] -> (a -> [b]) -> [b]
   const bindList = (xs, mf) => [].concat.apply([], xs.map(mf));

   // compose (<<<) :: (b -> c) -> (a -> b) -> a -> c
   const compose = (f, g) => x => f(g(x));

   // cons :: a -> [a] -> [a]
   const cons = x => xs => [x].concat(xs);

   // intercalateS :: String -> [String] -> String
   const intercalateS = (s, xs) =>
       xs.join(s);

   // isNull :: [a] -> Bool
   // isNull :: String -> Bool
   const isNull = xs =>
       Array.isArray(xs) || ('string' === typeof xs) ? (
           1 > xs.length
       ) : undefined;

   // isPrefixOf takes two lists or strings and returns
   // true iff the first is a prefix of the second.

   // isPrefixOf :: [a] -> [a] -> Bool
   // isPrefixOf :: String -> String -> Bool
   const isPrefixOf = (xs, ys) => {
       const pfx = (xs, ys) => {
           const intX = xs.length;
           return 0 < intX ? (
               ys.length >= intX ? xs[0] === ys[0] && pfx(
                   xs.slice(1), ys.slice(1)
               ) : false
           ) : true;
       };
       return 'string' !== typeof xs ? (
           pfx(xs, ys)
       ) : ys.startsWith(xs);
   };

   // map :: (a -> b) -> [a] -> [b]
   const map = (f, xs) => xs.map(f);

   // unlines :: [String] -> String
   const unlines = xs => xs.join('\n');

   // words :: String -> [String]
   const words = s => s.split(/\s+/);

   // MAIN ---
   return main();

})();</lang>

Output:

abcd:
    a -> b -> cd
abbc:
    a -> b -> bc
abcbcd:
    a -> bc -> b -> cd
    abc -> b -> cd
acdbc:
    a -> cd -> bc
abcdd:
    (Not parseable with these words)

jq

Works with: jq

Works with gojq, the Go implementation of jq

The solution offered here does not use regular expressions.

The function `string2words` is a generator that can produce all possible parses of a string but which can be stopped after finding the first parse, as in the first demonstration.

In the second demonstration, the generator is used to count the number of possible parses using the same toy dictionary as used in the first demonstration.

In the third demonstration, the well-known dictionary unixdict.txt is used to determine the number of possible parses of the string "totalreturn".

<lang jq> def words: ["a", "bc", "abc", "cd", "b"]; def strings: ["abcd", "abbc", "abcbcd", "acdbc", "abcdd"];

input: an array of allowed words
output: a stream giving all possible parses of the given string into
the allowed words; each output is an array showing how the string
has been parsed.

def string2words(string):

  . as $dict
  # Input: array of words
  # Output: augmented array
  | def s2w(s):
      if s=="" then .
      else $dict[] as $word
      | (s|startswith($word)) as $ix
      | if $ix
        then s[$word|length:] as $rest
        | (. + [$word]) | s2w($rest)

else empty end

      end;
  [] | s2w(string);

def count(s): reduce s as $x (0; .+1) ;

def demo1:

 strings[] as $s
 | words
 | (first(string2words($s)) // []) as $parsed
 | "\($s) => \($parsed|join(" "))" ;

def demo2:

 strings[] as $s
 | words
 | count(string2words($s)) 
 | "\($s) has \(.) parse\(if . == 1 then "" else "s" end)." ;

demo3 assumes an invocation along the lines of:
jq -Rrn -f program.jq unixdict.txt

def demo3:

 "returntotal" as $s
 | "\($s) has \([inputs] |  count(string2words($s)) parses."

demo1, " ", demo2, "", demo3

</lang>

Output:

abcd => a b cd
abbc => a b bc
abcbcd => a bc b cd
acdbc => a cd bc
abcdd => 

abcd has 1 parse.
abbc has 1 parse.
abcbcd has 2 parses.
acdbc has 1 parse.
abcdd has 0 parses.

"returntotal" has 99 parses.

Julia

Some extra loops to record and print all solutions.<lang Julia> words = ["a", "bc", "abc", "cd", "b"] strings = ["abcd", "abbc", "abcbcd", "acdbc", "abcdd"]

subregex = join(words, ")|(") regexes = ["\^$\($subregex$\)\{$i}\$" for i in 6:-1:1]

function wordbreak()

   for s in strings
       solutions = []
       for regex in regexes
           rmat = match(Regex(regex), s)
           if rmat != nothing
               push!(solutions, ["$w" for w in Set(rmat.captures) if w != nothing])
           end
       end
       if length(solutions) > 0
           println("$(length(solutions)) Solution(s) for $s:")
           for sol in solutions
               println("   Solution: $(sol)")
           end
       else
           println("No solutions for $s : No fitting matches found.")
       end
   end

end

wordbreak()</lang>

Output:


1 Solution(s) for abcd:

  Solution: SubString{String}["cd", "b", "a"]

1 Solution(s) for abbc:

  Solution: SubString{String}["b", "a", "bc"]

2 Solution(s) for abcbcd:

  Solution: SubString{String}["cd", "b", "a", "bc"]
  Solution: SubString{String}["cd", "abc", "b"]

1 Solution(s) for acdbc:

  Solution: SubString{String}["cd", "a", "bc"]

No solutions for abcdd : No fitting matches found.

Kotlin

I've downloaded the free dictionary at http://www.puzzlers.org/pub/wordlists/unixdict.txt for this task. All single letters from 'a' to 'z' are considered to be words by this dictionary but 'bc' and 'cd' which I'd have expected to be present are not. <lang scala>// version 1.1.3

import java.io.File

val partitions = mutableListOf<List<String>>()

fun partitionString(s: String, ml: MutableList<String>, level: Int) {

   for (i in s.length - 1 downTo 1) {
       val part1 = s.substring(0, i)
       val part2 = s.substring(i)
       ml.add(part1)
       ml.add(part2)
       partitions.add(ml.toList())
       if (part2.length > 1) {
           ml.removeAt(ml.lastIndex)
           partitionString(part2, ml, level + 1)
       }
       while (ml.size > level) ml.removeAt(ml.lastIndex)
   }

}

fun main(args: Array<String>) {

   val words = File("unixdict.txt").readLines()
   val strings = listOf("abcd", "abbc", "abcbcd", "acdbc", "abcdd")
   for (s in strings) {
       partitions.clear()
       partitions.add(listOf(s))
       val ml = mutableListOf<String>()
       partitionString(s, ml, 0)
       val solutions = mutableListOf<List<String>>()
       for (partition in partitions) {
           var allInDict = true
           for (item in partition) {
               if (words.indexOf(item) == -1) {
                   allInDict = false
                   break
               }
           }
           if (allInDict) solutions.add(partition)
       }
       val plural = if (solutions.size == 1) "" else "s"
       println("$s: ${solutions.size} solution$plural")
       for (solution in solutions) {
           println("    ${solution.joinToString(" ")}")
       }
       println() 
   }

}</lang>

Output:

abcd: 2 solutions
    abc d
    a b c d

abbc: 1 solution
    a b b c

abcbcd: 3 solutions
    abc b c d
    a b cb c d
    a b c b c d

acdbc: 2 solutions
    ac d b c
    a c d b c

abcdd: 2 solutions
    abc d d
    a b c d d

Lua

<lang lua>-- a specialized dict format is used to minimize the -- possible candidates for this particalur problem function genDict(ws)

 local d,dup,head,rest = {},{}
 for w in ws:gmatch"%w+" do
   local lw = w:lower()
   if not dup[lw] then
     dup[lw], head,rest = true, lw:match"^(%w)(.-)$"
     d[head] = d[head] or {n=-1}
     local len = #rest
     d[head][len] = d[head][len] or {}
     d[head][len][rest] = true
     if len>d[head].n then 
       d[head].n = len 
     end
   end
 end
 return d

end

-- sample default dict local defWords = "a;bc;abc;cd;b" local defDict = genDict(defWords)

function wordbreak(w, dict)

 if type(w)~='string' or w:len()==0 then 
   return nil,'emprty or not a string'
 end
 
 dict = type(dict)=='string' and genDict(dict) or dict or defDict
 
 local r, len = {}, #w
 
 -- backtracking
 local function try(i)
   if i>len then return true end
   local head = w:sub(i,i):lower()
   local d = dict[head]
   if not d then return end
   for j=math.min(d.n, len-i),0,-1 do -- prefer longer first
     if d[j] then
       local rest = w:sub(i+1,i+j):lower()
       if d[j][rest] then
         r[1+#r] = w:sub(i,i+j)
         if try(i+j+1) then 
           return true 
         else 
           r[#r]=nil 
         end
       end            
     end
   end        
 end
 
 if try(1) then 
   return table.unpack(r) 
 else 
   return nil,'-no solution-'
 end

end

-- test local test = {'abcd','abbc','abcbcd','acdbc','abcdd' } for i=1,#test do

 print(test[i],wordbreak(test[i]))

end</lang>

Output:

abcd	a	b	cd
abbc	a	b	bc
abcbcd	abc	b	cd
acdbc	a	cd	bc
abcdd	nil	-no solution-

Nim

We provide two ways to initialize the dictionary: either explicitly by providing the list of words or by providing a file name from which extract the words. In the second case, it is possible to specify the minimal length of the words to keep. This is useful mainly to eliminate all the one letter words we get with “unixdict.txt”.

<lang Nim>import sequtils, sets, strutils

type

 Dict = HashSet[string]
 WordSeq = seq[string]

proc initDict(words: openArray[string]): Dict =

 ## Initialize a dictionary from a list of words.
 words.toHashSet

proc initDict(fileName: string; minlength = 0): Dict =

 ## Initialize a dictionary with words from a file.
 ## Only words with minimal length are retained.
 for word in filename.lines:
   if word.len >= minLength:
     result.incl word

func wordBreaks(dict: Dict; word: string): seq[WordSeq] =

 ## Build recursively the list of breaks for a word, using the given dictionary.
 for last in 0..<word.high:
   let part1 = word[0..last]
   if part1 in dict:
     let part2 = word[last+1..^1]
     if part2 in dict: result.add(@[part1, part2])
     result.add dict.wordBreaks(part2).mapIt(part1 & it)

proc breakWord(dict: Dict; word: string) =

 ## Find the ways to break a word and display the result.
 echo word, ": "
 let wordSeqs = dict.wordBreaks(word)
 if wordSeqs.len == 0:
   echo "    <no break possible>"
 else:
   for wordSeq in wordSeqs:
     echo "    ", wordSeq.join(" ")

when isMainModule:

 const EDict = ["a", "bc", "abc", "cd", "b"]
 echo "Using explicit dictionary: ", EDict
 var dict = initDict(EDict)
 for s in ["abcd", "abbc", "abcbcd", "acdbc", "abcdd"]:
   dict.breakWord(s)

 echo("\nUsing “unixdict.txt” dictionary without single letter words.")
 dict = initDict("unixdict.txt", 2)
 dict.breakWord("because")
 dict.breakWord("software")</lang>

Output:

We used the same dictionary and words than in the other languages solutions and also added two examples using “unixdict.txt”.

Using explicit dictionary: ["a", "bc", "abc", "cd", "b"]
abcd: 
    a b cd
abbc: 
    a b bc
abcbcd: 
    a bc b cd
    abc b cd
acdbc: 
    a cd bc
abcdd: 
    <no break possible>

Using “unixdict.txt” dictionary without single letter words.
because: 
    be cause
    be ca use
software: 
    so ft ware
    so ft wa re
    soft ware
    soft wa re

Perl

<lang perl>use strict; use warnings;

my @words = <a o is pi ion par per sip miss able>; print "$_: " . word_break($_,@words) . "\n" for <a aa amiss parable opera operable inoperable permission mississippi>;

sub word_break {

   my($word,@dictionary) = @_;
   my @matches;
   my $one_of = join '|', @dictionary;
   @matches = $word =~ /^ ($one_of) ($one_of)? ($one_of)? ($one_of)? $/x; # sub-optimal: limited number of matches
   return join(' ', grep {$_} @matches) || "(not possible)";

}</lang>

Output:

a: a
aa: a a
ado: ad o
amiss: a miss
admission: ad miss ion
parable: par able
opera: o per a
operable: o per able
inoperable: in o per able
permission: per miss ion
permissible: Not possible
mississippi: miss is sip pi

Phix

The distributed version also contains a few experiments with applying a real dictionary, largely unsuccessful.

--
-- demo\rosetta\Word_break_problem.exw
-- ===================================
--
with javascript_semantics
procedure populate_dict(sequence s)
    for i=1 to length(s) do setd(s[i],0) end for
end procedure
 
populate_dict(split("a bc abc cd b"))
 
function prrec(sequence wordstarts, integer idx, sequence sofar, bool show)
    if idx>length(wordstarts) then
        if show then
            ?sofar
        end if
        return 1
    end if
    integer res = 0
    for i=1 to length(wordstarts[idx]) do
        string w = wordstarts[idx][i]
        res += prrec(wordstarts,idx+length(w),append(deep_copy(sofar),w),show)
    end for
    return res
end function
 
function flattens(sequence s)
-- remove all nesting and empty sequences from a nested sequence of strings
    sequence res = {}, si
    for i=1 to length(s) do
        si = s[i]
        if string(si) then
            res = append(res,si)
        else
            res &= flattens(si)
        end if
    end for
    return res
end function
 
procedure test(string s)
    integer l = length(s),
            wordend = 1 -- (pretend a word just ended at start)
    sequence wordstarts = repeat({},l), 
             wordends = repeat(0,l)
    for i=1 to l do
        if wordend then
            for j=i to l do
                object pkey = getd_partial_key(s[i..j])
                if string(pkey) and length(pkey)>j-i and s[i..j]=pkey[1..j-i+1] then
                    if length(pkey)=j-i+1 then
                        -- exact match
                        wordstarts[i] = append(wordstarts[i],pkey)
                        wordends[j] += 1
                    end if
                else
                    exit
                end if  
            end for
        end if
        wordend = wordends[i]
    end for
    bool worthwhile = true
    while worthwhile do
        worthwhile = false
        wordend = 1 -- (pretend a word just ended at start)
        for i=1 to l do
            if wordend then
                -- eliminate any words that end before a wordstarts of {}.
                for j=length(wordstarts[i]) to 1 by -1 do
                    integer wl = length(wordstarts[i][j])
                    if i+wl<=l and wordstarts[i+wl]={} then
                        integer wedx = i+wl-1
                        wordends[wedx] -= 1
                        wordstarts[i][j..j] = {}
                        worthwhile = true
                    end if
                end for
            else
                -- elimitate all words that start here.
                for j=1 to length(wordstarts[i]) do
                    integer wl = length(wordstarts[i][j])
                    if i+wl<=l then
                        integer wedx = i+wl-1
                        wordends[wedx] -= 1
                        worthwhile = true
                    end if
                end for
                wordstarts[i] = {}
            end if
            wordend = wordends[i]
        end for
    end while
    if sum(wordends)=0 then
        printf(1,"%s: not possible\n",{s})
    else
        integer count = prrec(wordstarts,1,{},false)
        if count=1 then
            printf(1,"%s: 1 solution: %s\n",{s,join(flattens(wordstarts))})
        elsif count>20 then
            printf(1,"%s: %d solution(s): (too many to show)\n",{s,count})
            pp({wordstarts,wordends})
        else
            printf(1,"%s: %d solution(s):\n",{s,count})
            count = prrec(wordstarts,1,{},true)
        end if
    end if
end procedure
 
papply({"abcd","abbc","abcbcd","acdbc","abcdd"},test)

{} = wait_key()

Output:

{"a","bc","abc","cd","b"}
abcd: 1 solution: a b cd
abbc: 1 solution: a b bc
abcbcd: 2 solution(s):
{"a","bc","b","cd"}
{"abc","b","cd"}
acdbc: 1 solution: a cd bc
abcdd: not possible

Picat

Non-determinism using member/2

s/2 checks all possible combinations. It uses member/2 to select some element from Dict and backtracks if no solution is found. <lang Picat>main =>

 Tests = [["aab", ["a", "aa", "b", "ab", "aab"]],
          ["abba", ["a", "aa", "b", "ab", "aab"]],
          ["aa b", ["a", "aa", "b", "ab", "aab"]],
          ["abcd", ["abc", "a", "ac", "b", "c", "cb", "d"]],
          ["abbc", ["abc", "a", "ac", "b", "c", "cb", "d"]],
          ["abcbcd", ["abc", "a", "ac", "b", "c", "cb", "d"]],
          ["acdbc", ["abc", "a", "ac", "b", "c", "cb", "d"]],
          ["abcdd", ["abc", "a", "ac", "b", "c", "cb", "d"]]
          ],
 foreach([String,Dict] in Tests)
   println([string=String,dict=Dict]),
   All = findall(S, S = s(Dict,String)),
   println(All),
   nl
 end,
 nl.

s(Dict,String) = S =>

 S = [],
 while(S.flatten != String, S.flatten.len < String.len)
   member(D,Dict), % pick some element in Dict
   S := S ++ [D]
 end,
 S.flatten==String.</lang>

Output:

[string = aab,dict = [a,aa,b,ab,aab]]
[[a,a,b],[a,ab],[aa,b],[aab]]

[string = abba,dict = [a,aa,b,ab,aab]]
[[a,b,b,a],[ab,b,a]]

[string = aa b,dict = [a,aa,b,ab,aab]]
[]

[string = abcd,dict = [abc,a,ac,b,c,cb,d]]
[[abc,d],[a,b,c,d]]

[string = abbc,dict = [abc,a,ac,b,c,cb,d]]
[[a,b,b,c]]

[string = abcbcd,dict = [abc,a,ac,b,c,cb,d]]
[[abc,b,c,d],[a,b,c,b,c,d],[a,b,cb,c,d]]

[string = acdbc,dict = [abc,a,ac,b,c,cb,d]]
[[a,c,d,b,c],[ac,d,b,c]]

[string = abcdd,dict = [abc,a,ac,b,c,cb,d]]
[[abc,d,d],[a,b,c,d,d]]

Non-determinism using member/2 and append/3

s2/2 is more efficient. It uses append/3 to extract the found prefix from the string. <lang Picat>s2(Dict,String) = S =>

 String2 = copy_term(String),
 S = [],
 while(S.flatten != String, S.flatten.len < String.len)
   % Pick an element from Dict
   member(D,Dict),
   
   % Check that it fits and remove it from the string.
   % If not: backtrack
   append(D,String3,String2),
   
   % ok!
   String2 := String3,
   S := S ++ [D]
 end,
 S.flatten==String.</lang>

Using recursion

s3/2 use the same idea as s2/2 but use recursion. Neater and more efficient. <lang Picat>s3(Dict,String) = S =>

 s3(Dict,String,S).

s3(_Dict,[],[]). s3(Dict,String,[E|S]) :-

  member(E,Dict),
  append(E,String2,String),
  s3(Dict,String2,S).</lang>

Some tests

Here is the test engine. <lang Picat>main =>

 garbage_collect(300_000_000),
 _ = random2(),
 Chars = "ab",
 Dict = ["a", "aa", "b", "ab", "aab"],  
 println(dict=Dict),
 String = [Chars[random(1,Chars.len)] : _ in 1..11],
 % The tested strings
 % String := "abbaabba",
 % String := "babbbbaabbababb",
 % String := "babbbbaabbababbaaaaababbaaabbb",
 % String := "babbbbaabbababbaaaaababbaaabbbbaaabbbaba",
 % String := "aabababbbaaabaababaaabababaaabbabbaabbba",
 println(string=String),
 All = findall(S, S = s3(Dict,String)),
 Len = All.len,
 if Len < 100 then
   println(All)
 end,
 println(len=All.len),
 nl.</lang>

As can be seen by this summary, s3/2 is the most efficient of these versions. s/2 is too slow but for the simplest examples.

Output:

dict = [a,aa,b,ab,aab]
string = abbaabba
[[a,b,b,a,a,b,b,a],[a,b,b,a,ab,b,a],[a,b,b,aa,b,b,a],[a,b,b,aab,b,a],[ab,b,a,a,b,b,a],[ab,b,a,ab,b,a],[ab,b,aa,b,b,a],[ab,b,aab,b,a]]
len = 8

s: 0.012s
s2: 0.0s
s3: 0.0

Dict = ["a", "aa", "b", "ab", "aab"],  
string = babbbbaabbababb
[[b,a,b,b,b,b,a,a,b,b,a,b,a,b,b],[b,a,b,b,b,b,a,a,b,b,a,b,ab,b],[b,a,b,b,b,b,a,a,b,b,ab,a,b,b],[b,a,b,b,b,b,a,a,b,b,ab,ab,b],[b,a,b,b,b,b,a,ab,b,a,b,a,b,b],[b,a,b,b,b,b,a,ab,b,a,b,ab,b],[b,a,b,b,b,b,a,ab,b,ab,a,b,b],[b,a,b,b,b,b,a,ab,b,ab,ab,b],[b,a,b,b,b,b,aa,b,b,a,b,a,b,b],[b,a,b,b,b,b,aa,b,b,a,b,ab,b],[b,a,b,b,b,b,aa,b,b,ab,a,b,b],[b,a,b,b,b,b,aa,b,b,ab,ab,b],[b,a,b,b,b,b,aab,b,a,b,a,b,b],[b,a,b,b,b,b,aab,b,a,b,ab,b],[b,a,b,b,b,b,aab,b,ab,a,b,b],[b,a,b,b,b,b,aab,b,ab,ab,b],[b,ab,b,b,b,a,a,b,b,a,b,a,b,b],[b,ab,b,b,b,a,a,b,b,a,b,ab,b],[b,ab,b,b,b,a,a,b,b,ab,a,b,b],[b,ab,b,b,b,a,a,b,b,ab,ab,b],[b,ab,b,b,b,a,ab,b,a,b,a,b,b],[b,ab,b,b,b,a,ab,b,a,b,ab,b],[b,ab,b,b,b,a,ab,b,ab,a,b,b],[b,ab,b,b,b,a,ab,b,ab,ab,b],[b,ab,b,b,b,aa,b,b,a,b,a,b,b],[b,ab,b,b,b,aa,b,b,a,b,ab,b],[b,ab,b,b,b,aa,b,b,ab,a,b,b],[b,ab,b,b,b,aa,b,b,ab,ab,b],[b,ab,b,b,b,aab,b,a,b,a,b,b],[b,ab,b,b,b,aab,b,a,b,ab,b],[b,ab,b,b,b,aab,b,ab,a,b,b],[b,ab,b,b,b,aab,b,ab,ab,b]]
len = 32
s: 22.513s
s2: 0s
s3: 0s

Dict = ["a", "aa", "b", "ab", "aab"],  
string = babbbbaabbababbaaaaababbaaabbb
len = 6144
s2: 0.088s
s3: 0.012s

Dict = [a,aa,b,ab,aab]
string = babbbbaabbababbaaaaababbaaabbbbaaabbbaba
len = 73728
s2: 1.466s
s3: 0.341

sdict = [a,aa,b,ab,aab]
string = aabababbbaaabaababaaabababaaabbabbaabbba
len = 884736
s2: 19.431 seconds
s3: 3.81 seconds

PicoLisp

<lang PicoLisp>(setq *Dict (quote "a" "bc" "abc" "cd" "b")) (setq *Dict2

  (quote
     "mobile" "samsung" "sam" "sung" "man" "mango"
     "icecream" "and" "go" "i" "like" "ice" "cream" ) )

(de word (Str D)

  (let
     (Str (chop Str)
        Len (length Str)
        DP (need (inc Len))
        Res (need (inc Len))
        B 1 )
     (set DP 0)
     (map
        '((L)
           (and
              (get DP B)
              (for N (length L)
                 (let Str (pack (head N L))
                    (when (member Str D)
                       (set (nth Res (+ B N))
                          (copy (get Res B)) )
                       (queue (nth Res (+ B N)) Str)
                       (set (nth DP (+ B N))
                          (inc (get DP B)) ) ) ) ) )
           (inc 'B) )
        Str )
     (last Res) ) )

(println (word "abcd" *Dict)) (println (word "abbc" *Dict)) (println (word "abcbcd" *Dict)) (println (word "acdbc" *Dict)) (println (word "abcdd" *Dict)) (println (word "ilikesamsung" *Dict2)) (println (word "iii" *Dict2)) (println (word "ilikelikeimangoiii" *Dict2)) (println (word "samsungandmango" *Dict2)) (println (word "samsungandmangok" *Dict2)) (println (word "ksamsungandmango" *Dict2))</lang>

Output:

("a" "b" "cd")
("a" "b" "bc")
("a" "bc" "b" "cd")
("a" "cd" "bc")
NIL
("i" "like" "sam" "sung")
("i" "i" "i")
("i" "like" "like" "i" "man" "go" "i" "i" "i")
("sam" "sung" "and" "man" "go")
NIL
NIL

Python

Functional

The tokenTrees function recursively builds a tree of possible token sequences, using a list monad (concatMap with a function which returns its result wrapped in a list – an empty list where a parse has failed) to discard all branches which lead to dead ends. This allows us to return more than one possible word-break parse for a given lexicon and input string. (Searches for 'monadic parsing in Python' will yield references to more sophisticated uses of this general approach).

Works with: Python version 3.7

<lang python>Parsing a string for word breaks

from itertools import (chain)

stringParse :: [String] -> String -> Tree String

def stringParse(lexicon):

   A tree of strings representing a parse of s
      in terms of the tokens in lexicon.
   
   return lambda s: Node(s)(
       tokenTrees(lexicon)(s)
   )

tokenTrees :: [String] -> String -> [Tree String]

def tokenTrees(wds):

   A list of possible parse trees for s,
      based on the lexicon supplied in wds.
   
   def go(s):
       return [Node(s)([])] if s in wds else (
           concatMap(nxt(s))(wds)
       )

   def nxt(s):
       return lambda w: parse(
           w, go(s[len(w):])
       ) if s.startswith(w) else []

   def parse(w, xs):
       return [Node(w)(xs)] if xs else xs

   return lambda s: go(s)

showParse :: Tree String -> String

def showParse(tree):

   Multi line display of a string followed by any
      possible parses of it, or an explanatory
      message, if no parse was possible.
   
   def showTokens(x):
       xs = x['nest']
       return ' ' + x['root'] + (showTokens(xs[0]) if xs else )
   parses = tree['nest']
   return tree['root'] + ':\n' + (
       '\n'.join(
           map(showTokens, parses)
       ) if parses else ' ( Not parseable in terms of these words )'
   )

TEST -------------------------------------------------
main :: IO ()

def main():

   Parse test and display of results.

   lexicon = 'a bc abc cd b'.split()
   testSamples = 'abcd abbc abcbcd acdbc abcdd'.split()

   print(unlines(
       map(
           showParse,
           map(
               stringParse(lexicon),
               testSamples
           )
       )
   ))

GENERIC FUNCTIONS ---------------------------------------

Node :: a -> [Tree a] -> Tree a

def Node(v):

   Contructor for a Tree node which connects a
      value of some kind to a list of zero or
      more child trees.
   return lambda xs: {'type': 'Node', 'root': v, 'nest': xs}

concatMap :: (a -> [b]) -> [a] -> [b]

def concatMap(f):

   A concatenated list over which a function has been mapped.
      The list monad can be derived by using a function f which
      wraps its output in a list,
      (using an empty list to represent computational failure).
   return lambda xs: list(
       chain.from_iterable(map(f, xs))
   )

unlines :: [String] -> String

def unlines(xs):

   A single string derived by the intercalation
      of a list of strings with the newline character.
   return '\n'.join(xs)

MAIN ---

if __name__ == '__main__':

   main()</lang>

Output:

abcd:
 a b cd
abbc:
 a b bc
abcbcd:
 a bc b cd
 abc b cd
acdbc:
 a cd bc
abcdd:
 ( Not parseable in terms of these words )

Racket

This returns all the possible splits (and null list if none is possible). Who's to say which is the best?

<lang racket>#lang racket

(define render-phrases pretty-print)

(define dict-1 (list "a" "bc" "abc" "cd" "b")) (define dict-2 (list "mobile" "samsung" "sam" "sung" "man" "mango"

                    "icecream" "and" "go" "i" "like" "ice" "cream"))

(define (word-splits str d)

 (let ((memo (make-hash)))
   (let inr ((s str))
     (hash-ref! memo s
                (λ () (append* (filter-map (λ (w)
                                             (and (string-prefix? s w)
                                                  (if (string=? w s)
                                                      (list s)
                                                      (map (λ (tl) (string-append w " " tl))
                                                           (inr (substring s (string-length w)))))))
                                           d)))))))

(module+ main

 (render-phrases (word-splits "abcd" dict-1))
 (render-phrases (word-splits "abbc" dict-1))
 (render-phrases (word-splits "abcbcd" dict-1))
 (render-phrases (word-splits "acdbc" dict-1))
 (render-phrases (word-splits "abcdd" dict-1))
 (render-phrases (word-splits "ilikesamsung" dict-2))
 (render-phrases (word-splits "iii" dict-2))
 (render-phrases (word-splits "ilikelikeimangoiii" dict-2))
 (render-phrases (word-splits "samsungandmango" dict-2))
 (render-phrases (word-splits "samsungandmangok" dict-2))
 (render-phrases (word-splits "ksamsungandmango" dict-2)))</lang>

Output:

'("a b cd")
'("a b bc")
'("a bc b cd" "abc b cd")
'("a cd bc")
'()
'("i like samsung" "i like sam sung")
'("i i i")
'("i like like i man go i i i" "i like like i mango i i i")
'("samsung and man go"
  "samsung and mango"
  "sam sung and man go"
  "sam sung and mango")
'()
'()

Raku

(formerly Perl 6)

Works with: Rakudo version 2017.04

This implementation does not necessarily find every combination, it returns the one with the longest matching tokens.

<lang perl6>my @words = <a bc abc cd b>; my $regex = @words.join('|');

put "$_: ", word-break($_) for <abcd abbc abcbcd acdbc abcdd>;

sub word-break (Str $word) { ($word ~~ / ^ (<$regex>)+ $ /)[0] // "Not possible" }</lang>

Output:

abcd: a b cd
abbc: a b bc
abcbcd: abc b cd
acdbc: a cd bc
abcdd: Not possible

REXX

This REXX version allows the words to be tested (and the dictionary words) to be specified on the command line. <lang rexx>/*REXX program breaks up a word (or string) into a list of words from a dictionary.*/ parse arg a '/' x; a=space(a); x=space(x) /*get optional args; elide extra blanks*/ if a== | a=="," then a= 'abcd abbc abcbcd acdbc abcdd' /*Not specififed? Use default*/ if x== | x=="," then x= 'a bc abc cd b' /* " " " " */ na= words(a) /*the number of words to be tested. */ nx= words(x) /* " " " " " the dictionary*/ say nx ' dictionary words: ' x /*display the words in the dictionary. */ aw= 0 /*maximum word width obtained (so far).*/ say /*display a blank line to the terminal.*/

     do i=1  for na;           _= word(a, i)    /*obtain a word that will be tested.   */
     aw= max(aw, length(_) )                    /*find widest width word being tested. */
     end   /*i*/                                /* [↑]  AW  is used to align the output*/

@.= 0 /*initialize the dictionary to "null". */ xw= 0

     do i=1  for nx;           _= word(x, i)    /*obtain a word from the dictionary.   */
     xw= max(xw, length(_) );  @._= 1           /*find widest width dictionary word.   */
     end   /*i*/                                /* [↑]  define a dictionary word.      */

p= 0 /* [↓] process a word in the A list.*/

     do j=1  for na;           yy= word(a, j)   /*YY:   test a word  from the  A  list.*/
       do t=(nx+1)**(xw+1)  by -1  to 1  until y==;  y= yy    /*try word possibility.*/
       $=                                       /*nullify the (possible) result list.  */
          do try=t  while y\=                 /*keep testing until  Y  is exhausted. */
          p= (try + p)  // xw    + 1            /*use a possible width for this attempt*/
          p= fw(y, p);  if p==0  then iterate t /*is this part of the word not found ? */
          $= $ ?                                /*It was found. Add partial to the list*/
          y= substr(y,  p + 1)                  /*now, use and test the rest of word.  */
          end   /*try*/
       end      /*t*/

     if t==0  then $= '(not possible)'          /*indicate that the word isn't possible*/
     say right(yy, aw)    '───►'    strip($)    /*display the result to the terminal.  */
     end   /*j*/

exit /*stick a fork in it, we're all done. */ /*──────────────────────────────────────────────────────────────────────────────────────*/ fw: parse arg z,L; do k=L by -1 for L; ?=left(z,k); if @.? then leave; end; return k</lang>

output when using the default inputs:

5  dictionary words:  a bc abc cd b

  abcd ───► a b cd
  abbc ───► a b bc
abcbcd ───► abc b cd
 acdbc ───► a cd bc
 abcdd ───► (not possible)

Ring

Project : Word break problem

load "stdlib.ring" list = ["a", "bc", "abc", "cd", "b"] inslist = list for n = 1 to len(inslist) - 1

     for m = len(inslist) to 1 step -1
           insert(list,0,inslist[m])
     next

next strings = ["abcd", "abbc", "abcbcd", "acdbc", "abcdd"] ind = len(list) items = newlist(pow(2,len(list))-1,ind) powerset(list,ind)

for p = 1 to len(strings)

     showarray(items,strings[p])

       num = 0
       num2 = 0
       items = newlist(pow(2,len(list))-1,2*ind)
       for i = 2 to (2 << len(list)) - 1 step 2
            num2 = 0
            num = num + 1
            for j = 1 to len(list) 
                 if i & (1 << j)
                     num2 = num2 + 1
                     if list[j] != 0
                       items[num][num2] = list[j]
                    ok
                 ok
            next
       next
       return items

func showarray(items,par)

       ready = []
       for n = 1 to len(items)
             for m = n + 1 to len(items) - 1
                   flag = 0
                   str = ""
                   for x = 1 to len(items[n])
                        if items[n][x] != 0  
                           str = str + items[n][x] + " "
                        ok
                   next 
                   str = left(str, len(str) - 1)
                   strsave = str
                   str = substr(str, " ", "") 
                   if str = par
                      pos = find(ready,strsave)               
                      if pos = 0              
                         add(ready,strsave)
                         flag = 1 
                         see par + " = " + strsave + nl
                     ok
                     if flag != 1 
                        del(items,m)
                     ok
                  ok
             next
       next

</lang> Output:

abcd = a b cd
abbc = a b bc
abcbcd = abc b cd
acdbc = a cd bc

Rust

Dynamic programming

<lang rust>use std::collections::HashSet; fn create_string(s: &str, v: Vec<Option<usize>>) -> String {

   let mut idx = s.len();
   let mut slice_vec = vec![];
   while let Some(prev) = v[idx] {
       slice_vec.push(&s[prev..idx]); 
       idx = prev;
   }
   slice_vec.reverse();
   slice_vec.join(" ")

}

fn word_break(s: &str, dict: HashSet<&str>) -> Option<String> {

   let size = s.len() + 1;
   let mut possible = vec![None; size];

   let check_word = |i,j| dict.get(&s[i..j]).map(|_| i);

   for i in 1..size {
       possible[i] = possible[i].or_else(|| check_word(0,i));

       if possible[i].is_some() {
           for j in i+1..size {
               possible[j] = possible[j].or_else(|| check_word(i,j));
           }

           if possible[s.len()].is_some() {
               return Some(create_string(s, possible));
           }

       };
   }
   None

}

fn main() {

   let mut set = HashSet::new();
   set.insert("a");
   set.insert("bc");
   set.insert("abc");
   set.insert("cd");
   set.insert("b");
   println!("{:?}", word_break("abcd", set).unwrap());

}</lang>

Output:

"a b cd"

Scala

First solution

Finds all possible solutions recursively, using a trie representation of the dictionary: <lang scala>case class TrieNode(isWord: Boolean, children: Map[Char, TrieNode]) {

 def add(s: String): TrieNode = s match {
   case "" => copy(isWord = true)
   case _ => {
     val child = children.getOrElse(s.head, TrieNode(false, Map.empty))
     copy(children = children + (s.head -> child.add(s.tail)))
   }
 }

}

def buildTrie(xs: String*): TrieNode = {

 xs.foldLeft(TrieNode(false, Map.empty))(_.add(_))

}

def wordBreakRec(s: String, root: TrieNode, currentPos: TrieNode, soFar: String): List[List[String]] = {

 val usingCurrentWord = if (currentPos.isWord) {
   if (s.isEmpty) {
     List(List(soFar))
   } else {
     wordBreakRec(s, root, root, "").map(soFar :: _)
   }
 } else {
   List.empty[List[String]]
 }
 val usingCurrentPrefix = (for {
   ch <- s.headOption
   child <- currentPos.children.get(ch)
 } yield wordBreakRec(s.tail, root, child, soFar + ch)).getOrElse(List.empty)
 usingCurrentWord ++ usingCurrentPrefix

}

def wordBreak(s: String, dict: TrieNode): List[List[String]] = {

 wordBreakRec(s, dict, dict, "")

}</lang> Calling it with some example strings: <lang scala>val dict = buildTrie("a", "bc", "abc", "cd", "b") val testCases = List("abcd", "abbc", "abcbcd", "acdbc", "abcdd") for (s <- testCases) {

 val solutions = wordBreak(s, dict)
 println(s"$s has ${solutions.size} solution(s):")
 for (words <- solutions) {
   println("\t" + words.mkString(" "))
 }

}</lang>

Output:

abcd has 1 solution(s):
	a b cd
abbc has 1 solution(s):
	a b bc
abcbcd has 2 solution(s):
	a bc b cd
	abc b cd
acdbc has 1 solution(s):
	a cd bc
abcdd has 0 solution(s):

Combined solution

Output:

Best seen running in your browser either by ScalaFiddle (ES aka JavaScript, non JVM) or Scastie (remote JVM).

<lang Scala>object WordBreak extends App {

 val dict = buildTrie("a", "bc", "abc", "cd", "b")
 lazy val empty = TrieNode(isWord = false, Map.empty) // lazy or in a companion object

 case class TrieNode(isWord: Boolean, children: Map[Char, TrieNode]) {

   def add(s: String): TrieNode = {
     def child = children.withDefaultValue(empty)(s.head)

     if (s.isEmpty) copy(isWord = true)
     else copy(children = children.updated(s.head, child.add(s.tail)))
   }
 }

 def buildTrie(xs: String*): TrieNode = xs.foldLeft(empty)(_.add(_))

 def wordBreak(s: String, dict: TrieNode): List[List[String]] = {

   def wordBreakRec(s: String,
                    root: TrieNode,
                    currentPos: TrieNode,
                    soFar: String): List[List[String]] = {

     def usingCurrentWord =
       if (currentPos.isWord)
         if (s.isEmpty) List(List(soFar))
         else wordBreakRec(s, root, root, "").map(soFar :: _)
       else Nil

     def usingCurrentPrefix =
        (for {ch <- s.headOption
             child <- currentPos.children.get(ch)
       } yield wordBreakRec(s.tail, root, child, soFar + ch))
         .getOrElse(Nil)

     usingCurrentWord ++ usingCurrentPrefix
   }

   wordBreakRec(s, dict, dict, "")
 }

 // Calling it with some example strings:
 List("abcd", "abbc", "abcbcd", "acdbc", "abcdd").foreach(s => {
   val solutions = wordBreak(s, dict)

   println(s"$s has ${solutions.size} solution(s):")
   solutions.foreach(words => println(words.mkString("\t", " ", "")))
 })

}</lang>

Seed7

<lang seed7>$ include "seed7_05.s7i";

const func boolean: wordBreak (in string: stri, in array string: words, in string: resultList) is func

 result
   var boolean: found is FALSE;
 local
   var string: word is "";
 begin
   if stri = "" then
     writeln(resultList);
     found := TRUE;
   else
     for word range words do
       if startsWith(stri, word) and
           wordBreak(stri[succ(length(word)) ..], words, resultList & " " & word) then
         found := TRUE;
       end if;
     end for;
   end if;
 end func;

const proc: main is func

 local
   const array string: words is [] ("a", "bc", "abc", "cd", "b");
   var string: stri is "";
   var string: resultList is "";
 begin
   for stri range [] ("abcd", "abbc", "abcbcd", "acdbc", "abcdd") do
     write(stri <& ": ");
     if not wordBreak(stri, words, resultList) then
       writeln("can't break");
     end if;
   end for;
 end func;</lang>

Output:

abcd:  a b cd
abbc:  a b bc
abcbcd:  a bc b cd
 abc b cd
acdbc:  a cd bc
abcdd: can't break

Sidef

Translation of: zkl

<lang ruby>func word_break (str, words) {

   var r = ->(str, arr=[]) {
       return true if str.is_empty
       for word in (words) {
           str.begins_with(word) || next
           if (__FUNC__(str.substr(word.len), arr)) {
               arr << word
               return arr
           }
       }
       return false
   }(str)

   r.kind_of(Array) ? r.reverse : nil

}

var words = %w(a o is pi ion par per sip miss able) var strs = %w(a amiss parable opera operable inoperable permission mississippi)

for str in (strs) {

  printf("%11s: %s\n", str, word_break(str, words) \\ '(not possible)')

}</lang>

Output:

          a: ["a"]
      amiss: ["a", "miss"]
    parable: ["par", "able"]
      opera: ["o", "per", "a"]
   operable: ["o", "per", "able"]
 inoperable: (not possible)
 permission: ["per", "miss", "ion"]
mississippi: ["miss", "is", "sip", "pi"]

Swift

Translation of: Rust

<lang swift>infix operator ??= : AssignmentPrecedence

@inlinable public func ??= <T>(lhs: inout T?, rhs: T?) {

 lhs = lhs ?? rhs

}

private func createString(_ from: String, _ v: [Int?]) -> String {

 var idx = from.count
 var sliceVec = [Substring]()

 while let prev = v[idx] {
   let s = from.index(from.startIndex, offsetBy: prev)
   let e = from.index(from.startIndex, offsetBy: idx)
   
   sliceVec.append(from[s..<e])
   idx = prev
 }

 return sliceVec.reversed().joined(separator: " ")

}

public func wordBreak(str: String, dict: Set<String>) -> String? {

 let size = str.count + 1
 var possible = [Int?](repeating: nil, count: size)

 func checkWord(i: Int, j: Int) -> Int? {
   let s = str.index(str.startIndex, offsetBy: i)
   let e = str.index(str.startIndex, offsetBy: j)

   return dict.contains(String(str[s..<e])) ? i : nil
 }

 for i in 1..<size {
   possible[i] ??= checkWord(i: 0, j: i)

   guard possible[i] != nil else {
     continue
   }

   for j in i+1..<size {
     possible[j] ??= checkWord(i: i, j: j)
   }

   if possible[str.count] != nil {
     return createString(str, possible)
   }
 }

 return nil

}

let words = [

 "a",
 "bc",
 "abc",
 "cd",
 "b"

] as Set

let testCases = [

 "abcd",
 "abbc",
 "abcbcd",
 "acdbc",
 "abcdd"

]

for test in testCases {

 print("\(test):")
 print("  \(wordBreak(str: test, dict: words) ?? "did not parse with given words")")

}</lang>

Output:

abcd:
  a b cd
abbc:
  a b bc
abcbcd:
  a bc b cd
acdbc:
  a cd bc
abcdd:
  did not parse with given words

Tailspin

Does a depth-first search (after generating all possibilities on the current level). We could stop looking further after one is found, just add a condition to do nothing if a done-flag is set. <lang tailspin> templates wordBreaks&{dict:}

 composer starts&{with:}
   <does|not>
   rule does: (<=$with>) <'.*'>
   rule not: <'.*'> -> \(!VOID\)
 end starts
 
 'Breaking "$;" by $dict;:$#10;' -> !OUT::write
 { words:[], remaining: $} -> #
 '--done--$#10;' !

 when <{remaining: <=>}> do
   $.words -> \spaceSeparate[i](when <?($i <=1>)> do $! otherwise ' ' ! $ ! \spaceSeparate) -> '$...;$#10;' !
   
 otherwise
    def base: $;
    $dict... -> \(
      def word: $;
      $base.remaining -> starts&{with: $word} -> {words: [$base.words..., $word], remaining: $} !
    \) -> #

end wordBreaks

'ababab' -> wordBreaks&{dict: ['a', 'ab', 'bab']} -> !OUT::write 'abcbab' -> wordBreaks&{dict: ['a', 'ab', 'bab']} -> !OUT::write </lang>

Output:

Breaking "ababab" by [a, ab, bab]:
a bab ab
ab a bab
ab ab ab
--done--
Breaking "abcbab" by [a, ab, bab]:
--done--

Visual Basic .NET

Translation of: Java

<lang vbnet>Module Module1

   Structure Node
       Private ReadOnly m_val As String
       Private ReadOnly m_parsed As List(Of String)

       Sub New(initial As String)
           m_val = initial
           m_parsed = New List(Of String)
       End Sub

       Sub New(s As String, p As List(Of String))
           m_val = s
           m_parsed = p
       End Sub

       Public Function Value() As String
           Return m_val
       End Function

       Public Function Parsed() As List(Of String)
           Return m_parsed
       End Function
   End Structure

   Function WordBreak(s As String, dictionary As List(Of String)) As List(Of List(Of String))
       Dim matches As New List(Of List(Of String))
       Dim q As New Queue(Of Node)
       q.Enqueue(New Node(s))
       While q.Count > 0
           Dim node = q.Dequeue()
           REM check if fully parsed
           If node.Value.Length = 0 Then
               matches.Add(node.Parsed)
           Else
               For Each word In dictionary
                   REM check for match
                   If node.Value.StartsWith(word) Then
                       Dim valNew = node.Value.Substring(word.Length, node.Value.Length - word.Length)
                       Dim parsedNew As New List(Of String)
                       parsedNew.AddRange(node.Parsed)
                       parsedNew.Add(word)
                       q.Enqueue(New Node(valNew, parsedNew))
                   End If
               Next
           End If
       End While
       Return matches
   End Function

   Sub Main()
       Dim dict As New List(Of String) From {"a", "aa", "b", "ab", "aab"}
       For Each testString In {"aab", "aa b"}
           Dim matches = WordBreak(testString, dict)
           Console.WriteLine("String = {0}, Dictionary = {1}. Solutions = {2}", testString, dict, matches.Count)
           For Each match In matches
               Console.WriteLine(" Word Break = [{0}]", String.Join(", ", match))
           Next
           Console.WriteLine()
       Next

       dict = New List(Of String) From {"abc", "a", "ac", "b", "c", "cb", "d"}
       For Each testString In {"abcd", "abbc", "abcbcd", "acdbc", "abcdd"}
           Dim matches = WordBreak(testString, dict)
           Console.WriteLine("String = {0}, Dictionary = {1}. Solutions = {2}", testString, dict, matches.Count)
           For Each match In matches
               Console.WriteLine(" Word Break = [{0}]", String.Join(", ", match))
           Next
           Console.WriteLine()
       Next
   End Sub

End Module</lang>

Output:

String = aab, Dictionary = System.Collections.Generic.List`1[System.String]. Solutions = 4
 Word Break = [aab]
 Word Break = [a, ab]
 Word Break = [aa, b]
 Word Break = [a, a, b]

String = aa b, Dictionary = System.Collections.Generic.List`1[System.String]. Solutions = 0

String = abcd, Dictionary = System.Collections.Generic.List`1[System.String]. Solutions = 2
 Word Break = [abc, d]
 Word Break = [a, b, c, d]

String = abbc, Dictionary = System.Collections.Generic.List`1[System.String]. Solutions = 1
 Word Break = [a, b, b, c]

String = abcbcd, Dictionary = System.Collections.Generic.List`1[System.String]. Solutions = 3
 Word Break = [abc, b, c, d]
 Word Break = [a, b, cb, c, d]
 Word Break = [a, b, c, b, c, d]

String = acdbc, Dictionary = System.Collections.Generic.List`1[System.String]. Solutions = 2
 Word Break = [ac, d, b, c]
 Word Break = [a, c, d, b, c]

String = abcdd, Dictionary = System.Collections.Generic.List`1[System.String]. Solutions = 2
 Word Break = [abc, d, d]
 Word Break = [a, b, c, d, d]

Wren

Translation of: Go

Library: Wren-fmt

<lang ecmascript>import "/fmt" for Fmt

class Prefix {

   construct new(length, broken) {
       _length = length
       _broken = broken
   }
   length { _length }
   broken { _broken }

}

var wordBreak = Fn.new { |d, s|

   if (s == "") return [[], true]
   var bp = [Prefix.new(0, [])]
   for (end in 1..s.count) {
       for (i in bp.count-1..0) {
           var w = s[bp[i].length...end]
           if (d[w]) {
               var b = bp[i].broken.toList
               b.add(w)
               if (end == s.count) return [b, true]
               bp.add(Prefix.new(end, b))
               break
           }
       }
   }
   return [[], false]

}

var words = ["a", "bc", "abc", "cd", "b"] var d = {} words.each { |w| d[w] = true } for (s in ["abcd", "abbc", "abcbcd", "acdbc", "abcdd"]) {

   var res = wordBreak.call(d, s)
   if (res[1]) {
       Fmt.print("$s: $s", s, res[0].join(" "))
   } else {
       System.print("can't break")
   }

}</lang>

Output:

abcd: a b cd
abbc: a b bc
abcbcd: a bc b cd
acdbc: a cd bc
can't break

zkl

<lang zkl>fcn wordBreak(str,words){ // words is string of space seperated words

  words=words.split(" ");	// to list of words
  r:=fcn(str,words,sink){	// recursive search, easy to collect answer
     foreach word in (words){

if(not str) return(True); // consumed string ie matched everything if(str.find(word)==0){ // word starts str, 0 so answer is ordered z:=word.len(); if(self.fcn(str.del(0,z),words,sink)) return(sink.write(word)); }

     }
     False		// can't make forward progress, back out & retry
  }(str,words,List());		// run the lambda
  if(False==r) return("not possible");
  r.reverse().concat(" ")

}</lang> <lang zkl>foreach text in (T("abcd","abbc","abcbcd","acdbc","abcdd")){

  println(text,": ",wordBreak(text,"a bc abc cd b"))

}</lang>

Output:

abcd: a b cd
abbc: a b bc
abcbcd: a bc b cd
acdbc: a cd bc
abcdd: not possible