Word break problem: Difference between revisions

Content added Content deleted
m (→‎{{header|REXX}}: added/changed whitespace and comments.)
(C++ - now a translation of Rust)
Line 67: Line 67:


=={{header|C++}}==
=={{header|C++}}==
{{trans|Rust}}
<lang cpp>#include <fstream>
<lang cpp>#include <algorithm>
#include <iostream>
#include <iostream>
#include <list>
#include <optional>
#include <set>
#include <set>
#include <string>
#include <string>
#include <string_view>
#include <vector>


struct string_comparator {
auto load_dictionary(const std::string& filename) {
using is_transparent = void;
std::ifstream in(filename);
bool operator()(const std::string& lhs, const std::string& rhs) const {
if (!in)
return lhs < rhs;
throw std::runtime_error("Cannot open file " + filename);
}
std::set<std::string> words;
bool operator()(const std::string& lhs, const std::string_view& rhs) const {
std::string word;
return lhs < rhs;
while (getline(in, word))
}
words.insert(word);
bool operator()(const std::string_view& lhs, const std::string& rhs) const {
return words;
return lhs < rhs;
}
}
};


bool split_into_words(const std::set<std::string>& dict,
using dictionary = std::set<std::string, string_comparator>;

const std::string& str,
template <typename iterator, typename separator>
std::list<std::string>& words) {
std::string join(iterator begin, iterator end, separator sep) {
for (size_t len = str.size(); len > 0; --len) {
std::string result;
auto word = str.substr(0, len);
result += *begin++;
if (dict.find(word) == dict.end())
continue;
for (; begin != end; ++begin) {
if (len == str.size()) {
result += ' ';
words.push_back(word);
result += *begin;
return true;
} else if (split_into_words(dict, str.substr(len), words)) {
words.push_front(word);
return true;
}
}
}
return false;
return result;
}
}


void print_words(const std::list<std::string>& words) {
auto create_string(const std::string_view& s,
const std::vector<std::optional<size_t>>& v) {
if (words.empty())
return;
auto idx = s.size();
std::vector<std::string_view> sv;
auto i = words.begin();
while (v[idx].has_value()) {
std::cout << *i++;
for (; i != words.end(); ++i)
size_t prev = v[idx].value();
std::cout << ' ' << *i;
sv.push_back(s.substr(prev, idx - prev));
std::cout << '\n';
idx = prev;
}
std::reverse(sv.begin(), sv.end());
return join(sv.begin(), sv.end(), ' ');
}
}


void test(const std::set<std::string>& dict, const std::string& input) {
std::optional<std::string> word_break(const std::string_view& str,
const dictionary& dict) {
std::list<std::string> words;
auto size = str.size() + 1;
std::cout << "input: " << input << "\nresult: ";
std::vector<std::optional<size_t>> possible(size);
if (split_into_words(dict, input, words))
auto check_word = [&dict, &str](size_t i, size_t j)
print_words(words);
-> std::optional<size_t> {
else
if (dict.find(str.substr(i, j - i)) != dict.end())
std::cout << "impossible!\n";
return i;
return std::nullopt;
};
for (size_t i = 1; i < size; ++i) {
if (!possible[i].has_value())
possible[i] = check_word(0, i);
if (possible[i].has_value()) {
for (size_t j = i + 1; j < size; ++j) {
if (!possible[j].has_value())
possible[j] = check_word(i, j);
}
if (possible[str.size()].has_value())
return create_string(str, possible);
}
}
return std::nullopt;
}
}


int main(int argc, char** argv) {
int main(int argc, char** argv) {
dictionary dict;
if (argc != 2) {
dict.insert("a");
std::cerr << "usage: " << argv[0] << " dictionary\n";
dict.insert("bc");
return EXIT_FAILURE;
dict.insert("abc");
}
dict.insert("cd");
try {
auto dict = load_dictionary(argv[1]);
dict.insert("b");
test(dict, "wordbreakproblem");
auto result = word_break("abcd", dict);
if (result.has_value())
test(dict, "segmenttheinputstring");
test(dict, "1234");
std::cout << result.value() << '\n';
return 0;
} catch (const std::exception& ex) {
}
std::cerr << ex.what() << '\n';
</lang>
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}</lang>


{{out}}
{{out}}
Using the dictionary at http://wiki.puzzlers.org/pub/wordlists/unixdict.txt:
Using the dictionary at http://wiki.puzzlers.org/pub/wordlists/unixdict.txt:
<pre>
<pre>
a b cd
input: wordbreakproblem
result: word break problem
input: segmenttheinputstring
result: segment the input string
input: 1234
result: impossible!
</pre>
</pre>