Words from neighbour ones: Difference between revisions

Content added Content deleted

Inline

Revision as of 23:52, 6 February 2021

Task

Use the dictionary unixdict.txt

Ignore any word in the dictionary whose length is less than 9.

Build new words from the characters of neighbouring dictionary words as follows:
1 <= n < (dictionary length) - 9.
char1 = 1^st character of n^th word.
char2 = 2^nd character of (n+1)^th word.
char3 = 3^rd character of (n+2)^th word.
⋮
char9 = 9^th character of (n+8)^th word.

Concatenate (append) the nine characters by:

      newword = char1 + char2 + char3 + ... + char9

If newword is in the dictionary, then show it on this page.

Length of newword = 9

Other tasks related to string operations:

Metrics

Counting

Remove/replace

Anagrams/Derangements/shuffling

Find/Search/Determine

Formatting

Song lyrics/poems/Mad Libs/phrases

Tokenize

Sequences

C

<lang c>#include <stdio.h>

#include <stdlib.h>
#include <string.h>

#define MAX_WORD_SIZE 80
#define MIN_LENGTH 9
#define WORD_SIZE (MIN_LENGTH + 1)

/* Write `message` followed by a newline to stderr, then terminate the
   process with exit status 1.  Never returns. */
void fatal(const char* message) {
    fputs(message, stderr);
    fputc('\n', stderr);
    exit(1);
}

/* malloc() wrapper: returns a block of `n` bytes, or reports
   "Out of memory" through fatal() when the allocation fails. */
void* xmalloc(size_t n) {
    void* const block = malloc(n);
    if (block != NULL)
        return block;
    fatal("Out of memory");
    return NULL; /* only reached if fatal() were to return */
}

/* realloc() wrapper: resizes `p` to `n` bytes and returns the (possibly
   moved) block, or reports "Out of memory" through fatal() on failure. */
void* xrealloc(void* p, size_t n) {
    void* const block = realloc(p, n);
    if (block != NULL)
        return block;
    fatal("Out of memory");
    return NULL; /* only reached if fatal() were to return */
}

/* Comparison callback with the qsort()/bsearch() signature: orders two
   fixed-size word records by comparing their first WORD_SIZE bytes. */
int word_compare(const void* p1, const void* p2) {
    const int order = memcmp(p1, p2, WORD_SIZE);
    return order;
}

/*
 * Reads the dictionary named by argv[1] (default "unixdict.txt"), keeps the
 * first WORD_SIZE bytes of every word of at least MIN_LENGTH letters, and
 * prints each distinct MIN_LENGTH-letter word that can be formed by taking
 * letter j of the j-th word in a window of MIN_LENGTH consecutive words and
 * that is itself present in the dictionary.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the file cannot be opened.
 */
int main(int argc, char** argv) {
    const char* filename = argc < 2 ? "unixdict.txt" : argv[1];
    FILE* in = fopen(filename, "r");
    if (!in) {
        perror(filename);
        return EXIT_FAILURE;
    }
    char line[MAX_WORD_SIZE];
    size_t size = 0, capacity = 1024;
    char* words = xmalloc(WORD_SIZE * capacity);
    while (fgets(line, sizeof(line), in)) {
        // Cut at the first CR or LF rather than assuming the last character
        // is a newline: the final line may have none, and CRLF files would
        // otherwise have the '\r' counted as a letter.
        size_t len = strcspn(line, "\r\n");
        int unterminated = (line[len] == '\0');
        line[len] = '\0';
        if (unterminated && len == sizeof(line) - 1) {
            // The buffer filled before the end of the line: discard the
            // remainder so it is not mistaken for a separate word.
            int c;
            while ((c = fgetc(in)) != EOF && c != '\n') {
            }
        }
        if (len < MIN_LENGTH)
            continue;
        if (size == capacity) {
            capacity *= 2;
            words = xrealloc(words, WORD_SIZE * capacity);
        }
        // len >= MIN_LENGTH, so the first WORD_SIZE bytes of line are all
        // initialised: either ten letters, or nine letters plus the NUL.
        memcpy(&words[size * WORD_SIZE], line, WORD_SIZE);
        ++size;
    }
    fclose(in);
    // Sort so bsearch() can be used and so neighbours are in dictionary order.
    qsort(words, size, WORD_SIZE, word_compare);
    int count = 0;
    char prev_word[WORD_SIZE] = { 0 };
    for (size_t i = 0; i + MIN_LENGTH <= size; ++i) {
        // Letter j of the candidate is letter j of the j-th word in the window.
        char word[WORD_SIZE] = { 0 };
        for (size_t j = 0; j < MIN_LENGTH; ++j)
            word[j] = words[(i + j) * WORD_SIZE + j];
        // Skip a candidate identical to the one from the previous window.
        if (word_compare(word, prev_word) == 0)
            continue;
        // The candidate's tenth byte is NUL, so it can only match a stored
        // word of exactly MIN_LENGTH letters.
        if (bsearch(word, words, size, WORD_SIZE, word_compare))
            printf("%2d. %s\n", ++count, word);
        memcpy(prev_word, word, WORD_SIZE);
    }
    free(words);
    return EXIT_SUCCESS;
}</lang>

Output:

 1. applicate
 2. architect
 3. astronomy
 4. christine
 5. christoph
 6. committee
 7. composite
 8. constrict
 9. construct
10. different
11. extensive
12. greenwood
13. implement
14. improvise
15. intercept
16. interpret
17. interrupt
18. philosoph
19. prescript
20. receptive
21. telephone
22. transcend
23. transport
24. transpose

C++

<lang cpp>#include <algorithm>

#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

// Reads the dictionary named by argv[1] (default "unixdict.txt"), keeps the
// words of at least min_length letters, and prints each distinct
// min_length-letter word that can be formed by taking letter j of the j-th
// word in a window of min_length consecutive words and that is itself in the
// dictionary.  Returns EXIT_FAILURE when the file cannot be opened.
int main(int argc, char** argv) {
    // size_t so that comparisons with string sizes and indices are not
    // mixed signed/unsigned.
    const std::size_t min_length = 9;
    const char* filename = argc < 2 ? "unixdict.txt" : argv[1];
    std::ifstream in(filename);
    if (!in) {
        std::cerr << "Cannot open file '" << filename << "'.\n";
        return EXIT_FAILURE;
    }
    std::string line;
    std::vector<std::string> words;
    while (std::getline(in, line)) {
        // A CRLF file leaves a trailing '\r' that must not count as a letter.
        if (!line.empty() && line.back() == '\r')
            line.pop_back();
        if (line.size() >= min_length)
            words.push_back(line);
    }
    // Sorted order is required by binary_search below.
    std::sort(words.begin(), words.end());
    std::string previous_word;
    int count = 0;
    for (std::size_t i = 0, n = words.size(); i + min_length <= n; ++i) {
        // Letter j of the candidate is letter j of the j-th word in the window.
        std::string word;
        word.reserve(min_length);
        for (std::size_t j = 0; j < min_length; ++j)
            word += words[i + j][j];
        // Skip a candidate identical to the one from the previous window.
        if (previous_word == word)
            continue;
        if (std::binary_search(words.begin(), words.end(), word))
            std::cout << std::setw(2) << ++count << ". " << word << '\n';
        previous_word = word;
    }
    return EXIT_SUCCESS;
}</lang>

Output:

 1. applicate
 2. architect
 3. astronomy
 4. christine
 5. christoph
 6. committee
 7. composite
 8. constrict
 9. construct
10. different
11. extensive
12. greenwood
13. implement
14. improvise
15. intercept
16. interpret
17. interrupt
18. philosoph
19. prescript
20. receptive
21. telephone
22. transcend
23. transport
24. transpose

Phix

Oh gosh, this is all rather new and exciting.... <lang Phix>-- Words of nine or more letters are the only ones that take part.
function over9(string word)
    return length(word)>8
end function

sequence dictionary = filter(split_any(get_text("demo/unixdict.txt")," \r\n"),over9)

-- n-th letter of every dictionary word, sliced from element n to element
-- -10+n so that all nine slices have the same length.
function slicen(integer n)
    sequence column = vslice(dictionary,n)
    return column[n..-10+n]
end function

-- Transposing the nine slices yields one nine-letter candidate per window.
sequence columns = apply(tagset(9),slicen)
sequence candidates = columnize(columns)
sequence neighwords = unique(filter(candidates,"in",dictionary))
printf(1,"%d words: %s\n",{length(neighwords),join(shorten(neighwords,"",3))})</lang>

Output:

24 words: applicate architect astronomy ... transcend transport transpose

Raku

<lang perl6># Words of at least nine letters, and the set of those exactly nine long.
my @long-words   = 'unixdict.txt'.IO.lines.grep( { .chars >= 9 } );
my $nine-letters = @long-words.grep( { .chars == 9 } ).Set;

# Letter k of the k-th word in a nine-word window, joined into one string.
sub diagonal ( @window ) {
    (^9).map( { @window[$_].substr($_, 1) } ).join
}

# rotor( 9 => -8 ) slides a nine-word window forward one word at a time.
my @new-words = @long-words.rotor( 9 => -8 )
                           .map( &diagonal )
                           .grep( { $nine-letters{$_} } );

.say for @new-words.unique;</lang>

Output:

applicate
architect
astronomy
christine
christoph
committee
composite
constrict
construct
different
extensive
greenwood
implement
improvise
intercept
interpret
interrupt
philosoph
prescript
receptive
telephone
transcend
transport
transpose

REXX

<lang rexx>/*REXX pgm finds words that're composed from neighbor words (within an identified dict).*/
parse arg minL iFID .                            /*obtain optional arguments from the CL*/
if minL=='' | minL==","  then minL= 9            /*Not specified?  Then use the default.*/
if iFID=='' | iFID==","  then iFID='unixdict.txt' /* "      "         "   "   "     "    */
#= 0;        @.=;        !.= 0                   /*number of usable words in dictionary.*/
            do recs=0  while lines(iFID)\==0     /*read each word in the file  (word=X).*/
            x= strip( linein( iFID) )            /*pick off a word from the input line. */
            if length(x)<minL  then iterate      /*Is the word too short?  Then skip it.*/
            #= # + 1                             /*bump the count of usable words.      */
            @.#= x;       upper x;      !.x= 1   /*original case;  create findable word.*/
            end   /*recs*/                       /* [↑]   semaphore name is uppercased. */
say copies('─', 30)      recs       "words in the dictionary file: "        iFID
say copies('─', 30)  right(#, length(recs) )  "usable words in the dictionary file."
finds= 0                                         /*count of the neighbor words found.   */
say;         $=                                  /*$:  the list of words found so far.  */
windows= max(0,  # - minL + 1)                   /*number of complete windows of words. */
        do j=1  for windows;     y= left(@.j, 1) /*initialize the new word to be built. */
             do k=2  to minL;    n= j + k - 1    /*Kth letter comes from the word J+K-1.*/
             y= y || substr(@.n, k, 1)           /*build a new word, 1 letter at a time.*/
             end   /*k*/
        uy=y;                    upper uy        /*obtain uppercase version of the word.*/
        if \!.uy  then iterate                   /*Does the new word exist?  No, skip it*/
        if wordpos(uy, $)>0  then iterate        /*Word is a dup?  Then skip duplicate. */
        finds= finds + 1                         /*bump count of found neighboring words*/
        $= $ uy                                  /*add a word to the list of words found*/
        say right( left(y, 30), 40)              /*indent original word for readability.*/
        end      /*j*/
                                                 /*stick a fork in it,  we're all done. */
say copies('─', 30) finds ' neighbor words found with a minimum length of ' minL</lang>

output when using the default inputs:

────────────────────────────── 25104 words in the dictionary file:  unixdict.txt
──────────────────────────────  7250 usable words in the dictionary file.

          applicate
          architect
          astronomy
          christine
          christoph
          committee
          composite
          constrict
          construct
          different
          extensive
          greenwood
          implement
          improvise
          intercept
          interpret
          interrupt
          philosoph
          prescript
          receptive
          telephone
          transcend
          transport
          transpose
────────────────────────────── 24  neighbor words found with a minimum length of  9

Ring

<lang ring>
// Find the 9-letter dictionary words that can be built by taking the 1st
// letter of one word, the 2nd letter of the next word, and so on through
// nine consecutive words (after discarding words shorter than 9 letters).

cStr = read("unixdict.txt")
wordList = str2list(cStr)
nextwords = []

see "working..." + nl

// Walk backwards so that deleting an item never shifts an unvisited index.
for n = len(wordList) to 1 step -1
   if len(wordList[n]) < 9
      del(wordList,n)
   ok
next

see "New words are:" + nl

// The last complete nine-word window starts at len(wordList)-8.
for n = 1 to len(wordList)-8
   str = ""
   for m = 1 to 9
      str = str + substr(wordList[n+m-1],m,1)
   next
   // Keep the candidate only if it is a dictionary word not already recorded.
   if find(wordList,str) > 0 and find(nextwords,str) = 0
      add(nextwords,str)
   ok
next

for n = 1 to len(nextwords)
   see "" + n + ". " + nextwords[n] + nl
next

see "done..." + nl
</lang>

working...
New words are:
1. applicate
2. architect
3. astronomy
4. christine
5. christoph
6. committee
7. composite
8. constrict
9. construct
10. different
11. extensive
12. greenwood
13. implement
14. improvise
15. intercept
16. interpret
17. interrupt
18. philosoph
19. prescript
20. receptive
21. telephone
22. transcend
23. transport
24. transpose
done...

Wren

Library: Wren-sort

Library: Wren-fmt

<lang ecmascript>import "io" for File
import "/sort" for Find
import "/fmt" for Fmt

// Prints every distinct 9-letter dictionary word that can be formed by taking
// letter k of the k-th word in a window of nine consecutive words, considering
// only words of at least nine letters.
var wordList = "unixdict.txt" // local copy
var words = File.read(wordList).trimEnd().split("\n").where { |w| w.count >= 9 }.toList
var count = 0
var alreadyFound = []
// Guard first: a range with a negative end would count downwards.
if (words.count >= 9) {
    // Inclusive range so that the final nine-word window is examined too.
    for (i in 0..words.count - 9) {
        var word = ""
        for (j in i...i+9) word = word + words[j][j-i]
        // alreadyFound is kept in discovery order, so test membership with
        // contains() rather than with a search that assumes a sorted list.
        if (!alreadyFound.contains(word) && Find.all(words, word)[0]) {
            count = count + 1
            Fmt.print("$2d: $s", count, word)
            alreadyFound.add(word)
        }
    }
}</lang>

Output:

 1: applicate
 2: architect
 3: astronomy
 4: christine
 5: christoph
 6: committee
 7: composite
 8: constrict
 9: construct
10: different
11: extensive
12: greenwood
13: implement
14: improvise
15: intercept
16: interpret
17: interrupt
18: philosoph
19: prescript
20: receptive
21: telephone
22: transcend
23: transport
24: transpose

@@ Line 10: / Line 10: @@
 <br>1 <= n < (dictionary length) - 9.
 <br>char1 = 1<sup>st</sup> character of n<sup>th</sup> word.
-<br>char2 = 2(sup>nd</sup> character of (n+1)<sup>th</sup> word.
+<br>char2 = 2<sup>nd</sup> character of (n+1)<sup>th</sup> word.
 <br>char3 = 3<sup>rd</sup> character of (n+2)<sup>th</sup> word.
 <br> &nbsp; &nbsp; <big> &#8942; </big>