I'm working on modernizing Rosetta Code's infrastructure, starting with communications. Please accept this time-limited open invite to RC's Slack. --Michael Mol (talk) 20:59, 30 May 2020 (UTC)

Find words whose first and last three letters are equal

From Rosetta Code
Find words whose first and last three letters are equal is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Using the dictionary  unixdict.txt

find the words whose first and last three letters are equal.

Any word shown should have a length   >  5.


Other tasks related to string operations:
Metrics
Counting
Remove/replace
Anagrams/Derangements/shuffling
Find/Search/Determine
Formatting
Song lyrics/poems/Mad Libs/phrases
Tokenize
Sequences



Action![edit]

In the following solution the input file unixdict.txt is loaded from the H6 drive. The Altirra emulator automatically converts the ASCII CR/LF characters into character 155 of the ATASCII charset used by Atari 8-bit computers when one of the H6-H10 hard drives under DOS 2.5 is used.

; Returns 1 when the word is longer than 5 characters and its first
; three characters equal its last three, otherwise 0.
; word(0) holds the string length; characters start at index 1.
BYTE FUNC IsValidWord(CHAR ARRAY word)
  BYTE len,i

  len=word(0)
  IF len<6 THEN RETURN (0) FI
  FOR i=1 TO 3
  DO
    ; compare the i-th character with the i-th of the last three
    IF word(i)#word(len-3+i) THEN RETURN (0) FI
  OD
RETURN (1)
 
; Reads the file named by fname line by line through channel 1 and
; prints every line accepted by IsValidWord.
PROC FindWords(CHAR ARRAY fname)
  CHAR ARRAY line(256)
  BYTE dev=[1]

  ; make sure the channel is free before opening it for reading (mode 4)
  Close(dev)
  Open(dev,fname,4)
  WHILE Eof(dev)=0
  DO
    InputSD(dev,line)
    IF IsValidWord(line) THEN
      PrintE(line)
    FI
  OD
  Close(dev)
RETURN
 
; Program entry point: scans the dictionary stored on the H6 drive.
PROC Main()
  FindWords("H6:UNIXDICT.TXT")
RETURN
Output:

Screenshot from Atari 8-bit computer

antiperspirant calendrical einstein hotshot murmur oshkosh tartar testes

Ada[edit]

with Ada.Text_Io;
with Ada.Strings.Fixed;
 
--  Prints every word of unixdict.txt that is longer than five characters
--  and whose first three characters equal its last three.
procedure Find_Three_Equals is
   use Ada.Text_Io;
   use Ada.Strings.Fixed;

   Filename : constant String := "unixdict.txt";
   File     : File_Type;
begin
   Open (File, In_File, Filename);
   while not End_Of_File (File) loop
      declare
         Word : constant String := Get_Line (File);
      begin
         --  Head/Tail yield the leading/trailing three characters.
         if Word'Length > 5 and then Head (Word, 3) = Tail (Word, 3) then
            Put_Line (Word);
         end if;
      end;
   end loop;
   Close (File);
end Find_Three_Equals;

ALGOL 68[edit]

# find 6 (or more) character words with the same first and last 3 letters #
# reads unixdict.txt one word per line, prints the matches five per line #
# and finishes with a count of the words found #
IF FILE input file;
STRING file name = "unixdict.txt";
open( input file, file name, stand in channel ) /= 0
THEN
# failed to open the file #
print( ( "Unable to open """ + file name + """", newline ) )
ELSE
# file opened OK #
BOOL at eof := FALSE;
# set the EOF handler for the file #
on logical file end( input file, ( REF FILE f )BOOL:
BEGIN
# note that we reached EOF on the #
# latest read #
at eof := TRUE;
# return TRUE so processing can continue #
TRUE
END
);
INT count := 0;
# the loop condition reads the next word and then tests the EOF flag #
WHILE STRING word;
get( input file, ( word, newline ) );
NOT at eof
DO
# only words of more than 5 characters are considered #
IF INT w len = ( UPB word + 1 ) - LWB word;
w len > 5
THEN
# compare the first three characters with the last three #
IF word[ 1 : 3 ] = word[ w len - 2 : ]
THEN
count +:= 1;
print( ( word, " " ) );
# start a new line after every fifth word, otherwise pad the #
# word with spaces so that the columns line up #
IF count MOD 5 = 0
THEN print( ( newline ) )
ELSE FROM w len + 1 TO 14 DO print( ( " " ) ) OD
FI
FI
FI
OD;
print( ( newline, "found ", whole( count, 0 ), " words with the same first and last 3 characters", newline ) );
close( input file )
FI
Output:
antiperspirant calendrical    einstein       hotshot        murmur
oshkosh        tartar         testes
found 8 words with the same first and last 3 characters

Arturo[edit]

; load the dictionary as a list of lines; the path is resolved relative to this script
words: read.lines relative "unixdict.txt"
; true when the first three items of w equal its last three items
equalHeadTail?: function [w][
equal? first.n: 3 w last.n: 3 w
]
 
; print every word longer than 5 characters whose head and tail match
loop words 'word [
if 5 < size word [
if equalHeadTail? word ->
print word
]
]
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

AutoHotkey[edit]

; Read the dictionary from the desktop and show, in an always-on-top
; message box, every word of six or more characters whose first three
; characters equal its last three.
FileRead, db, % A_Desktop "\unixdict.txt"
for i, word in StrSplit(db, "`n", "`r")
{
    ; SubStr(word, -2) yields the last three characters
    if (StrLen(word) >= 6 && SubStr(word, 1, 3) = SubStr(word, -2))
        result .= word "`n"
}
MsgBox, 262144, , % result
return
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

AWK[edit]

 
# syntax: GAWK -f FIND_WORDS_WHICH_FIRST_AND_LAST_THREE_LETTERS_ARE_EQUALS.AWK unixdict.txt
#
# Prints every input line longer than five characters whose first three
# characters are the same as its last three.
{
    n = length($0)
    if (n > 5 && substr($0, 1, 3) == substr($0, n - 2, 3)) {
        print
    }
}
END {
    exit(0)
}
 
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

C++[edit]

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>

// Lists, with a running number, every word in the dictionary file that is
// longer than five characters and whose first three characters equal its
// last three. The file name may be given as the first command line
// argument; it defaults to "unixdict.txt".
// Returns EXIT_FAILURE when the file cannot be opened, EXIT_SUCCESS otherwise.
int main(int argc, char** argv) {
    const char* filename(argc < 2 ? "unixdict.txt" : argv[1]);
    std::ifstream in(filename);
    if (!in) {
        std::cerr << "Cannot open file '" << filename << "'.\n";
        return EXIT_FAILURE;
    }
    std::string word;
    int n = 0;
    while (std::getline(in, word)) {
        const size_t len = word.size();
        // compare the first 3 characters with the suffix starting at len - 3
        if (len > 5 && word.compare(0, 3, word, len - 3) == 0)
            std::cout << ++n << ": " << word << '\n';
    }
    return EXIT_SUCCESS;
}
Output:
1: antiperspirant
2: calendrical
3: einstein
4: hotshot
5: murmur
6: oshkosh
7: tartar
8: testes

F#[edit]

 
// First and last three letters are equal. Nigel Galloway: February 18th., 2021
// fN is true for strings of at least six characters whose first three
// characters equal their last three.
let fN (g:string) = String.length g >= 6 && g.[..2] = g.[g.Length-3..]
// stream the dictionary lazily, keep the matching words and print each one
System.IO.File.ReadLines("unixdict.txt")
|> Seq.filter fN
|> Seq.iter (printfn "%s")
 
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

Factor[edit]

Read entire file[edit]

This version reads the entire dictionary file into memory and filters it. This is the fastest version by far. Factor is optimized for making multiple passes over data; it actually takes longer if we combine the two filters into one, either with short-circuiting or non-short-circuiting and.

! Reads the whole dictionary into memory, then filters it in two passes.
USING: io io.encodings.ascii io.files kernel math sequences ;
 
"unixdict.txt" ascii file-lines
! keep only words longer than five characters
[ length 5 > ] filter
! keep words whose first three elements equal their last three
[ [ 3 head-slice ] [ 3 tail-slice* ] bi = ] filter
[ print ] each
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

Read file line by line[edit]

This version reads the dictionary file line by line and prints out words that fit the criteria. This ends up being a bit more imperative and deeply nested, but unlike the version above, we only load one word at a time, saving quite a bit of memory.

! Reads the dictionary one line at a time and prints each matching word.
USING: combinators.short-circuit io io.encodings.ascii io.files
kernel math sequences ;
 
"unixdict.txt" ascii [
[
! the value left by readln is also used as the loop flag
readln dup
[
dup
{
! both conditions must hold; 1&& short-circuits on the first failure
[ length 5 > ]
[ [ 3 head-slice ] [ 3 tail-slice* ] bi = ]
} 1&&
[ print ] [ drop ] if
] when*
] loop
] with-file-reader
Output:

As above.

Lazy file I/O[edit]

This version lazily reads the input file by treating a stream like a lazy list with the llines word. This allows us the nice style of the first example with the memory benefits of the second example. Unlike in the first example, combining the filters would buy us some time here, as lazy lists aren't as efficient as sequences.

! Treats the file as a lazy list of lines and filters it lazily.
USING: io io.encodings.ascii io.files kernel lists lists.lazy
math sequences ;
 
"unixdict.txt" ascii <file-reader> llines
! keep only words longer than five characters
[ length 5 > ] lfilter
! keep words whose first three elements equal their last three
[ [ 3 head-slice ] [ 3 tail-slice* ] bi = ] lfilter
[ print ] leach
Output:

As above.

Forth[edit]

Works with: Gforth
\ True when the string addr/len is longer than 5 characters and its
\ first three characters equal its last three.
: first-last-three-equal { addr len -- ? }
  len 6 < if false exit then
  \ compare the trailing three characters with the leading three
  addr len + 3 -  3  addr 3  compare 0= ;
 
\ size of the line buffer handed to read-line
256 constant max-line
 
\ Reads unixdict.txt line by line and prints, with a running number,
\ every line accepted by first-last-three-equal.
: main
0 0 { count fd-in }
s" unixdict.txt" r/o open-file throw to fd-in
begin
\ each line is read into the scratch area at here
here max-line fd-in read-line throw
while
\ stack holds the line length; build addr/len for the line just read
here swap 2dup first-last-three-equal if
count 1+ to count
count 1 .r ." . " type cr
else
2drop
then
repeat
\ discard the length left by the final read-line
drop
fd-in close-file throw ;
 
main
bye
Output:
1. antiperspirant
2. calendrical
3. einstein
4. hotshot
5. murmur
6. oshkosh
7. tartar
8. testes

FreeBASIC[edit]

#define NULL 0
 
'node of a singly linked list holding one dictionary word
type node
word as string*32 'enough space to store any word in the dictionary
nxt as node ptr 'next node in the list, NULL for the last node
end type
 
function addword( tail as node ptr, word as string ) as node ptr
    'Appends a freshly allocated node holding word after tail and
    'returns the address of that new node, which is the new list tail.
    dim as node ptr fresh = allocate(sizeof(node))
    tail->nxt = fresh
    fresh->word = word
    fresh->nxt = NULL      'the new node is now the end of the list
    return fresh
end function
 
function length( word as string ) as uinteger
    'Replacement for the built-in len function, which for the fixed-size
    'word field would always return 32: counts the characters that come
    'before the first zero character within the first 32 positions.
    'Returns 999 when no zero character is found there.
    dim as uinteger position = 1
    while position <= 32
        if asc(mid(word,position,1)) = 0 then return position-1
        position += 1
    wend
    return 999
end function
 
'Main program: loads every word longer than 5 characters from
'unixdict.txt into a linked list, then prints those whose first three
'letters equal their last three.
dim as string word
dim as node ptr tail = allocate( sizeof(node) )
dim as node ptr head = tail, curr, nextnode
dim as uinteger ln
'the first node is only a sentinel; real words start at head->nxt
tail->nxt = NULL
tail->word = "XXXXHEADER"

open "unixdict.txt" for input as #1
'read until the real end of the file, so a blank line does not stop the scan
while not eof(1)
    line input #1, word
    if length(word)>5 then tail = addword( tail, word )
wend
close #1

'visit every word node: skip the sentinel and include the last node
curr = head->nxt
while curr <> NULL
    word = curr->word
    ln = length(word)
    'every stored word has ln > 5, so both three-letter slices are valid
    if mid(word,1,3) = mid(word,ln-2,3) then print word
    curr = curr->nxt
wend

'release the list, sentinel included
curr = head
while curr <> NULL
    nextnode = curr->nxt
    deallocate( curr )
    curr = nextnode
wend
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

Go[edit]

package main

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"log"
	"unicode/utf8"
)

// firstLastThreeEqual reports whether s is longer than five characters and
// its first three characters equal its last three. Both the length and the
// comparison are done in runes, so multi-byte characters are never split.
func firstLastThreeEqual(s string) bool {
	if utf8.RuneCountInString(s) <= 5 {
		return false
	}
	r := []rune(s)
	return string(r[:3]) == string(r[len(r)-3:])
}

// main reads unixdict.txt, splits it on white space and prints each
// matching word prefixed by a running number.
func main() {
	wordList := "unixdict.txt"
	b, err := ioutil.ReadFile(wordList)
	if err != nil {
		// include the underlying cause instead of discarding it
		log.Fatalf("Error reading file: %v", err)
	}
	bwords := bytes.Fields(b)
	count := 0
	for _, bword := range bwords {
		s := string(bword)
		if firstLastThreeEqual(s) {
			count++
			fmt.Printf("%d: %s\n", count, s)
		}
	}
}
Output:
1: antiperspirant
2: calendrical
3: einstein
4: hotshot
5: murmur
6: oshkosh
7: tartar
8: testes

jq[edit]

Works with: jq

Works with gojq, the Go implementation of jq

# Emit each input string longer than 5 characters whose first three
# characters equal its last three.
. as $word
| select(($word | length) > 5 and $word[:3] == $word[-3:])
Output:

Invocation example: jq -rRM -f program.jq unixdict.txt

antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

Julia[edit]

See Alternade_words#Julia for the foreachword function.

# Returns `word` when it is longer than 5 characters and its first three
# characters equal its last three; otherwise returns the empty string.
# The second argument is ignored.
function matchfirstlast3(word, _)
    if length(word) > 5 && word[1:3] == word[end-2:end]
        return word
    end
    return ""
end

foreachword("unixdict.txt", matchfirstlast3, numcols=4)
Output:
Word source: unixdict.txt

antiperspirant calendrical    einstein       hotshot
murmur         oshkosh        tartar         testes

Ksh[edit]

#!/bin/ksh

# Find list of words (> 5 chars) where 1st 3 and last 3 letters are the same

#	# Variables:
#
dict='../unixdict.txt'
integer MIN_LEN=5		# words must be longer than this
integer MATCH_NO=3		# number of leading/trailing characters compared

 ######
# main #
 ######

# -r keeps backslashes in the input literal
while read -r word; do
	(( ${#word} <= MIN_LEN )) && continue

	first=${word:0:${MATCH_NO}}
	last=${word:$((${#word}-MATCH_NO))}

	# quoting the right-hand side forces a literal comparison; unquoted,
	# it would be treated as a glob pattern
	[[ ${first} == "${last}" ]] && print -r -- "${word}"

done < "${dict}"
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

Mathematica/Wolfram Language[edit]

(* Download the dictionary once, split it into words, keep the words longer
   than five characters, then select those whose first three characters equal
   their last three. *)
dict = Once[Import["https://web.archive.org/web/20180611003215/http://www.puzzlers.org/pub/wordlists/unixdict.txt"]];
dict = StringSplit[dict, "\n"];
dict = Select[dict, StringLength[#] > 5 &];
Select[dict, StringTake[#, 3] === StringTake[#, -3] &]
Output:
{"antiperspirant", "calendrical", "einstein", "hotshot", "murmur", "oshkosh", "tartar", "testes"}

Nim[edit]

# Print every word of unixdict.txt that is longer than five characters and
# whose first three characters equal its last three.
for word in "unixdict.txt".lines:
  # `and` short-circuits, so the slices are only taken on long enough words
  if word.len > 5 and word[0..2] == word[^3..^1]:
    echo word
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

Perl[edit]

as one-liner ..

# 20210212 Perl programming solution
# The lookahead captures the first three characters and requires the line
# to end with the same three; ^.{6,}$ enforces the minimum length of six.

perl -ne '/(?=^(.{3}).*\1$)^.{6,}$/&&print' unixdict.txt

# minor variation: the print is performed by the evaluated replacement

perl -ne 's/(?=^(.{3}).*\1$)^.{6,}$/print/e' unixdict.txt

Phix[edit]

with javascript_semantics
-- true when word is longer than 5 characters and its first three
-- characters equal its last three
function flaste(string word)
    if length(word)<=5 then
        return false
    end if
    return word[1..3]=word[-3..-1]
end function
-- collect the matching dictionary words and print the count and the words
sequence flastes = filter(unix_dict(),flaste)
printf(1,"%d words: %s\n",{length(flastes),join(shorten(flastes,"",3))})
Output:
8 words: antiperspirant calendrical einstein hotshot murmur oshkosh tartar testes

PL/I[edit]

/* Lists every word of unixdict.txt that is longer than five characters */
/* and whose first three characters equal its last three.               */
firstAndLast3Equal: procedure options(main);
    declare dict file;
    open file(dict) title('unixdict.txt');
    /* end of file terminates the program, ending the endless loop below */
    on endfile(dict) stop;

    declare word char(32) varying, (first3, last3) char(3);
    do while('1'b);
        get file(dict) list(word);
        /* test the length first: for a word shorter than three          */
        /* characters substr(word, length(word)-2, 3) would start before */
        /* the beginning of the string                                   */
        if length(word) > 5 then do;
            first3 = substr(word, 1, 3);
            last3 = substr(word, length(word)-2, 3);
            if first3 = last3 then
                put skip list(word);
        end;
    end;
end firstAndLast3Equal;
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

Python[edit]

Tested on Python 3+. The file download will work only if the link is still active. You may be able to fetch the file in your browser while the download via code still fails; if so, check whether you are connected to a VPN, as the download works on open networks.

 
import os
import urllib.request

DICT_URL = "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt"
DICT_FILE = "unixdict.txt"


def first_last_three_equal(word, n=3, min_len=6):
    """Return True when `word` starts and ends with the same `n` characters.

    The comparison ignores case. Words shorter than `min_len` characters
    never match. `n` is expected to be at least 1.
    """
    return len(word) >= min_len and word[:n].lower() == word[-n:].lower()


def main():
    """Print every dictionary word whose first and last three letters match."""
    # Only download when there is no local copy: the link may be dead.
    if not os.path.exists(DICT_FILE):
        urllib.request.urlretrieve(DICT_URL, DICT_FILE)

    # The with-statement closes the file even if printing fails.
    with open(DICT_FILE, "r") as dictionary:
        for line in dictionary:
            word = line.rstrip("\n")
            if first_last_three_equal(word):
                print(word)


if __name__ == "__main__":
    main()
 
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

Quackery[edit]

  ( filter: starts from an empty nest and, for each item of the nest on )
  ( the stack, runs the item that follows filter in the source; the     )
  ( item is joined to the result when that yields true, dropped if not  )
  [ [] swap ]'[ swap
witheach [
dup nested
unrot over do
iff [ dip join ]
else nip
] drop ] is filter ( [ --> [ )
 
  ( load the dictionary as a nest of words, keep the words longer than  )
  ( five characters whose first three characters equal their last three )
  ( and print one word per line                                         )
$ "unixdict.txt" sharefile drop nest$
filter [ size 5 > ]
filter [ 3 split -3 split nip = ]
witheach [ echo$ cr ]
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

R[edit]

# Read the dictionary as a character vector, then print the words longer
# than five characters whose first three characters equal their last three.
words <- scan("https://web.archive.org/web/20180611003215/http://www.puzzlers.org/pub/wordlists/unixdict.txt", what = character())
word_lengths <- nchar(words)
long_enough <- word_lengths > 5
same_ends <- substr(words, 1, 3) == substr(words, word_lengths - 2, word_lengths)
words[long_enough & same_ends]

Racket[edit]

#lang racket
 
;; Returns a predicate that is true for strings at least (* 2 len)
;; characters long whose first len characters equal their last len.
(define (prefix-and-suffix-match? len)
  (λ (str)
    (define l (string-length str))
    (and (>= l (* 2 len))
         (string=? (substring str 0 len)
                   (substring str (- l len))))))

;; When run as a program, list the matching words of the dictionary.
(module+ main
  (filter (prefix-and-suffix-match? 3) (file->lines "../../data/unixdict.txt")))
Output:
'("antiperspirant" "calendrical" "einstein" "hotshot" "murmur" "oshkosh" "tartar" "testes")

Raku[edit]

# 20210210 Raku programming solution

# L: words must be longer than this; N: characters compared at each end;
# IN: name of the dictionary file
my ( \L, \N, \IN ) = 5, 3, 'unixdict.txt';

for IN.IO.lines -> $word {
    say $word if $word.chars > L and $word.substr(0, N) eq $word.substr(*-N);
}
 
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

REXX[edit]

This REXX version doesn't care what order the words in the dictionary are in,   nor does it care what
case  (lower/upper/mixed)  the words are in;   the search for the words is   caseless.

The program verifies that the first and last three characters are, indeed, letters.

It also allows the number (3) of leading and trailing letters to be compared,   the minimum length of the
words to be searched,   and the dictionary file identifier to be specified on the command line (CL).

/*REXX pgm finds words in a specified dict. which have the same 1st and last 3 letters.*/
parse arg minL many iFID . /*obtain optional arguments from the CL*/
if minL=='' | minL=="," then minL= 6 /*Not specified? Then use the default.*/
if many=='' | many=="," then many= 3 /* " " " " " " */
if iFID=='' | iFID=="," then iFID='unixdict.txt' /* " " " " " " */
 
do #=1 while lines(iFID)\==0 /*read each word in the file (word=X).*/
x= strip( linein( iFID) ) /*pick off a word from the input line. */
@.#= x /*save: the original case of the word.*/
end /*#*/
#= # - 1 /*adjust word count because of DO loop.*/
say copies('─', 30) # "words in the dictionary file: " iFID
finds= 0 /*count of words with matching ends. */
/*process all the words that were found*/
do j=1 for #; $= @.j; upper $ /*obtain dictionary word; uppercase it.*/
if length($)<minL then iterate /*Word not long enough? Then skip it.*/
lhs= left($, many); rhs= right($, many) /*obtain the left & right side of word.*/
if \datatype(lhs || rhs, 'U') then iterate /*are the left and right side letters? */
if lhs \== rhs then iterate /*Left side match right side? No, skip*/
finds= finds + 1 /*bump the count of matching words. */
say right( left(@.j, 30), 40) /*indent original word for readability.*/
end /*j*/
/*stick a fork in it, we're all done. */
say copies('─', 30) finds " words found that the left " many ' letters match the' ,
"right letters which a word has a minimal length of " minL
output   when using the default inputs:
────────────────────────────── 25104 words in the dictionary file:  unixdict.txt
          antiperspirant
          calendrical
          einstein
          hotshot
          murmur
          oshkosh
          tartar
          testes
────────────────────────────── 8  words found that the left  3  letters match the right letters which a word has a minimal length of  6

Ring[edit]

 
# Lists, with a running number, every dictionary word of at least six
# characters whose first three characters equal its last three.
load "stdlib.ring"

cStr = read("unixdict.txt")
wordList = str2list(cStr)
num = 0

see "working..." + nl
see "Words are:" + nl

for n = 1 to len(wordList)
    word = wordList[n]
    # the length is tested first so short words are never sliced
    if len(word) >= 6
        if left(word,3) = right(word,3)
            num = num + 1
            see "" + num + ". " + word + nl
        ok
    ok
next

see "done..." + nl
 

Output:

working...
Words are:
1. antiperspirant
2. calendrical
3. einstein
4. hotshot
5. murmur
6. oshkosh
7. tartar
8. testes
done...

Ruby[edit]

# Print each dictionary word longer than five characters whose first three
# characters are also its last three.
matches = File.readlines("unixdict.txt").map(&:chomp).select do |word|
  word.size > 5 && word[0, 3] == word[-3, 3]
end
puts matches
 
Output:
antiperspirant
calendrical
einstein
hotshot
murmur
oshkosh
tartar
testes

Swift[edit]

import Foundation
 
// Reads unixdict.txt and prints, with a running number, every word longer
// than five characters whose first three characters equal its last three.
// A failure to read the file is reported through its localized description.
do {
    let text = try String(contentsOfFile: "unixdict.txt", encoding: String.Encoding.ascii)
    var position = 0
    for word in text.components(separatedBy: "\n")
    where word.count > 5 && word.prefix(3) == word.suffix(3) {
        position += 1
        print("\(position). \(word)")
    }
} catch {
    print(error.localizedDescription)
}
Output:
1. antiperspirant
2. calendrical
3. einstein
4. hotshot
5. murmur
6. oshkosh
7. tartar
8. testes

Wren[edit]

Library: Wren-fmt
import "io" for File
import "/fmt" for Fmt
 
// Prints, with a running number, every word of the dictionary that is
// longer than five characters and whose first three characters equal its
// last three.
var wordList = "unixdict.txt" // local copy
var count = 0
for (w in File.read(wordList).trimEnd().split("\n")) {
    if (w.count > 5 && (w[0..2] == w[-3..-1])) {
        count = count + 1
        Fmt.print("$d: $s", count, w)
    }
}
Output:
1: antiperspirant
2: calendrical
3: einstein
4: hotshot
5: murmur
6: oshkosh
7: tartar
8: testes