Word frequency: Difference between revisions

(→‎{{header|Python}}: Added a version defining frequencies in terms of sorted and groupby)
Line 195:
(["the" 41036] ["of" 19946] ["and" 14940] ["a" 14589] ["to" 13939]
["in" 11204] ["he" 9645] ["was" 8619] ["that" 7922] ["it" 6659])
</pre>
 
=={{header|Common Lisp}}==
<lang lisp>
(defun count-word (n pathname)
  "Return the N most frequent words of the text file PATHNAME.

The result is a list of (WORD . COUNT) conses ordered by descending COUNT.
Each line read from the file is passed through DROP-NOISE, split into
symbols by LIST-SYMB, and the accumulated symbols are tallied by PAIR.
If the file holds fewer than N distinct words, all of them are returned."
  (with-open-file (s pathname :direction :input)
    (loop for line = (read-line s nil nil)
          while line
          nconc (list-symb (drop-noise line)) into words
          finally (let ((ranked (sort (pair words) #'> :key #'cdr)))
                    ;; SUBSEQ signals an error when its end index exceeds the
                    ;; sequence length, so clamp N to the number of entries.
                    (return (subseq ranked 0 (min n (length ranked))))))))
 
(defun list-symb (s)
  "Read the string S as the contents of a list and return that list.

S is wrapped in parentheses and handed to the Lisp reader, so every
whitespace-separated token becomes one element of the result (symbols are
interned in the current package according to the current readtable).
*READ-EVAL* is bound to NIL so that #. forms in S cannot execute code."
  (let ((wrapped (concatenate 'string "(" s ")"))
        (*read-eval* nil))
    (read-from-string wrapped)))
 
(defun drop-noise (s)
  "Return the string S with every character removed that is not alphabetic,
a space or a hyphen.

Unwanted characters (digits, punctuation, tabs, ...) are dropped outright,
not replaced by a space.  S itself is never modified; the result may share
structure with S, or be S itself when nothing had to be removed."
  ;; REMOVE-IF-NOT rather than DELETE-IF-NOT: the destructive variant may
  ;; alter the caller's string, which is undefined for literal strings.
  (remove-if-not #'(lambda (x)
                     (or (alpha-char-p x)
                         (char= x #\Space)
                         (char= x #\-)))
                 s))
 
(defun pair (words)
  "Tally the elements of the list WORDS.

Returns a list of (ELEMENT . COUNT) conses, one per distinct element, where
elements are compared with EQL (the default hash-table test).  The order of
the conses follows the hash table's traversal order and is unspecified."
  (let ((tally (make-hash-table))
        (entries '()))
    (loop for w in words
          do (incf (gethash w tally 0)))
    (maphash #'(lambda (key total)
                 (push (cons key total) entries))
             tally)
    entries))
</lang>
 
{{Out}}
<pre>
> (count-word 10 "c:/temp/135-0.txt")
((THE . 40738) (OF . 19922) (AND . 14878) (A . 14419) (TO . 13702) (IN . 11172)
(HE . 9577) (WAS . 8612) (THAT . 7768) (IT . 6467))
</pre>
 
Anonymous user