Most frequent k chars distance: Difference between revisions
Content added Content deleted
No edit summary |
|||
Line 147: | Line 147: | ||
<lang Haskell>module MostFrequentK |
<lang Haskell>module MostFrequentK |
||
where |
where |
||
import Data.List ( |
import Data.List ( nub , sortBy , groupBy ) |
||
import qualified Data.Set as S |
import qualified Data.Set as S |
||
Line 156: | Line 156: | ||
|otherwise = count xs k |
|otherwise = count xs k |
||
⚫ | |||
--frequency of letters |
|||
-- Pairs each character of s with `count s c`, drops duplicate pairs with nub,
-- and sorts what is left with myCriterion.
orderedStatistics s = sortBy myCriterion $ nub $ zip s ( map (\c -> count s c ) s ) |
|||
⚫ | |||
-- Keeps only the first pair for each distinct first component of
-- `letterfrequencies` (its definition is not visible in this diff view).
letterFrequency s = nubBy (\e f -> fst e == fst f ) letterfrequencies |
|||
where |
where |
||
-- Ordering passed to sortBy. The equation head and any further guards are
-- missing from this diff view (moved-line placeholders).
-- NOTE(review): n1 and n2 are presumably the Int components of the two pairs;
-- if so, the pair with the larger count sorts first. Confirm against the full source.
myCriterion :: (Char , Int) -> (Char , Int) -> Ordering |
|||
⚫ | |||
|n1 > n2 = LT |
|||
|n1 < n2 = GT |
|||
⚫ | |||
⚫ | |||
⚫ | |||
-- Takes the first n entries of `orderedStatistics s`, renders each one with
-- toString and concatenates the pieces into a single string.
-- NOTE(review): `foldl ((++)) []` over a mapped list is what concatMap does;
-- worth simplifying once the module is restored from this diff view.
mostFreqKHashing :: String -> Int -> String |
|||
mostFreqKHashing s n = foldl ((++)) [] $ map toString $ take n $ orderedStatistics s |
|||
-- Sorts the pairs returned by `letterFrequency myWord` in ascending order of
-- their second component.
letterStatistics myWord = sortBy (\c d -> compare ( snd c ) ( snd d ) ) $ letterFrequency myWord |
|||
-- Letters with their frequencies; letters of identical frequency are ordered by first occurrence in the string. |
|||
-- mostFrequentKHashing reads this list from its last elements, so each group of equal frequency has to be reversed. |
|||
⚫ | |||
-- Groups adjacent pairs of `letterStatistics s` that share a second component,
-- sorts each group with myCriterion, reverses it, and concatenates the groups.
orderStatistics s = concat $ map ( reverse . ( sortBy myCriterion ) ) orderedStatistics |
|||
where |
where |
||
⚫ | |||
orderedStatistics = groupBy (\g h -> snd g == snd h ) $ letterStatistics s |
|||
-- Renders a pair as its character followed by `show` of the second component.
toString ( c , i ) = c : show i |
|||
⚫ | |||
⚫ | |||
⚫ | |||
-- Renders the last and the second-to-last entry of `orderStatistics s` with
-- toString and joins them, last entry first.
-- NOTE(review): n does not appear in the body or in the bindings shown here, so
-- the result always covers exactly two entries regardless of n.
-- NOTE(review): `last` and `last $ init` fail when the list has fewer than two entries.
mostFrequentKHashing :: String -> Int -> String |
|||
mostFrequentKHashing s n = toString lastElement ++ toString secondFromLast |
|||
⚫ | |||
statistics = orderStatistics s |
|||
lastElement = last statistics |
|||
secondFromLast = last $ init statistics |
|||
⚫ | |||
-- Character followed by `show` of the second component.
toString ( c , i ) = c : show i |
|||
-- Intersects the sets built from `doublets s` and `doublets t` and returns the
-- second component of the smallest common element (S.toList yields ascending order).
-- `doublets` is not visible in this diff view.
-- NOTE(review): `head` fails when the two sets have no element in common.
-- NOTE(review): only one common element contributes to the result; confirm that is intended.
mostFreqKSimilarity :: String -> String -> Int |
mostFreqKSimilarity :: String -> String -> Int |
||
mostFreqKSimilarity s t = |
mostFreqKSimilarity s t = snd $ head $ S.toList $ S.fromList ( doublets s ) `S.intersection` |
||
S.fromList ( doublets t ) |
|||
where |
|||
⚫ | |||
⚫ | |||
-- Decodes a two-character chunk: the first character, and the second character
-- minus 48 (the code of '0') as the count.
-- NOTE(review): only one digit is read, while toString writes `show i`, so counts
-- above 9 would not round-trip.
toPair :: String -> (Char , Int) |
|||
⚫ | |||
toPair s = ( head s , fromEnum ( head $ tail s ) - 48 ) |
|||
⚫ | |||
⚫ | |||
⚫ | |||
-- Returns dist minus the mostFreqKSimilarity of the mostFrequentKHashing hashes
-- of s1 and s2, both computed with k.
mostFreqKSDF s1 s2 k dist = dist - mostFreqKSimilarity ( mostFrequentKHashing s1 k ) ( mostFrequentKHashing s2 k ) |
|||
⚫ | |||
-- Returns the mostFreqKSimilarity of the mostFreqKHashing hashes of s and t,
-- each built with n.
mostFreqKSDF s t n = mostFreqKSimilarity ( mostFreqKHashing s n ) (mostFreqKHashing t n ) |
|||
</lang> |
</lang> |
||
{{out}} |
{{out}} |