Most frequent k chars distance: Difference between revisions

Content added / Content deleted
No edit summary
Line 147: Line 147:
<lang Haskell>module MostFrequentK
<lang Haskell>module MostFrequentK
where
where
import Data.List ( nubBy , sortBy , groupBy )
import Data.List ( nub , sortBy , groupBy )
import qualified Data.Set as S
import qualified Data.Set as S


Line 156: Line 156:
|otherwise = count xs k
|otherwise = count xs k


orderedStatistics :: String -> [(Char , Int)]
--frequency of letters
orderedStatistics s = sortBy myCriterion $ nub $ zip s ( map (\c -> count s c ) s )
letterFrequency :: String -> [( Char , Int )]
letterFrequency s = nubBy (\e f -> fst e == fst f ) letterfrequencies
where
where
letterfrequencies = zip s ( map (\c -> count s c ) s )
myCriterion :: (Char , Int) -> (Char , Int) -> Ordering
myCriterion (c1 , n1) (c2, n2)
|n1 > n2 = LT
|n1 < n2 = GT
|n1 == n2 = compare ( found c1 s ) ( found c2 s )
found :: Char -> String -> Int
found e s = length $ takeWhile (/= e ) s


letterStatistics :: String -> [(Char , Int)]
mostFreqKHashing :: String -> Int -> String
mostFreqKHashing s n = foldl ((++)) [] $ map toString $ take n $ orderedStatistics s
letterStatistics myWord = sortBy (\c d -> compare ( snd c ) ( snd d ) ) $ letterFrequency myWord

--frequency of letters; letters with identical frequency are ordered by first occurrence in the string
--mostFrequentKHashing reads from the end of the list, so each sublist has to be reversed
orderStatistics :: String -> [(Char , Int)]
orderStatistics s = concat $ map ( reverse . ( sortBy myCriterion ) ) orderedStatistics
where
where
toString :: (Char , Int) -> String
orderedStatistics = groupBy (\g h -> snd g == snd h ) $ letterStatistics s
found :: Char -> String -> Int
toString ( c , i ) = c : show i
found c str = ( length $ takeWhile ( /= c ) str ) + 1
myCriterion :: (Char , Int) -> (Char , Int ) -> Ordering
myCriterion d e = compare ( found ( fst d ) s ) ( found ( fst e ) s )
mostFrequentKHashing :: String -> Int -> String
mostFrequentKHashing s n = toString lastElement ++ toString secondFromLast
where
statistics = orderStatistics s
lastElement = last statistics
secondFromLast = last $ init statistics
toString :: (Char , Int ) -> String
toString ( c , i ) = c : show i


mostFreqKSimilarity :: String -> String -> Int
mostFreqKSimilarity :: String -> String -> Int
mostFreqKSimilarity s t = fromEnum ( last $ head list ) - 48
mostFreqKSimilarity s t = snd $ head $ S.toList $ S.fromList ( doublets s ) `S.intersection`
S.fromList ( doublets t )
where
where
toPairs :: String -> [String]
toPair :: String -> (Char , Int)
toPairs st = [take 2 $ drop start st | start <- [0,2 ..length st - 2]]
list = S.toList ( S.fromList ( toPairs s ) `S.intersection` S.fromList ( toPairs t ) )
toPair s = ( head s , fromEnum ( head $ tail s ) - 48 )
doublets :: String -> [(Char , Int)]

doublets str = map toPair [take 2 $ drop start str | start <- [0 , 2 ..length str - 2]]
mostFreqKSDF :: String -> String -> Int -> Int -> Int
mostFreqKSDF s1 s2 k dist = dist - mostFreqKSimilarity ( mostFrequentKHashing s1 k ) ( mostFrequentKHashing s2 k )
mostFreqKSDF :: String -> String -> Int ->Int
mostFreqKSDF s t n = mostFreqKSimilarity ( mostFreqKHashing s n ) (mostFreqKHashing t n )
</lang>
</lang>
{{out}}
{{out}}