File size distribution: Difference between revisions

m
Line 414:
 
-- | A histogram bucket: the inclusive @(lower, upper)@ bounds of the bucket
-- paired with the number of values that fall inside those bounds.
type FrequencyGroup = ((Integer, Integer), Integer)

-- | Split values into equal-width buckets and count the members of each.
--
-- The first argument is the target number of buckets used to derive the
-- bucket width; values below 1 are treated as 1. Buckets are laid out from 0
-- upward in steps of that width until the largest value is covered, so the
-- number of buckets returned can differ from the target, and values below 0
-- are not counted in any bucket.
--
-- An empty input yields no buckets; a single value @x@ yields the one bucket
-- @((x, x), 1)@.
frequencyGroups :: Int -> [Integer] -> [FrequencyGroup]
frequencyGroups _ [] = []
frequencyGroups _ [x] = [((x, x), 1)]
frequencyGroups totalGroups xs =
  [ (bounds, membersOf bounds) | bounds <- zip lowerBounds upperBounds ]
  where
    largest = maximum xs
    spread = largest - minimum xs

    -- A zero or negative target would make the width zero, negative or
    -- non-finite (and could make the bucket list endless), so clamp it.
    wanted = toInteger (max 1 totalGroups)

    -- ceiling (spread / wanted) + 1, computed in exact Integer arithmetic
    -- instead of going through Double.
    groupSize = succ ((spread + wanted - 1) `div` wanted)

    lowerBounds = takeWhile (<= groupSize + largest) (iterate (+ groupSize) 0)
    -- Each bucket ends one below the start of the next one.
    upperBounds = pred <$> drop 1 lowerBounds

    membersOf (lo, hi) =
      toInteger (length (filter (\v -> lo <= v && v <= hi) xs))
 
fileSizes :: [Item] -> [Integer]
Line 445 ⟶ 426:
f (Folder p) acc = p:acc
f _ acc = acc
 
-- | Project every item onto the path it carries, preserving input order.
-- Both the 'File' and the 'Folder' constructor hold that path as their
-- first field.
paths :: [Item] -> [FilePath]
paths items = [itemPath item | item <- items]
  where
    itemPath (File fp _) = fp
    itemPath (Folder fp) = fp
 
totalBytes :: [Item] -> Integer
Line 459 ⟶ 436:
) (0, 0)
 
groupsFromGroupfrequencyGroups :: Int -> [Integer] -> FrequencyGroup -> [FrequencyGroup]
frequencyGroups :: Int ->_ [Integer] ->= [FrequencyGroup]
groupsFromGroup gsize fileSizes ((min, max), count)
frequencyGroups _totalGroups []xs = []
| length range > 1 = frequencyGroups gsize range
| otherwiselength xs == 1 = [((minhead xs, maxhead xs), count1)]
| otherwise = placeGroups xs groupMinMax
where
range = maximum xs - minimum xs
collectBetween min max = filter (\n -> n >= min && n <= max)
groupSize = succ $ ceiling $ realToFrac range / realToFrac totalGroups
range = collectBetween min max fileSizes
groups = takeWhile (<=groupSize + maximum xs) $ iterate (+groupSize) 0
groupMinMax = (,0) <$> zip groups (pred <$> tail groups)
 
placeGroups [] = id
placeGroups (d:ds) = placeGroups ds .
fmap (\g@((min,max), count) ->
if d >= min && d <= max
then ((min, max), succ count)
else g
)
 
expandGroups :: Int -> [Integer] -> Integer -> [FrequencyGroup] -> [FrequencyGroup]
Line 479 ⟶ 467:
expand = ((\g@((min, max), count) ->
if count > groupThreshold then
groupsFromGroup gsize fileSizes g
else
[g]
) =<<)
 
groupsFromGroup ((min, max), count)
| length range > 1 = frequencyGroups gsize range
| otherwise = [((min, max), count)]
where
collectBetween min max = filter (\n -> n >= min && n <= max)
range = collectBetween min max fileSizes
 
displaySize :: Integer -> String
Line 561 ⟶ 556:
Right (path, groupSize) -> do
items <- parallelItemCollector path
-- mapM_ putStrLn $ paths items
let (fileCount, folderCount) = counts items
printf "Total files: %d\n" fileCount
Anonymous user