File size distribution: Difference between revisions

Content added Content deleted
Line 392: Line 392:
=={{header|Haskell}}==
=={{header|Haskell}}==
Uses a grouped frequency distribution. Program arguments are optional: the starting directory and the initial frequency distribution group size (the desired number of groups). Groups with a count of 0 are removed. After the first frequency distribution is computed, any group that exceeds 25% of the total file count is broken down further, when possible.
Uses a grouped frequency distribution. Program arguments are optional: the starting directory and the initial frequency distribution group size (the desired number of groups). Groups with a count of 0 are removed. After the first frequency distribution is computed, any group that exceeds 25% of the total file count is broken down further, when possible.
<lang haskell>{-# LANGUAGE LambdaCase #-}
<lang haskell>import Control.Concurrent (forkIO, setNumCapabilities)

import Control.Concurrent (forkIO, setNumCapabilities)
import Control.Concurrent.Chan (Chan, newChan, readChan,
import Control.Concurrent.Chan (Chan, newChan, readChan,
writeChan, writeList2Chan)
writeChan, writeList2Chan)
Line 424: Line 426:


fileSizes :: [Item] -> [Integer]
fileSizes :: [Item] -> [Integer]
fileSizes = foldr f []
fileSizes = foldr f [] where f (File _ n) acc = n:acc
f _ acc = acc
where
f (File _ n) acc = n:acc
f _ acc = acc


folders :: [Item] -> [FilePath]
folders :: [Item] -> [FilePath]
folders = foldr f []
folders = foldr f [] where f (Folder p) acc = p:acc
f _ acc = acc
where
f (Folder p) acc = p:acc
f _ acc = acc


totalBytes :: [Item] -> Integer
totalBytes :: [Item] -> Integer
Line 462: Line 460:
placeGroups n fgMap = case findGroup n groupMinMax of
placeGroups n fgMap = case findGroup n groupMinMax of
Just k -> Map.alter incrementCount k fgMap
Just k -> Map.alter incrementCount k fgMap
Nothing -> error "Should never happen"
Nothing -> fgMap -- Should never happen.


expandGroups :: Int -- ^ Desired number of frequency groups.
expandGroups :: Int -- ^ Desired number of frequency groups.
-> [Integer] -- ^ List of collected file sizes. Must be sorted.
-> [Integer] -- ^ List of collected file sizes.
-> Integer -- ^ Computed frequency group limit.
-> Integer -- ^ Computed frequency group limit.
-> FrequencyGroups -- ^ Expanded 'FrequencyGroups'
-> FrequencyGroups -- ^ Expanded 'FrequencyGroups'
expandGroups gsize fileSizes groupThreshold
expandGroups gsize fileSizes groupThreshold
| groupThreshold > 0 = loop 15 $ frequencyGroups gsize fileSizes
| groupThreshold > 0 = loop 15 $ frequencyGroups gsize sortedFileSizes
| otherwise = frequencyGroups gsize fileSizes
| otherwise = frequencyGroups gsize sortedFileSizes
where
where
sortedFileSizes = sort fileSizes
loop 0 gs = gs -- break out in case we can't go below threshold
loop 0 gs = gs -- break out in case we can't go below threshold
loop n gs
loop n gs | all (<= groupThreshold) $ Map.elems gs = gs
| otherwise = loop (pred n) (expand gs)
| all (<= groupThreshold) $ Map.elems gs = gs
| otherwise = loop (pred n) (expand gs)


expand :: FrequencyGroups -> FrequencyGroups
expand :: FrequencyGroups -> FrequencyGroups
Line 491: Line 489:
:: FGKey -- ^ Group Key
:: FGKey -- ^ Group Key
-> Integer -- ^ Count
-> Integer -- ^ Count
-> Maybe (FGKey, FrequencyGroups) -- ^ Tuple with key and 'FrequencyGroups' to replace the key
-> Maybe (FGKey, FrequencyGroups) -- ^ Returns expanded 'FrequencyGroups' with base key it replaces.
groupsFromGroup (min, max) count
groupsFromGroup (min, max) count
| length range > 1 = Just ((min, max), frequencyGroups gsize range)
| length range > 1 = Just ((min, max), frequencyGroups gsize range)
| otherwise = Nothing
| otherwise = Nothing
where
where
range = filter (\n -> n >= min && n <= max) fileSizes
range = filter (\n -> n >= min && n <= max) sortedFileSizes


displaySize :: Integer -> String
displaySize :: Integer -> String
Line 533: Line 531:
percentage :: Double
percentage :: Double
percentage = (realToFrac count / realToFrac filesCount) * 100
percentage = (realToFrac count / realToFrac filesCount) * 100
bars = replicate (round percentage) '█'
size = round percentage
bars | size == 0 = "▍"
| otherwise = replicate size '█'


parseArgs :: [String] -> Either String (FilePath, Int)
parseArgs :: [String] -> Either String (FilePath, Int)
Line 563: Line 563:


main :: IO ()
main :: IO ()
main = parseArgs <$> getArgs >>= \case
main = do
args <- getArgs
case parseArgs args of
Left errorMessage -> hPutStrLn stderr errorMessage
Left errorMessage -> hPutStrLn stderr errorMessage
Right (path, groupSize) -> do
Right (path, groupSize) -> do
items <- parallelItemCollector path
items <- parallelItemCollector path
let (fileCount, folderCount) = counts items
let (fileCount, folderCount) = counts items
printf "Total files: %d\n" fileCount
printf "Total files: %d\nTotal folders: %d\n" fileCount folderCount
printf "Total folders: %d\n" folderCount
printf "Total size: %s\n" $ displaySize $ totalBytes items
printf "Total size: %s\n" $ displaySize $ totalBytes items
putStrLn "\nDistribution:\n"
printf "\nDistribution:\n\n%9s <-> %9s %7s\n" "From" "To" "Count"
printf "%9s <-> %9s %7s\n" "From" "To" "Count"
putStrLn $ replicate 46 '-'
putStrLn $ replicate 46 '-'
let results = expandGroups groupSize (sizes items) (groupThreshold fileCount)
let results = expandGroups groupSize (fileSizes items) (groupThreshold fileCount)
mapM_ (displayFrequency fileCount) $ Map.assocs results
mapM_ (displayFrequency fileCount) $ Map.assocs results
where
where
sizes = sort . fileSizes
groupThreshold = round . (*0.25) . realToFrac</lang>
groupThreshold = round . (*0.25) . realToFrac</lang>
{{out}}
{{out}}
<pre style="height: 50rem;">$ filedist ~/Music 1 ↵
<pre style="height: 50rem;">$ filedist ~/Music
Using 4 worker threads
Using 4 worker threads
Total files: 688
Total files: 688
Line 597: Line 592:
243B <-> 323B = 99 14.390%: ██████████████
243B <-> 323B = 99 14.390%: ██████████████
323B <-> 645B = 23 3.343%: ███
323B <-> 645B = 23 3.343%: ███
646B <-> 968B = 2 0.291%:
646B <-> 968B = 2 0.291%:
969B <-> 1.26KB = 1 0.145%:
969B <-> 1.26KB = 1 0.145%:
3.19KB <-> 6.38KB = 12 1.744%: ██
3.19KB <-> 6.38KB = 12 1.744%: ██
6.38KB <-> 9.58KB = 22 3.198%: ███
6.38KB <-> 9.58KB = 22 3.198%: ███
Line 608: Line 603:
108.41KB <-> 162.61KB = 23 3.343%: ███
108.41KB <-> 162.61KB = 23 3.343%: ███
162.61KB <-> 216.81KB = 8 1.163%: █
162.61KB <-> 216.81KB = 8 1.163%: █
236.46KB <-> 472.93KB = 3 0.436%:
236.46KB <-> 472.93KB = 3 0.436%:
709.39KB <-> 945.85KB = 44 6.395%: ██████
709.39KB <-> 945.85KB = 44 6.395%: ██████
3.30MB <-> 4.96MB = 4 0.581%: █
3.30MB <-> 4.96MB = 4 0.581%: █
Line 614: Line 609:
6.67MB <-> 13.33MB = 72 10.465%: ██████████
6.67MB <-> 13.33MB = 72 10.465%: ██████████
13.33MB <-> 20.00MB = 6 0.872%: █
13.33MB <-> 20.00MB = 6 0.872%: █
20.00MB <-> 26.66MB = 1 0.145%:
20.00MB <-> 26.66MB = 1 0.145%:


$ filedist ~/Music 10
$ filedist ~/Music 10
Line 631: Line 626:
267B <-> 355B = 57 8.285%: ████████
267B <-> 355B = 57 8.285%: ████████
356B <-> 444B = 20 2.907%: ███
356B <-> 444B = 20 2.907%: ███
801B <-> 889B = 2 0.291%:
801B <-> 889B = 2 0.291%:
959B <-> 1.87KB = 1 0.145%:
959B <-> 1.87KB = 1 0.145%:
3.75KB <-> 4.68KB = 1 0.145%:
3.75KB <-> 4.68KB = 1 0.145%:
4.68KB <-> 5.62KB = 1 0.145%:
4.68KB <-> 5.62KB = 1 0.145%:
5.62KB <-> 6.55KB = 11 1.599%: ██
5.62KB <-> 6.55KB = 11 1.599%: ██
6.56KB <-> 7.49KB = 10 1.453%: █
6.56KB <-> 7.49KB = 10 1.453%: █
Line 650: Line 645:
94.59KB <-> 189.17KB = 42 6.105%: ██████
94.59KB <-> 189.17KB = 42 6.105%: ██████
189.17KB <-> 283.76KB = 4 0.581%: █
189.17KB <-> 283.76KB = 4 0.581%: █
283.76KB <-> 378.35KB = 2 0.291%:
283.76KB <-> 378.35KB = 2 0.291%:
851.28KB <-> 945.87KB = 44 6.395%: ██████
851.28KB <-> 945.87KB = 44 6.395%: ██████
2.67MB <-> 5.33MB = 5 0.727%: █
2.67MB <-> 5.33MB = 5 0.727%: █
Line 656: Line 651:
8.00MB <-> 10.67MB = 35 5.087%: █████
8.00MB <-> 10.67MB = 35 5.087%: █████
10.67MB <-> 13.33MB = 16 2.326%: ██
10.67MB <-> 13.33MB = 16 2.326%: ██
13.33MB <-> 16.00MB = 3 0.436%:
13.33MB <-> 16.00MB = 3 0.436%:
16.00MB <-> 18.67MB = 3 0.436%:
16.00MB <-> 18.67MB = 3 0.436%:
24.00MB <-> 26.66MB = 1 0.145%:
24.00MB <-> 26.66MB = 1 0.145%:
</pre>
</pre>