File size distribution: Difference between revisions
Content added Content deleted
Line 392: | Line 392: | ||
=={{header|Haskell}}== |
=={{header|Haskell}}== |
||
Uses a grouped frequency distribution. Program arguments are optional. Arguments include starting directory and initial frequency distribution group size. Distribution groups of 0 are removed. After the first frequency distribution is computed it further breaks it down for any group that exceeds 25% of the total file count, when possible. |
Uses a grouped frequency distribution. Program arguments are optional. Arguments include starting directory and initial frequency distribution group size. Distribution groups of 0 are removed. After the first frequency distribution is computed it further breaks it down for any group that exceeds 25% of the total file count, when possible. |
||
<lang haskell>{-# LANGUAGE LambdaCase #-} |
|||
⚫ | |||
⚫ | |||
import Control.Concurrent.Chan (Chan, newChan, readChan, |
import Control.Concurrent.Chan (Chan, newChan, readChan, |
||
writeChan, writeList2Chan) |
writeChan, writeList2Chan) |
||
Line 424: | Line 426: | ||
fileSizes :: [Item] -> [Integer] |
fileSizes :: [Item] -> [Integer] |
||
fileSizes = foldr f [] |
fileSizes = foldr f [] where f (File _ n) acc = n:acc |
||
⚫ | |||
where |
|||
f (File _ n) acc = n:acc |
|||
⚫ | |||
folders :: [Item] -> [FilePath] |
folders :: [Item] -> [FilePath] |
||
folders = foldr f [] |
folders = foldr f [] where f (Folder p) acc = p:acc |
||
⚫ | |||
where |
|||
f (Folder p) acc = p:acc |
|||
⚫ | |||
totalBytes :: [Item] -> Integer |
totalBytes :: [Item] -> Integer |
||
Line 462: | Line 460: | ||
placeGroups n fgMap = case findGroup n groupMinMax of |
placeGroups n fgMap = case findGroup n groupMinMax of |
||
Just k -> Map.alter incrementCount k fgMap |
Just k -> Map.alter incrementCount k fgMap |
||
Nothing -> |
Nothing -> fgMap -- Should never happen. |
||
expandGroups :: Int -- ^ Desired number of frequency groups. |
expandGroups :: Int -- ^ Desired number of frequency groups. |
||
-> [Integer] -- ^ List of collected file sizes |
-> [Integer] -- ^ List of collected file sizes. |
||
-> Integer -- ^ Computed frequency group limit. |
-> Integer -- ^ Computed frequency group limit. |
||
-> FrequencyGroups -- ^ Expanded 'FrequencyGroups' |
-> FrequencyGroups -- ^ Expanded 'FrequencyGroups' |
||
expandGroups gsize fileSizes groupThreshold |
expandGroups gsize fileSizes groupThreshold |
||
| groupThreshold > 0 = loop 15 $ frequencyGroups gsize |
| groupThreshold > 0 = loop 15 $ frequencyGroups gsize sortedFileSizes |
||
| otherwise = frequencyGroups gsize |
| otherwise = frequencyGroups gsize sortedFileSizes |
||
where |
where |
||
⚫ | |||
loop 0 gs = gs -- break out in case we can't go below threshold |
loop 0 gs = gs -- break out in case we can't go below threshold |
||
loop n gs |
loop n gs | all (<= groupThreshold) $ Map.elems gs = gs |
||
⚫ | |||
| all (<= groupThreshold) $ Map.elems gs = gs |
|||
⚫ | |||
expand :: FrequencyGroups -> FrequencyGroups |
expand :: FrequencyGroups -> FrequencyGroups |
||
Line 491: | Line 489: | ||
:: FGKey -- ^ Group Key |
:: FGKey -- ^ Group Key |
||
-> Integer -- ^ Count |
-> Integer -- ^ Count |
||
-> Maybe (FGKey, FrequencyGroups) -- ^ |
-> Maybe (FGKey, FrequencyGroups) -- ^ Returns expanded 'FrequencyGroups' with base key it replaces. |
||
groupsFromGroup (min, max) count |
groupsFromGroup (min, max) count |
||
| length range > 1 = Just ((min, max), frequencyGroups gsize range) |
| length range > 1 = Just ((min, max), frequencyGroups gsize range) |
||
| otherwise = Nothing |
| otherwise = Nothing |
||
where |
where |
||
range = filter (\n -> n >= min && n <= max) |
range = filter (\n -> n >= min && n <= max) sortedFileSizes |
||
displaySize :: Integer -> String |
displaySize :: Integer -> String |
||
Line 533: | Line 531: | ||
percentage :: Double |
percentage :: Double |
||
percentage = (realToFrac count / realToFrac filesCount) * 100 |
percentage = (realToFrac count / realToFrac filesCount) * 100 |
||
size = round percentage |
|||
bars | size == 0 = "▍" |
|||
| otherwise = replicate size '█' |
|||
parseArgs :: [String] -> Either String (FilePath, Int) |
parseArgs :: [String] -> Either String (FilePath, Int) |
||
Line 563: | Line 563: | ||
main :: IO () |
main :: IO () |
||
main = parseArgs <$> getArgs >>= \case |
|||
main = do |
|||
args <- getArgs |
|||
case parseArgs args of |
|||
Left errorMessage -> hPutStrLn stderr errorMessage |
Left errorMessage -> hPutStrLn stderr errorMessage |
||
Right (path, groupSize) -> do |
Right (path, groupSize) -> do |
||
items <- parallelItemCollector path |
items <- parallelItemCollector path |
||
let (fileCount, folderCount) = counts items |
let (fileCount, folderCount) = counts items |
||
printf "Total files: %d\n" fileCount |
printf "Total files: %d\nTotal folders: %d\n" fileCount folderCount |
||
printf "Total folders: %d\n" folderCount |
|||
printf "Total size: %s\n" $ displaySize $ totalBytes items |
printf "Total size: %s\n" $ displaySize $ totalBytes items |
||
printf "\nDistribution:\n\n%9s <-> %9s %7s\n" "From" "To" "Count" |
|||
printf "%9s <-> %9s %7s\n" "From" "To" "Count" |
|||
putStrLn $ replicate 46 '-' |
putStrLn $ replicate 46 '-' |
||
let results = expandGroups groupSize ( |
let results = expandGroups groupSize (fileSizes items) (groupThreshold fileCount) |
||
mapM_ (displayFrequency fileCount) $ Map.assocs results |
mapM_ (displayFrequency fileCount) $ Map.assocs results |
||
where |
where |
||
⚫ | |||
groupThreshold = round . (*0.25) . realToFrac</lang> |
groupThreshold = round . (*0.25) . realToFrac</lang> |
||
{{out}} |
{{out}} |
||
<pre style="height: 50rem;">$ filedist ~/Music |
<pre style="height: 50rem;">$ filedist ~/Music |
||
Using 4 worker threads |
Using 4 worker threads |
||
Total files: 688 |
Total files: 688 |
||
Line 597: | Line 592: | ||
243B <-> 323B = 99 14.390%: ██████████████ |
243B <-> 323B = 99 14.390%: ██████████████ |
||
323B <-> 645B = 23 3.343%: ███ |
323B <-> 645B = 23 3.343%: ███ |
||
646B <-> 968B = 2 0.291%: |
646B <-> 968B = 2 0.291%: ▍ |
||
969B <-> 1.26KB = 1 0.145%: |
969B <-> 1.26KB = 1 0.145%: ▍ |
||
3.19KB <-> 6.38KB = 12 1.744%: ██ |
3.19KB <-> 6.38KB = 12 1.744%: ██ |
||
6.38KB <-> 9.58KB = 22 3.198%: ███ |
6.38KB <-> 9.58KB = 22 3.198%: ███ |
||
Line 608: | Line 603: | ||
108.41KB <-> 162.61KB = 23 3.343%: ███ |
108.41KB <-> 162.61KB = 23 3.343%: ███ |
||
162.61KB <-> 216.81KB = 8 1.163%: █ |
162.61KB <-> 216.81KB = 8 1.163%: █ |
||
236.46KB <-> 472.93KB = 3 0.436%: |
236.46KB <-> 472.93KB = 3 0.436%: ▍ |
||
709.39KB <-> 945.85KB = 44 6.395%: ██████ |
709.39KB <-> 945.85KB = 44 6.395%: ██████ |
||
3.30MB <-> 4.96MB = 4 0.581%: █ |
3.30MB <-> 4.96MB = 4 0.581%: █ |
||
Line 614: | Line 609: | ||
6.67MB <-> 13.33MB = 72 10.465%: ██████████ |
6.67MB <-> 13.33MB = 72 10.465%: ██████████ |
||
13.33MB <-> 20.00MB = 6 0.872%: █ |
13.33MB <-> 20.00MB = 6 0.872%: █ |
||
20.00MB <-> 26.66MB = 1 0.145%: |
20.00MB <-> 26.66MB = 1 0.145%: ▍ |
||
$ filedist ~/Music 10 |
$ filedist ~/Music 10 |
||
Line 631: | Line 626: | ||
267B <-> 355B = 57 8.285%: ████████ |
267B <-> 355B = 57 8.285%: ████████ |
||
356B <-> 444B = 20 2.907%: ███ |
356B <-> 444B = 20 2.907%: ███ |
||
801B <-> 889B = 2 0.291%: |
801B <-> 889B = 2 0.291%: ▍ |
||
959B <-> 1.87KB = 1 0.145%: |
959B <-> 1.87KB = 1 0.145%: ▍ |
||
3.75KB <-> 4.68KB = 1 0.145%: |
3.75KB <-> 4.68KB = 1 0.145%: ▍ |
||
4.68KB <-> 5.62KB = 1 0.145%: |
4.68KB <-> 5.62KB = 1 0.145%: ▍ |
||
5.62KB <-> 6.55KB = 11 1.599%: ██ |
5.62KB <-> 6.55KB = 11 1.599%: ██ |
||
6.56KB <-> 7.49KB = 10 1.453%: █ |
6.56KB <-> 7.49KB = 10 1.453%: █ |
||
Line 650: | Line 645: | ||
94.59KB <-> 189.17KB = 42 6.105%: ██████ |
94.59KB <-> 189.17KB = 42 6.105%: ██████ |
||
189.17KB <-> 283.76KB = 4 0.581%: █ |
189.17KB <-> 283.76KB = 4 0.581%: █ |
||
283.76KB <-> 378.35KB = 2 0.291%: |
283.76KB <-> 378.35KB = 2 0.291%: ▍ |
||
851.28KB <-> 945.87KB = 44 6.395%: ██████ |
851.28KB <-> 945.87KB = 44 6.395%: ██████ |
||
2.67MB <-> 5.33MB = 5 0.727%: █ |
2.67MB <-> 5.33MB = 5 0.727%: █ |
||
Line 656: | Line 651: | ||
8.00MB <-> 10.67MB = 35 5.087%: █████ |
8.00MB <-> 10.67MB = 35 5.087%: █████ |
||
10.67MB <-> 13.33MB = 16 2.326%: ██ |
10.67MB <-> 13.33MB = 16 2.326%: ██ |
||
13.33MB <-> 16.00MB = 3 0.436%: |
13.33MB <-> 16.00MB = 3 0.436%: ▍ |
||
16.00MB <-> 18.67MB = 3 0.436%: |
16.00MB <-> 18.67MB = 3 0.436%: ▍ |
||
24.00MB <-> 26.66MB = 1 0.145%: |
24.00MB <-> 26.66MB = 1 0.145%: ▍ |
||
</pre> |
</pre> |
||