File size distribution: Difference between revisions

Content added Content deleted
(UNIX shell: fix wrong sort order for files larger than 1GB and "prettify" the output)
(UNIX shell: further simplification, only need one awk invocation)
Line 1,605: Line 1,605:
{{works with|Bourne Shell}}
{{works with|Bourne Shell}}
Uses POSIX-conformant code unless the environment variable GNU is set to a non-empty value.
Uses POSIX-conformant code unless the environment variable GNU is set to a non-empty value.
<lang sh>
<lang sh>#!/bin/sh
#!/bin/sh
set -eu

#!/bin/sh
set -eu
set -eu


tabs -8
if [ ${GNU:-} ]
if [ ${GNU:-} ]
then
then
find -- "${1:-.}" -type f -exec du -b -- {} +
find -- "${1:-.}" -type f -exec du -b -- {} +
else
else
# Run wc in a child shell so the trailing "total" line of each ARG_MAX-sized batch can be removed
# Run wc in a child shell so the trailing "total" line of each ARG_MAX-sized batch can be removed
find -- "${1:-.}" -type f -exec sh -c 'wc -c -- "$@" | sed \$d' argv0 {} +
find -- "${1:-.}" -type f -exec sh -c 'wc -c -- "$@" | sed \$d' argv0 {} +
fi | awk '
fi | awk -vOFS='\t' '
BEGIN {split("KB MB GB TB PB", u); u[0] = "B"}
{
{
++hist[$1 ? length($1) - 1 : -1]
++hist[$1 ? length($1) - 1 : -1]
total += $1
total += $1
}
}
END {
END {
print total, NR
max = -2
for (i in hist)
print i, hist[i]
for (i in hist)
max = (i > max ? i : max)
}' | \

{
print "From", "To", "Count\n"
read total
for (i = -1; i <= max; ++i)
tabs -8
{
sort -n | awk -vtotal="$total" -vOFS='\t' '
if (i in hist)
BEGIN {
{
split("KB MB GB TB PB", u); u[0] = "B"
if (i == -1)
print "From", "To", "Count\n"
print "0B", "0B", hist[i]
}
else
$1 == -1 {print "0B", "0B", $2; next}
print 10 ** (i % 3) u[int(i / 3)],
{
print 10 ** ($1 % 3) u[int($1 / 3)],
10 ** ((i + 1) % 3) u[int((i + 1) / 3)],
10 ** (($1 + 1) % 3) u[int(($1 + 1) / 3)],
hist[i]
}
$2
}
}
l = length(total) - 1
END {
printf "\nTotal: %.1f %s in %d files\n",
$0 = total
total / (10 ** l), u[int(l / 3)], NR
l = length($1) - 1
}'</lang>
printf "\nTotal: %.1f %s in %d files\n",
$1 / (10 ** l), u[int(l / 3)], $2}'
}</lang>
{{out}}
{{out}}
<pre>$ time ~/fsd.sh
<pre>$ time ~/fsd.sh