Entropy/Narcissist: Difference between revisions
(→{{header|jq}}: def sum():) |
|||
(23 intermediate revisions by 17 users not shown) | |||
Line 1: | Line 1: | ||
{{task}} |
{{task}} |
||
[[File:ENTROPY.JPG| |
[[File:ENTROPY.JPG|400px||right]] |
||
;Task: |
;Task: |
||
Line 10: | Line 10: | ||
:* [[Entropy]] |
:* [[Entropy]] |
||
<br><br> |
<br><br> |
||
=={{header|Ada}}== |
|||
<syntaxhighlight lang="ada">with Ada.Text_Io; |
|||
with Ada.Command_Line; |
|||
with Ada.Numerics.Elementary_Functions; |
|||
procedure Entropy is |
|||
use Ada.Text_Io; |
|||
type Hist_Type is array (Character) of Natural; |
|||
function Log_2 (V : Float) return Float is |
|||
use Ada.Numerics.Elementary_Functions; |
|||
begin |
|||
return Log (V) / Log (2.0); |
|||
end Log_2; |
|||
procedure Read_File (Name : String; Hist : out Hist_Type) is |
|||
File : File_Type; |
|||
Char : Character; |
|||
begin |
|||
Hist := (others => 0); |
|||
Open (File, In_File, Name); |
|||
while not End_Of_File (File) loop |
|||
Get (File, Char); |
|||
Hist (Char) := Hist (Char) + 1; |
|||
end loop; |
|||
Close (File); |
|||
end Read_File; |
|||
function Length_Of (Hist : Hist_Type) return Natural is |
|||
Sum : Natural := 0; |
|||
begin |
|||
for V of Hist loop |
|||
Sum := Sum + V; |
|||
end loop; |
|||
return Sum; |
|||
end Length_Of; |
|||
function Entropy_Of (Hist : Hist_Type) return Float is |
|||
Length : constant Float := Float (Length_Of (Hist)); |
|||
Sum : Float := 0.0; |
|||
begin |
|||
for V of Hist loop |
|||
if V > 0 then |
|||
Sum := Sum + Float (V) / Length * Log_2 (Float (V) / Length); |
|||
end if; |
|||
end loop; |
|||
return -Sum; |
|||
end Entropy_Of; |
|||
package Float_Io is new Ada.Text_Io.Float_Io (Float); |
|||
Name : constant String := Ada.Command_Line.Argument (1); |
|||
Hist : Hist_Type; |
|||
Entr : Float; |
|||
begin |
|||
Float_Io.Default_Exp := 0; |
|||
Float_Io.Default_Aft := 6; |
|||
Read_File (Name, Hist); |
|||
Entr := Entropy_Of (Hist); |
|||
Put ("Entropy of '"); |
|||
Put (Name); |
|||
Put ("' is "); |
|||
Float_Io.Put (Entr); |
|||
New_Line; |
|||
end Entropy;</syntaxhighlight> |
|||
{{out}} |
|||
<pre>Entropy of 'entropy.adb' is 4.559854</pre> |
|||
=={{header|ALGOL 68}}== |
=={{header|ALGOL 68}}== |
||
Line 16: | Line 86: | ||
Note that the source here uses spaces, not tabs, hence the low entropy, replacing all runs of four spaces with a single space |
Note that the source here uses spaces, not tabs, hence the low entropy, replacing all runs of four spaces with a single space |
||
results in an entropy of +4.64524532762062e +0. |
results in an entropy of +4.64524532762062e +0. |
||
< |
<syntaxhighlight lang="algol68">BEGIN |
||
# calculate the shannon entropy of a string # |
# calculate the shannon entropy of a string # |
||
PROC shannon entropy = ( STRING s )REAL: |
PROC shannon entropy = ( STRING s )REAL: |
||
Line 74: | Line 144: | ||
print( ( shannon entropy( file contents ), newline ) ) |
print( ( shannon entropy( file contents ), newline ) ) |
||
FI |
FI |
||
END</ |
END</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 82: | Line 152: | ||
=={{header|AutoHotkey}}== |
=={{header|AutoHotkey}}== |
||
{{works with|AutoHotkey 1.1}} |
{{works with|AutoHotkey 1.1}} |
||
< |
<syntaxhighlight lang="autohotkey">FileRead, var, *C %A_ScriptFullPath% |
||
MsgBox, % Entropy(var) |
MsgBox, % Entropy(var) |
||
Line 97: | Line 167: | ||
} |
} |
||
return, e |
return, e |
||
}</ |
}</syntaxhighlight> |
||
{{Output}} |
{{Output}} |
||
<pre>5.942956</pre> |
<pre>5.942956</pre> |
||
=={{header|AWK}}== |
|||
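The record separator RS is set to "\x04" (the EOT control character, which is not expected to occur in the file), so getline reads the whole file as a single record. With FS set to the empty string, each character of that record is treated as a separate field, so NF is the character count. |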
|||
<syntaxhighlight lang="awk"> |
|||
BEGIN{FS="" |
|||
RS="\x04"#EOF |
|||
getline<"entropy.awk" |
|||
for(i=1;i<=NF;i++)H[$i]++ |
|||
for(i in H)E-=(h=H[i]/NF)*log(h) |
|||
print "bytes ",NF," entropy ",E/log(2) |
|||
exit}</syntaxhighlight> |
|||
{{Output}} |
|||
<pre>bytes 158 entropy 5.2802</pre> |
|||
=={{header|BBC BASIC}}== |
|||
{{works with|BBC BASIC for Windows}} |
|||
<syntaxhighlight lang="bbcbasic"> DIM Freq%(255) |
|||
REM Count exactly LOMEM-PAGE bytes so the tallies sum to the Size divisor used below
FOR I%=PAGE TO LOMEM-1 : Freq%(?I%)+=1 : NEXT
|||
Size=LOMEM - PAGE |
|||
FOR I%=0 TO 255 |
|||
IF Freq%(I%) Entropy+=Freq%(I%) / Size * LN(Freq%(I%) / Size) / LN(2) |
|||
NEXT |
|||
PRINT "My size is ";Size " bytes and my entropy is ";-Entropy "!" |
|||
END</syntaxhighlight> |
|||
{{out}} |
|||
<pre>My size is 224 bytes and my entropy is 5.11257089!</pre> |
|||
=={{header|C}}== |
=={{header|C}}== |
||
Line 106: | Line 202: | ||
Assumes that the source file is stored in the working directory as "entropy.c". |
Assumes that the source file is stored in the working directory as "entropy.c". |
||
< |
<syntaxhighlight lang="c">#include <stdio.h> |
||
#include <stdlib.h> |
#include <stdlib.h> |
||
#include <stdbool.h> |
#include <stdbool.h> |
||
Line 153: | Line 249: | ||
printf("%lf\n",H); |
printf("%lf\n",H); |
||
return 0; |
return 0; |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
|||
<syntaxhighlight lang="text">5.195143</syntaxhighlight> |
|||
=={{header|C++}}== |
|||
<syntaxhighlight lang="cpp">#include <iostream> |
|||
#include <fstream> |
|||
#include <cmath> |
|||
using namespace std; |
|||
// Reads the text file at `path` and returns its contents as one string.
//
// Lines are read with getline and re-joined with '\n'. A '\n' is restored only
// for lines that actually ended with one, so a file whose last line has no
// terminator is returned without an extra character (which would otherwise be
// counted in the entropy). If the file cannot be opened, an empty string is
// returned.
std::string readFile (std::string path) {
    std::string contents;
    std::string line;
    std::ifstream inFile(path);
    while (std::getline(inFile, line)) {
        contents.append(line);
        // getline sets eofbit when it ran out of input before seeing '\n',
        // i.e. this was an unterminated final line: nothing to put back.
        if (!inFile.eof()) contents.append("\n");
    }
    return contents;  // inFile is closed by its destructor
}
|||
// Returns the Shannon entropy of X in bits per byte:
//     H = -sum over byte values b of p(b) * log2(p(b)),
// where p(b) is the fraction of bytes in X equal to b.
// An empty string yields 0.0.
double entropy (std::string X) {
    // One slot per possible byte value. Indexing through unsigned char keeps
    // every byte (including 0x7F and non-ASCII bytes, which are negative when
    // plain char is signed) inside the table.
    const int NUM_BYTE_VALUES = 256;
    int count[NUM_BYTE_VALUES] = {0};

    for (unsigned char byte : X) count[byte]++;

    const double N = X.length();
    double sum = 0.0;
    for (int value = 0; value < NUM_BYTE_VALUES; value++) {
        // Skip absent byte values: 0 * log2(0) is taken as 0.
        if (count[value] > 0) {
            const double p = count[value] / N;
            sum -= p * std::log2(p);
        }
    }
    return sum;
}
|||
int main () { |
|||
cout<<entropy(readFile("entropy.cpp")); |
|||
return 0; |
|||
}</syntaxhighlight> |
|||
{{out}} |
{{out}} |
||
< |
<pre>4.58688</pre> |
||
=={{header|Crystal}}== |
=={{header|Crystal}}== |
||
{{trans|Ruby}} |
{{trans|Ruby}} |
||
< |
<syntaxhighlight lang="ruby">def entropy(s) |
||
counts = s.chars.each_with_object(Hash(Char, Float64).new(0.0)) { |c, h| h[c] += 1 } |
counts = s.chars.each_with_object(Hash(Char, Float64).new(0.0)) { |c, h| h[c] += 1 } |
||
counts.values.sum do |count| |
counts.values.sum do |count| |
||
Line 167: | Line 308: | ||
end |
end |
||
puts entropy File.read(__FILE__)</ |
puts entropy File.read(__FILE__)</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 174: | Line 315: | ||
=={{header|D}}== |
=={{header|D}}== |
||
< |
<syntaxhighlight lang="d">void main(in string[] args) { |
||
import std.stdio, std.algorithm, std.math, std.file; |
import std.stdio, std.algorithm, std.math, std.file; |
||
Line 184: | Line 325: | ||
.sum |
.sum |
||
.writeln; |
.writeln; |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>6.29803</pre> |
<pre>6.29803</pre> |
||
=={{header|Elixir}}== |
=={{header|Elixir}}== |
||
< |
<syntaxhighlight lang="elixir">File.open(__ENV__.file, [:read], fn(file) -> |
||
text = IO.read(file, :all) |
text = IO.read(file, :all) |
||
leng = String.length(text) |
leng = String.length(text) |
||
Line 200: | Line 341: | ||
end) |
end) |
||
|> IO.puts |
|> IO.puts |
||
end)</ |
end)</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 208: | Line 349: | ||
=={{header|Emacs Lisp}}== |
=={{header|Emacs Lisp}}== |
||
< |
<syntaxhighlight lang="lisp">(defun shannon-entropy (input) |
||
(let ((freq-table (make-hash-table)) |
(let ((freq-table (make-hash-table)) |
||
(entropy 0) |
(entropy 0) |
||
Line 227: | Line 368: | ||
(shannon-entropy (with-temp-buffer |
(shannon-entropy (with-temp-buffer |
||
(insert-file-contents "U:/rosetta/narcissist.el") |
(insert-file-contents "U:/rosetta/narcissist.el") |
||
(buffer-string))))</ |
(buffer-string))))</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
< |
<syntaxhighlight lang="lisp">(narcissist) |
||
4.5129548515535785</ |
4.5129548515535785</syntaxhighlight> |
||
=={{header|Erlang}}== |
=={{header|Erlang}}== |
||
< |
<syntaxhighlight lang="erlang">#! /usr/bin/escript |
||
-define(LOG2E, 1.44269504088896340735992). |
-define(LOG2E, 1.44269504088896340735992). |
||
Line 257: | Line 398: | ||
_ -> count(Data, I+1, Frq #{Chr => 1}) |
_ -> count(Data, I+1, Frq #{Chr => 1}) |
||
end. |
end. |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 264: | Line 405: | ||
=={{header|Factor}}== |
=={{header|Factor}}== |
||
< |
<syntaxhighlight lang="factor">USING: assocs io io.encodings.utf8 io.files kernel math |
||
math.functions math.statistics prettyprint sequences ; |
math.functions math.statistics prettyprint sequences ; |
||
IN: rosetta-code.entropy-narcissist |
IN: rosetta-code.entropy-narcissist |
||
Line 275: | Line 416: | ||
"entropy-narcissist.factor" utf8 [ |
"entropy-narcissist.factor" utf8 [ |
||
contents entropy . |
contents entropy . |
||
] with-file-reader</ |
] with-file-reader</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 282: | Line 423: | ||
=={{header|FreeBASIC}}== |
=={{header|FreeBASIC}}== |
||
< |
<syntaxhighlight lang="freebasic">' version 01-06-2016 |
||
' compile with: fbc -s console |
' compile with: fbc -s console |
||
' modified code from ENTROPY entry |
' modified code from ENTROPY entry |
||
Line 334: | Line 475: | ||
Print : Print "hit any key to end program" |
Print : Print "hit any key to end program" |
||
Sleep |
Sleep |
||
End</ |
End</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>Windows version |
<pre>Windows version |
||
Line 347: | Line 488: | ||
=={{header|Go}}== |
=={{header|Go}}== |
||
< |
<syntaxhighlight lang="go">package main |
||
import ( |
import ( |
||
Line 381: | Line 522: | ||
l := float64(len(d)) |
l := float64(len(d)) |
||
return math.Log2(l) - hm/l |
return math.Log2(l) - hm/l |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 389: | Line 530: | ||
=={{header|Haskell}}== |
=={{header|Haskell}}== |
||
< |
<syntaxhighlight lang="haskell">import qualified Data.ByteString as BS |
||
import Data.List |
import Data.List |
||
import System.Environment |
import System.Environment |
||
Line 399: | Line 540: | ||
entropy = sort >>> group >>> map genericLength >>> normalize >>> map lg >>> sum |
entropy = sort >>> group >>> map genericLength >>> normalize >>> map lg >>> sum |
||
where lg c = -c * logBase 2 c |
where lg c = -c * logBase 2 c |
||
normalize c = let sc = sum c in map (/ sc) c</ |
normalize c = let sc = sum c in map (/ sc) c</syntaxhighlight> |
||
{{out}} In a shell |
{{out}} In a shell |
||
Line 416: | Line 557: | ||
=={{header|J}}== |
=={{header|J}}== |
||
'''Solution''':< |
'''Solution''':<syntaxhighlight lang="j"> entropy=: +/@:-@(* 2&^.)@(#/.~ % #) |
||
1!:2&2 entropy 1!:1 (4!:4 <'entropy') { 4!:3''</ |
1!:2&2 entropy 1!:1 (4!:4 <'entropy') { 4!:3''</syntaxhighlight> |
||
'''Example''':< |
'''Example''':<syntaxhighlight lang="j"> load 'entropy.ijs' |
||
4.73307</ |
4.73307</syntaxhighlight> |
||
=={{header|Julia}}== |
|||
<lang julia>using DataStructures |
|||
entropy(s) = -sum(x -> x / length(s) * log2(x / length(s)), values(counter(s))) |
|||
println("self-entropy: ", entropy(read(Base.source_path(), String)))</lang> |
|||
{{out}} |
|||
<pre>self-entropy: 4.716527560525572</pre> |
|||
=={{header|Java}}== |
=={{header|Java}}== |
||
< |
<syntaxhighlight lang="java"> |
||
import java.io.BufferedReader; |
import java.io.BufferedReader; |
||
import java.io.File; |
import java.io.File; |
||
Line 469: | Line 601: | ||
} |
} |
||
return entropy; |
return entropy / Math.log(2); |
||
} |
} |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre>Entropy of file "src/EntropyNarcissist.java" = |
<pre>Entropy of file "src/EntropyNarcissist.java" = 4.691381977073.</pre> |
||
=={{header|jq}}== |
|||
'''Works with jq, the C implementation of jq''' |
|||
'''Works with gojq, the Go implementation of jq''' |
|||
'''Works with jaq, the Rust implementation of jq''' |
|||
The program assumes it will be presented to itself using |
|||
an invocation of jq with the -sR options, along the lines of: |
|||
<pre> |
|||
jq -sR -f entropy-narcissist.jq < entropy-narcissist.jq |
|||
</pre> |
</pre> |
||
If your jq supports <code>keys_unsorted</code>, feel free to use it instead of <code>keys</code>. |
|||
<syntaxhighlight lang="jq"> |
|||
def chars: explode[] | [.] | implode; |
|||
def bow(stream): |
|||
reduce stream as $word ({}; .[($word|tostring)] += 1); |
|||
def sum(s): reduce s as $x (0; .+$x); |
|||
length as $l |
|||
| bow(chars) |
|||
| sum(keys[] as $k | .[$k] as $c | $c * ($c|log2) ) |
|||
| ($l|log2) - ./$l |
|||
</syntaxhighlight> |
|||
{{output}} |
|||
<pre> |
|||
4.796499915496963 |
|||
</pre> |
|||
=={{header|Julia}}== |
|||
<syntaxhighlight lang="julia">using DataStructures |
|||
entropy(s) = -sum(x -> x / length(s) * log2(x / length(s)), values(counter(s))) |
|||
println("self-entropy: ", entropy(read(Base.source_path(), String)))</syntaxhighlight> |
|||
{{out}} |
|||
<pre>self-entropy: 4.716527560525572</pre> |
|||
=={{header|Kotlin}}== |
=={{header|Kotlin}}== |
||
< |
<syntaxhighlight lang="scala">// version 1.1.0 (entropy_narc.kt) |
||
fun log2(d: Double) = Math.log(d) / Math.log(2.0) |
fun log2(d: Double) = Math.log(d) / Math.log(2.0) |
||
Line 502: | Line 676: | ||
val prog = java.io.File("entropy_narc.kt").readText() |
val prog = java.io.File("entropy_narc.kt").readText() |
||
println("This program's entropy is ${"%18.16f".format(shannon(prog))}") |
println("This program's entropy is ${"%18.16f".format(shannon(prog))}") |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 511: | Line 685: | ||
=={{header|Lua}}== |
=={{header|Lua}}== |
||
arg[0] gives the path of the script currently being executed |
arg[0] gives the path of the script currently being executed |
||
< |
<syntaxhighlight lang="lua">function getFile (filename) |
||
local inFile = io.open(filename, "r") |
local inFile = io.open(filename, "r") |
||
local fileContent = inFile:read("*all") |
local fileContent = inFile:read("*all") |
||
Line 536: | Line 710: | ||
end |
end |
||
print(entropy(getFile(arg[0])))</ |
print(entropy(getFile(arg[0])))</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>4.3591214356783</pre> |
<pre>4.3591214356783</pre> |
||
=={{header|Nim}}== |
|||
As we have compiled without specific options to change the way the executable is named, we can retrieve the source file name by adding the suffix “.nim” to the executable file name. Only the file names (not the full paths) are used, so we also assume that the program is run from the directory containing both the source file and the executable (which is true in our environment). |
|||
<syntaxhighlight lang="nim">import os, math, strutils, tables |
|||
let execName = getAppFilename().splitPath().tail |
|||
let srcName = execName & ".nim" |
|||
func entropy(str: string): float = |
|||
var counts: CountTable[char] |
|||
for ch in str: |
|||
counts.inc(ch) |
|||
for count in counts.values: |
|||
result -= count / str.len * log2(count / str.len) |
|||
echo "Source file entropy: ", srcName.readFile().entropy().formatFloat(ffDecimal, 5) |
|||
echo "Binary file entropy: ", execName.readFile().entropy().formatFloat(ffDecimal, 5)</syntaxhighlight> |
|||
{{out}} |
|||
<pre>Source file entropy: 4.75555 |
|||
Binary file entropy: 5.97036</pre> |
|||
=={{header|PARI/GP}}== |
=={{header|PARI/GP}}== |
||
< |
<syntaxhighlight lang="parigp">entropy(s)=s=Vec(s);my(v=vecsort(s,,8));-sum(i=1,#v,(x->x*log(x))(sum(j=1,#s,v[i]==s[j])/#s))/log(2); |
||
entropy(Str(entropy))</ |
entropy(Str(entropy))</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>%1 = 4.54978213</pre> |
<pre>%1 = 4.54978213</pre> |
||
=={{header|Perl}}== |
=={{header|Perl}}== |
||
< |
<syntaxhighlight lang="perl">#!/usr/bin/perl |
||
use strict ; |
use strict ; |
||
use warnings ; |
use warnings ; |
||
Line 573: | Line 769: | ||
} |
} |
||
$infocontent *= -1 ; |
$infocontent *= -1 ; |
||
say "The information content of the source file is $infocontent !" ;</ |
say "The information content of the source file is $infocontent !" ;</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>The information content of the source file is 4.6487923749222 !</pre> |
<pre>The information content of the source file is 4.6487923749222 !</pre> |
||
=={{header|Perl 6}}== |
|||
{{Works with|rakudo|2016.05}} |
|||
<lang perl6>say log(2) R/ [+] map -> \p { p * -log p }, $_.comb.Bag.values >>/>> +$_ |
|||
given slurp($*PROGRAM-NAME).comb</lang> |
|||
Result should be in the neighborhood of 4.9 |
|||
{{out}} |
|||
<pre>4.89351613053006</pre> |
|||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
Minor edit to the [[Entropy#Phix|Entropy]] answer, if compiled assumes source code is in the same directory. |
Minor edit to the [[Entropy#Phix|Entropy]] answer, if compiled assumes source code is in the same directory. |
||
<!--<syntaxhighlight lang="phix">(notonline)--> |
|||
<lang Phix>function log2(atom v) |
|||
<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- command_line, file i/o</span> |
|||
return log(v)/log(2) |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">entropy</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> |
|||
end function |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">symbols</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span> |
|||
<span style="color: #000000;">counts</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span> |
|||
function entropy(sequence s) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">N</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> |
|||
sequence symbols = {}, |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">N</span> <span style="color: #008080;">do</span> |
|||
counts = {} |
|||
<span style="color: #004080;">object</span> <span style="color: #000000;">si</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> |
|||
integer N = length(s) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">si</span><span style="color: #0000FF;">,</span><span style="color: #000000;">symbols</span><span style="color: #0000FF;">)</span> |
|||
for i=1 to N do |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> |
|||
object si = s[i] |
|||
<span style="color: #000000;">symbols</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">symbols</span><span style="color: #0000FF;">,</span><span style="color: #000000;">si</span><span style="color: #0000FF;">)</span> |
|||
integer k = find(si,symbols) |
|||
<span style="color: #000000;">counts</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">counts</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span> |
|||
if k=0 then |
|||
<span style="color: #008080;">else</span> |
|||
<span style="color: #000000;">counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
counts = append(counts,1) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
else |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
counts[k] += 1 |
|||
<span style="color: #004080;">atom</span> <span style="color: #000000;">H</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span> |
|||
end if |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">counts</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
end for |
|||
<span style="color: #004080;">atom</span> <span style="color: #000000;">ci</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]/</span><span style="color: #000000;">N</span> |
|||
atom H = 0 |
|||
<span style="color: #000000;">H</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">ci</span><span style="color: #0000FF;">*</span><span style="color: #7060A8;">log2</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ci</span><span style="color: #0000FF;">)</span> |
|||
integer n = length(counts) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
for i=1 to n do |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">H</span> |
|||
atom ci = counts[i]/N |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
H -= ci*log2(ci) |
|||
end for |
|||
<span style="color: #0000FF;">?</span><span style="color: #000000;">entropy</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">get_text</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">open</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">substitute</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">command_line</span><span style="color: #0000FF;">()[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],</span><span style="color: #008000;">".exe"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">".exw"</span><span style="color: #0000FF;">)),</span><span style="color: #008000;">"rb"</span><span style="color: #0000FF;">))</span> |
|||
return H |
|||
<!--</syntaxhighlight>--> |
|||
end function |
|||
Output is eg 4.993666233, but that may vary with Windows/Linux line endings, tabs vs spaces, trailing returns, BOM headers, etc. |
|||
?entropy(get_text(open(substitute(command_line()[2],".exe",".exw")),"rb"))</lang> |
|||
{{out}} |
|||
<pre> |
|||
4.993666233 |
|||
</pre> |
|||
=={{header|PHP}}== |
=={{header|PHP}}== |
||
< |
<syntaxhighlight lang="php"><?php |
||
$h = 0; |
$h = 0; |
||
$s = file_get_contents(__FILE__); |
$s = file_get_contents(__FILE__); |
||
Line 630: | Line 813: | ||
( $c / $l ) * |
( $c / $l ) * |
||
log( $c / $l, 2 ); |
log( $c / $l, 2 ); |
||
echo $h;</ |
echo $h;</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>2.9339128173013</pre> |
<pre>2.9339128173013</pre> |
||
=={{header|Picat}}== |
|||
{{trans|Go}} |
|||
{{works with|Picat}} |
|||
<syntaxhighlight lang="picat"> |
|||
entropy(File) = E => |
|||
Bytes = read_file_bytes(File), |
|||
F = [0: I in 1..256], |
|||
foreach (B in Bytes) |
|||
B1 := B + 1, |
|||
F[B1] := F[B1] + 1 |
|||
end, |
|||
HM = 0, |
|||
foreach (C in F) |
|||
if (C > 0) then |
|||
HM := HM + C * log(2, C) |
|||
end |
|||
end, |
|||
L = Bytes.length, |
|||
E = log(2, L) - HM / L. |
|||
main(Args) => |
|||
printf("Entropy: %f\n", entropy(Args[1])). |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
$ picat entropy.pi entropy.pi |
|||
Entropy: 4.384622 |
|||
</pre> |
|||
=={{header|PicoLisp}}== |
|||
<syntaxhighlight lang="picolisp"> |
|||
(scl 8) |
|||
(load "@lib/math.l") |
|||
(setq LN2 0.693147180559945309417) |
|||
(setq Me |
|||
(let F (file) |
|||
(pack (car F) (cadr F)))) |
|||
(setq Hist NIL Sz 0) |
|||
(in Me |
|||
(use Ch |
|||
(while (setq Ch (rd 1)) |
|||
(inc 'Sz) |
|||
(if (assoc Ch Hist) |
|||
(con @ (inc (cdr @))) |
|||
(setq Hist (cons (cons Ch 1) Hist)))))) |
|||
(prinl "My entropy is " |
|||
(format |
|||
(*/ |
|||
(sum |
|||
'((Pair) |
|||
(let R (*/ (cdr Pair) 1. Sz) |
|||
(- (*/ R (log R) 1.)))) |
|||
Hist) |
|||
1. LN2) |
|||
*Scl)) |
|||
(bye) |
|||
</syntaxhighlight> |
|||
{{Out}} |
|||
<pre> |
|||
My entropy is 4.12169822 |
|||
</pre> |
|||
=={{header|Python}}== |
=={{header|Python}}== |
||
Line 640: | Line 890: | ||
Minor edit to the [[Entropy#Python|Entropy]] answer. |
Minor edit to the [[Entropy#Python|Entropy]] answer. |
||
< |
<syntaxhighlight lang="python">import math |
||
from collections import Counter |
from collections import Counter |
||
Line 650: | Line 900: | ||
b=f.read() |
b=f.read() |
||
print(entropy(b))</ |
print(entropy(b))</syntaxhighlight> |
||
{{Output}} |
{{Output}} |
||
<pre>4.575438063744619</pre> |
<pre>4.575438063744619</pre> |
||
Line 656: | Line 906: | ||
=={{header|Racket}}== |
=={{header|Racket}}== |
||
The entropy of the program below is 4.512678555350348. |
The entropy of the program below is 4.512678555350348. |
||
< |
<syntaxhighlight lang="racket"> |
||
#lang racket |
#lang racket |
||
(require math) |
(require math) |
||
Line 664: | Line 914: | ||
(- (for/sum ([(d c) (in-hash (samples->hash ds))]) |
(- (for/sum ([(d c) (in-hash (samples->hash ds))]) |
||
(* (/ c n) (log2 (/ c n))))) |
(* (/ c n) (log2 (/ c n))))) |
||
</syntaxhighlight> |
|||
</lang> |
|||
=={{header|Raku}}== |
|||
(formerly Perl 6) |
|||
{{Works with|rakudo|2016.05}} |
|||
<syntaxhighlight lang="raku" line>say log(2) R/ [+] map -> \p { p * -log p }, $_.comb.Bag.values >>/>> +$_ |
|||
given slurp($*PROGRAM-NAME).comb</syntaxhighlight> |
|||
Result should be in the neighborhood of 4.9 |
|||
{{out}} |
|||
<pre>4.89351613053006</pre> |
|||
=={{header|REXX}}== |
=={{header|REXX}}== |
||
REXX doesn't have a BIF (built-in function) for '''log''' or '''ln''', so the subroutine (function) '''log2''' is included herein. |
REXX doesn't have a BIF (built-in function) for '''log''' or '''ln''', so the subroutine (function) '''log2''' is included herein. |
||
< |
<syntaxhighlight lang="rexx">/*REXX program calculates the "information entropy" for ~this~ REXX program. */ |
||
numeric digits |
numeric digits length( e() ) % 2 - length(.) /*use 1/2 of the decimal digits of E. */ |
||
#=0; @.=0; $=; $$=; recs=sourceline() |
#= 0; @.= 0; $=; $$=; recs= sourceline() /*define some handy─dandy REXX vars. */ |
||
do m=1 for recs; $=$||sourceLine(m) /* [↓] obtain program source and ──► $*/ |
|||
end /*m*/ /* [↑] $ str won't have any meta chars*/ |
|||
$=$ || sourceline(m) /*get a sourceLine of this REXX program*/ |
|||
end /*m*/ /* [↑] $ str won't have any meta chars*/ |
|||
L=length($) /*the byte length of this REXX program.*/ |
L=length($) /*the byte length of this REXX program.*/ |
||
do j=1 for L; _= substr($, j, 1) /*process each character in $ string.*/ |
|||
if @._==0 then do; #= # + 1 /*¿Character unique? Bump char counter*/ |
|||
$$= $$ || _ /*add this character to the $$ list. */ |
|||
end |
|||
@._= @._ + 1 /*keep track of this character's count.*/ |
|||
end /*j*/ /* [↑] characters are all 8─bit bytes.*/ |
|||
sum= 0 /*calculate info entropy for each char.*/ |
|||
do i=1 for #; _= substr($$, i, 1) /*obtain a character from unique list. */ |
|||
sum= sum - @._ / L * log2(@._ / L) /*add {negatively} the char entropies. */ |
|||
end /*i*/ |
|||
end /*i*/ |
|||
say ' program length: ' L /*pgm length doesn't include meta chars*/ |
say ' program length: ' L /*pgm length doesn't include meta chars*/ |
||
say 'program statements: ' recs /*pgm statements are actually pgm lines*/ |
say 'program statements: ' recs /*pgm statements are actually pgm lines*/ |
||
say ' unique characters: ' #; |
say ' unique characters: ' #; say /*characters are 8─bit bytes of the pgm*/ |
||
say 'The information entropy of this REXX program ──► ' format(sum,,12) |
say 'The information entropy of this REXX program ──► ' format(sum,,12) |
||
exit /*stick a fork in it, we're all done. */ |
exit /*stick a fork in it, we're all done. */ |
||
/*──────────────────────────────────────────────────────────────────────────────────────*/ |
/*──────────────────────────────────────────────────────────────────────────────────────*/ |
||
e: e= 2.718281828459045235360287471352662497757247093699959574966967627724076630; return e |
|||
log2: procedure; parse arg x 1 ox; ig= x>1.5; ii=0; is=1 - 2 * (ig\==1) |
|||
/*──────────────────────────────────────────────────────────────────────────────────────*/ |
|||
numeric digits digits()+5 /* [↓] precision of E must be≥digits()*/ |
|||
log2: procedure; parse arg x 1 ox; ig= x>1.5; ii= 0; is= 1 - 2 * (ig\==1) |
|||
e=2.71828182845904523536028747135266249775724709369995957496696762772407663035354759 |
|||
numeric digits digits()+5; call e /*the precision of E must be≥digits(). */ |
|||
do while ig & ox>1.5 | \ig&ox<.5; _= e; do j=-1; iz= ox * _ ** -is |
|||
if j>=0 & (ig & iz<1 | \ig&iz>.5) then leave; _= _ * _; izz= iz; end /*j*/ |
|||
ox=izz; ii=ii+is*2**j; end /*while*/; x= x * e** -ii -1; z= 0; _= -1; p= z |
|||
do k=1; _= -_ * x; z= z+_/k; if z=p then leave; p= z; end /*k*/ |
|||
r= z + ii; if arg()==2 then return r; return r / log2(2,.)</syntaxhighlight> |
|||
'''output''' |
|||
{{out|output|text= when using this REXX program as input:}} |
|||
<pre> |
<pre> |
||
program length: |
program length: 2631 |
||
program statements: |
program statements: 31 |
||
unique characters: |
unique characters: 79 |
||
The information entropy of this REXX program ──► 4. |
The information entropy of this REXX program ──► 4.362691425984 |
||
</pre> |
</pre> |
||
=={{header|Ruby}}== |
=={{header|Ruby}}== |
||
< |
<syntaxhighlight lang="ruby">def entropy(s) |
||
counts = s.each_char. |
counts = s.each_char.tally |
||
size = s.size.to_f |
|||
counts.values.reduce(0) do |entropy, count| |
counts.values.reduce(0) do |entropy, count| |
||
freq = count / |
freq = count / size |
||
entropy - freq * Math.log2(freq) |
entropy - freq * Math.log2(freq) |
||
end |
end |
||
end |
end |
||
s = File.read(__FILE__) |
s = File.read(__FILE__) |
||
p entropy(s) |
p entropy(s) |
||
</syntaxhighlight> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre>4.653607496799478 |
||
4.885234973253878 |
|||
</pre> |
</pre> |
||
=={{header|Rust}}== |
=={{header|Rust}}== |
||
< |
<syntaxhighlight lang="rust">use std::fs::File; |
||
use std::io::{Read, BufReader}; |
use std::io::{Read, BufReader}; |
||
Line 753: | Line 1,010: | ||
let file = BufReader::new(File::open(name).expect("Could not read file.")); |
let file = BufReader::new(File::open(name).expect("Could not read file.")); |
||
println!("Entropy is {}.", entropy(file.bytes().flatten())); |
println!("Entropy is {}.", entropy(file.bytes().flatten())); |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>Entropy is 5.7108583.</pre> |
<pre>Entropy is 5.7108583.</pre> |
||
=={{header|Sidef}}== |
=={{header|Sidef}}== |
||
< |
<syntaxhighlight lang="ruby">func entropy(s) { |
||
[0, |
[0, |
||
s.chars.freq.values.map {|c| |
s.chars.freq.values.map {|c| |
||
Line 767: | Line 1,024: | ||
} |
} |
||
say entropy(File(__FILE__).open_r.slurp)</ |
say entropy(File(__FILE__).open_r.slurp)</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 775: | Line 1,032: | ||
=={{header|Tcl}}== |
=={{header|Tcl}}== |
||
Note that this code doesn't bother to close the open handle on the script; it is only suitable as a demonstration program. |
Note that this code doesn't bother to close the open handle on the script; it is only suitable as a demonstration program. |
||
< |
<syntaxhighlight lang="tcl">proc entropy {str} { |
||
set log2 [expr log(2)] |
set log2 [expr log(2)] |
||
foreach char [split $str ""] {dict incr counts $char} |
foreach char [split $str ""] {dict incr counts $char} |
||
Line 786: | Line 1,043: | ||
} |
} |
||
puts [format "entropy = %.5f" [entropy [read [open [info script]]]]]</ |
puts [format "entropy = %.5f" [entropy [read [open [info script]]]]]</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
entropy = 4.59099 |
entropy = 4.59099 |
||
</pre> |
</pre> |
||
=={{header|V (Vlang)}}== |
|||
{{trans|Go}} |
|||
<syntaxhighlight lang="go">import os |
|||
import math |
|||
fn main() { |
|||
println("Binary file entropy: ${entropy(os.args[0])?}") |
|||
} |
|||
fn entropy(file string) ?f64 { |
|||
d := os.read_bytes(file)? |
|||
mut f := [256]f64{} |
|||
for b in d { |
|||
f[b]++ |
|||
} |
|||
mut hm := 0.0 |
|||
for c in f { |
|||
if c > 0 { |
|||
hm += c * math.log2(c) |
|||
} |
|||
} |
|||
l := f64(d.len) |
|||
return math.log2(l) - hm/l |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
Binary file entropy: 5.676177202237735 |
|||
</pre> |
|||
=={{header|Wren}}== |
|||
Basically an amalgam of the code in the [[Print_itself#Wren]] and [[Entropy#Wren]] tasks. |
|||
<syntaxhighlight lang="wren">import "os" for Process |
|||
import "io" for File |
|||
var args = Process.allArguments |
|||
var s = File.read(args[1]).trim() |
|||
var m = {} |
|||
for (c in s) { |
|||
var d = m[c] |
|||
m[c] = (d) ? d + 1 : 1 |
|||
} |
|||
var hm = 0 |
|||
for (k in m.keys) { |
|||
var c = m[k] |
|||
hm = hm + c * c.log2 |
|||
} |
|||
var l = s.count |
|||
System.print(l.log2 - hm/l)</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
4.6302314663 |
|||
</pre> |
|||
=={{header|XPL0}}== |
|||
To run: entropy < entropy.xpl |
|||
<syntaxhighlight lang="XPL0">int Count(128), I, Len, Ch; |
|||
real Sum, Prob; |
|||
[for I:= 0 to 127 do Count(I):= 0; |
|||
Len:= 0; |
|||
loop [Ch:= ChIn(1); |
|||
if Ch = $1A\EOF\ then quit; |
|||
Count(Ch):= Count(Ch)+1; |
|||
Len:= Len+1; |
|||
]; |
|||
Sum:= 0.; |
|||
for I:= 0 to 127 do |
|||
if Count(I) then |
|||
[Prob:= float(Count(I)) / float(Len); |
|||
Sum:= Sum + Prob*Ln(Prob); |
|||
]; |
|||
RlOut(0, -Sum/Ln(2.)); |
|||
]</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
4.63457</pre> |
|||
=={{header|zkl}}== |
=={{header|zkl}}== |
||
Minor edit to the [[Entropy#zkl|Entropy]] answer. |
Minor edit to the [[Entropy#zkl|Entropy]] answer. |
||
< |
<syntaxhighlight lang="zkl">fcn entropy(text){ |
||
text.pump(Void,fcn(c,freq){ c=c.toAsc(); freq[c]=freq[c]+1; freq } |
text.pump(Void,fcn(c,freq){ c=c.toAsc(); freq[c]=freq[c]+1; freq } |
||
.fp1((0).pump(256,List,(0.0).create.fp(0)).copy())) |
.fp1((0).pump(256,List,(0.0).create.fp(0)).copy())) |
||
Line 803: | Line 1,138: | ||
} |
} |
||
entropy(File("entropy.zkl").read().text).println();</ |
entropy(File("entropy.zkl").read().text).println();</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
Latest revision as of 09:02, 22 April 2024
You are encouraged to solve this task according to the task description, using any language you may know.
- Task
Write a computer program that computes and shows its own entropy.
- Related Tasks
Ada
with Ada.Text_Io;
with Ada.Command_Line;
with Ada.Numerics.Elementary_Functions;
procedure Entropy is
use Ada.Text_Io;
type Hist_Type is array (Character) of Natural;
function Log_2 (V : Float) return Float is
use Ada.Numerics.Elementary_Functions;
begin
return Log (V) / Log (2.0);
end Log_2;
procedure Read_File (Name : String; Hist : out Hist_Type) is
File : File_Type;
Char : Character;
begin
Hist := (others => 0);
Open (File, In_File, Name);
while not End_Of_File (File) loop
Get (File, Char);
Hist (Char) := Hist (Char) + 1;
end loop;
Close (File);
end Read_File;
function Length_Of (Hist : Hist_Type) return Natural is
Sum : Natural := 0;
begin
for V of Hist loop
Sum := Sum + V;
end loop;
return Sum;
end Length_Of;
function Entropy_Of (Hist : Hist_Type) return Float is
Length : constant Float := Float (Length_Of (Hist));
Sum : Float := 0.0;
begin
for V of Hist loop
if V > 0 then
Sum := Sum + Float (V) / Length * Log_2 (Float (V) / Length);
end if;
end loop;
return -Sum;
end Entropy_Of;
package Float_Io is new Ada.Text_Io.Float_Io (Float);
Name : constant String := Ada.Command_Line.Argument (1);
Hist : Hist_Type;
Entr : Float;
begin
Float_Io.Default_Exp := 0;
Float_Io.Default_Aft := 6;
Read_File (Name, Hist);
Entr := Entropy_Of (Hist);
Put ("Entropy of '");
Put (Name);
Put ("' is ");
Float_Io.Put (Entr);
New_Line;
end Entropy;
- Output:
Entropy of 'entropy.adb' is 4.559854
ALGOL 68
Assumes the source file is in the current directory and called "entropyNarcissist.a68".
Note that the source here uses spaces, not tabs, hence the low entropy; replacing all runs of four spaces with a single space
results in an entropy of +4.64524532762062e +0.
BEGIN
# calculate the shannon entropy of a string #
PROC shannon entropy = ( STRING s )REAL:
BEGIN
INT string length = ( UPB s - LWB s ) + 1;
# count the occurances of each character #
[ 0 : max abs char ]INT char count;
FOR char pos FROM LWB char count TO UPB char count DO
char count[ char pos ] := 0
OD;
FOR char pos FROM LWB s TO UPB s DO
char count[ ABS s[ char pos ] ] +:= 1
OD;
# calculate the entropy, we use log base 10 and then convert #
# to log base 2 after calculating the sum #
REAL entropy := 0;
FOR char pos FROM LWB char count TO UPB char count DO
IF char count[ char pos ] /= 0
THEN
# have a character that occurs in the string #
REAL probability = char count[ char pos ] / string length;
entropy -:= probability * log( probability )
FI
OD;
entropy / log( 2 )
END; # shannon entropy #
IF FILE input file;
STRING file name = "entropyNarcissist.a68";
open( input file, file name, stand in channel ) /= 0
THEN
# failed to open the file #
print( ( "Unable to open """ + file name + """", newline ) )
ELSE
# file opened OK #
BOOL at eof := FALSE;
# set the EOF handler for the file #
on logical file end( input file
, ( REF FILE f )BOOL:
BEGIN
# note that we reached EOF on the latest read #
at eof := TRUE;
# return TRUE so processing can continue #
TRUE
END
);
# construct a string containing the whole file #
STRING file contents := "";
WHILE STRING line;
get( input file, ( line, newline ) );
NOT at eof
DO
file contents +:= line + REPR 12
OD;
close( input file );
# show the entropy of the file cotents #
print( ( shannon entropy( file contents ), newline ) )
FI
END
- Output:
+3.93440186690189e +0
AutoHotkey
FileRead, var, *C %A_ScriptFullPath%
MsgBox, % Entropy(var)
; Returns the Shannon entropy of the string n, in bits per character.
; Each pass takes the first remaining character, strips every occurrence of
; it from the working copy, and uses the number of removals as its count.
; An empty string yields 0.
Entropy(n) {
    len := StrLen(n), m := n, e := 0
    while StrLen(m) {
        s := SubStr(m, 1, 1)
        ; \Q...\E makes the character a literal needle. Used raw, "." would
        ; match (and remove) every character, and "(", "[" or "\" would be
        ; an invalid pattern.
        m := RegExReplace(m, "\Q" s "\E", "", c)
        ; Accumulate straight away instead of storing c in an object:
        ; object string keys are case-insensitive, so "A" and "a" would
        ; overwrite each other's counts.
        p := c / len
        e -= p * Log(p) / Log(2)    ; Log() is base 10; the ratio gives log2
    }
    return e
}
- Output:
5.942956
AWK
The record separator RS is set to end of file. So getline reads the whole file in one line.
BEGIN{FS=""
RS="\x04"#EOF
getline<"entropy.awk"
for(i=1;i<=NF;i++)H[$i]++
for(i in H)E-=(h=H[i]/NF)*log(h)
print "bytes ",NF," entropy ",E/log(2)
exit}
- Output:
bytes 158 entropy 5.2802
BBC BASIC
DIM Freq%(255)
FOR I%=PAGE TO LOMEM Freq%(?I%)+=1 NEXT
Size=LOMEM - PAGE
FOR I%=0 TO 255
IF Freq%(I%) Entropy+=Freq%(I%) / Size * LN(Freq%(I%) / Size) / LN(2)
NEXT
PRINT "My size is ";Size " bytes and my entropy is ";-Entropy "!"
END
- Output:
My size is 224 bytes and my entropy is 5.11257089!
C
Minor edit to the Entropy answer.
Assumes that the source file is stored in the working directory as "entropy.c".
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>
#define MAXLEN 961 //maximum string length
/*
 * Builds a compact histogram of the first len bytes of S.
 *
 * hist must point to at least as many zero-initialised ints as there are
 * distinct byte values in S (len slots always suffice). Slot k receives the
 * number of occurrences of the k-th distinct byte in order of first
 * appearance. Returns the number of distinct byte values found.
 */
int makehist(char *S,int *hist,int len){
    int wherechar[256];   /* byte value -> slot in hist, or -1 if unseen */
    int i,histlen;
    histlen=0;
    for(i=0;i<256;i++)wherechar[i]=-1;
    for(i=0;i<len;i++){
        /* Convert through unsigned char: plain char may be signed, and a
           byte >= 0x80 would otherwise become a negative subscript. */
        int c=(unsigned char)S[i];
        if(wherechar[c]==-1){
            wherechar[c]=histlen;
            histlen++;
        }
        hist[wherechar[c]]++;
    }
    return histlen;
}
double entropy(int *hist,int histlen,int len){
int i;
double H;
H=0;
for(i=0;i<histlen;i++){
H-=(double)hist[i]/len*log2((double)hist[i]/len);
}
return H;
}
/*
 * Reads the program's own source ("entropy.c" in the working directory),
 * prints its Shannon entropy in bits per byte and returns 0.
 * Returns EXIT_FAILURE if the file cannot be opened or memory runs out.
 */
int main(void){
    char *S=NULL,*grown;
    size_t cap=0;
    int len=0,c,*hist,histlen;
    double H;
    FILE *f;
    f=fopen("entropy.c","r");
    if(f==NULL){
        perror("entropy.c");
        return EXIT_FAILURE;
    }
    /* Test fgetc's result directly rather than looping on feof(), and grow
       the buffer on demand so a source longer than MAXLEN cannot overflow
       it. MAXLEN only serves as the initial capacity. */
    while((c=fgetc(f))!=EOF){
        if((size_t)len==cap){
            cap=cap?cap*2:MAXLEN;
            grown=realloc(S,cap);
            if(grown==NULL){
                perror("realloc");
                free(S);
                fclose(f);
                return EXIT_FAILURE;
            }
            S=grown;
        }
        S[len++]=(char)c;
    }
    fclose(f);
    /* Ask for at least one slot so a NULL result always means failure,
       even for an empty file. */
    hist=(int*)calloc(len>0?len:1,sizeof(int));
    if(hist==NULL){
        perror("calloc");
        free(S);
        return EXIT_FAILURE;
    }
    histlen=makehist(S,hist,len);
    //hist now has no order (known to the program) but that doesn't matter
    H=entropy(hist,histlen,len);
    printf("%lf\n",H);
    free(hist);
    free(S);
    return 0;
}
- Output:
5.195143
C++
#include <iostream>
#include <fstream>
#include <cmath>
using namespace std;
string readFile (string path) {
string contents;
string line;
ifstream inFile(path);
while (getline (inFile, line)) {
contents.append(line);
contents.append("\n");
}
inFile.close();
return contents;
}
// Returns the Shannon entropy of X in bits per byte (0 for an empty string).
double entropy (std::string X) {
    // One bin per possible byte value. The previous 127-slot table indexed
    // by a (possibly signed) char went out of bounds for byte 127 and for
    // every byte >= 0x80.
    const int MAXCHAR = 256;
    const double N = X.length();
    int count[MAXCHAR] = {0};
    double sum = 0.0;
    // Iterating as unsigned char keeps every subscript in 0..255.
    for (unsigned char ch : X) count[ch]++;
    for (int n_i = 0; n_i < MAXCHAR; n_i++) {
        if (count[n_i] > 0) {
            double p = count[n_i] / N;
            sum -= p * std::log2(p);
        }
    }
    return sum;
}
int main () {
cout<<entropy(readFile("entropy.cpp"));
return 0;
}
- Output:
4.58688
Crystal
def entropy(s)
counts = s.chars.each_with_object(Hash(Char, Float64).new(0.0)) { |c, h| h[c] += 1 }
counts.values.sum do |count|
freq = count / s.size
-freq * Math.log2(freq)
end
end
puts entropy File.read(__FILE__)
- Output:
4.963709090807145
D
void main(in string[] args) {
import std.stdio, std.algorithm, std.math, std.file;
auto data = sort(cast(ubyte[])args[0].read);
return data
.group
.map!(g => g[1] / double(data.length))
.map!(p => -p * p.log2)
.sum
.writeln;
}
- Output:
6.29803
Elixir
File.open(__ENV__.file, [:read], fn(file) ->
text = IO.read(file, :all)
leng = String.length(text)
String.codepoints(text)
|> Enum.group_by(&(&1))
|> Enum.map(fn{_,value} -> length(value) end)
|> Enum.reduce(0, fn count, entropy ->
freq = count / leng
entropy - freq * :math.log2(freq)
end)
|> IO.puts
end)
- Output:
4.848342673395324
Emacs Lisp
(defun shannon-entropy (input)
  "Return the Shannon entropy of INPUT in bits per element.
INPUT is a sequence, typically a string; its elements are counted with
an `eql' hash table.  An empty INPUT yields 0."
  (let ((freq-table (make-hash-table))
        (entropy 0)
        ;; Adding 0.0 forces a float so the divisions below are not
        ;; integer divisions.
        (length (+ (length input) 0.0)))
    ;; `mapc' rather than `mapcar': the call is made for its side effect
    ;; only, so there is no point in building a result list.
    (mapc (lambda (x)
            (puthash x
                     (+ 1 (gethash x freq-table 0))
                     freq-table))
          input)
    (maphash (lambda (_k v)
               ;; `setq', not (set 'entropy ...): `set' changes only the
               ;; dynamic value of the symbol, so under lexical binding
               ;; the local `entropy' would never be updated.
               (setq entropy (+ entropy
                                (* (/ v length)
                                   (log (/ v length) 2)))))
             freq-table)
    (- entropy)))
(defun narcissist ()
(shannon-entropy (with-temp-buffer
(insert-file-contents "U:/rosetta/narcissist.el")
(buffer-string))))
- Output:
(narcissist)
4.5129548515535785
Erlang
#! /usr/bin/escript
-define(LOG2E, 1.44269504088896340735992).
main(_) ->
Self = escript:script_name(),
{ok, Contents} = file:read_file(Self),
io:format("My entropy is ~p~n", [entropy(Contents)]).
entropy(Data) ->
Frq = count(Data),
maps:fold(fun(_, C, E) ->
P = C / byte_size(Data),
E - P*math:log(P)
end, 0, Frq) * ?LOG2E.
count(Data) -> count(Data, 0, #{}).
count(Data, I, Frq) when I =:= byte_size(Data) -> Frq;
count(Data, I, Frq) ->
Chr = binary:at(Data, I),
case Frq of
#{Chr := K} -> count(Data, I+1, Frq #{Chr := K+1});
_ -> count(Data, I+1, Frq #{Chr => 1})
end.
- Output:
My entropy is 5.00988934931771
Factor
USING: assocs io io.encodings.utf8 io.files kernel math
math.functions math.statistics prettyprint sequences ;
IN: rosetta-code.entropy-narcissist
: entropy ( seq -- entropy )
[ length ] [ histogram >alist [ second ] map ] bi
[ swap / ] with map
[ dup log 2 log / * ] map-sum neg ;
"entropy-narcissist.factor" utf8 [
contents entropy .
] with-file-reader
- Output:
4.591946214804276
FreeBASIC
' version 01-06-2016
' compile with: fbc -s console
' modified code from ENTROPY entry
Dim As Integer i, count, totalchar(255)
Dim As UByte buffer
Dim As Double prop, entropy
' command (0) returns the name of this program (including the path)
Dim As String slash, filename = Command(0)
Dim As Integer ff = FreeFile ' find first free filenumber
Open filename For Binary As #ff
If Err > 0 Then ' should not happen
Print "Error opening the file"
Beep : Sleep 5000, 1
End
End If
' will read 1 UByte from the file until it reaches the end of the file
For i = 1 To Lof(ff)
Get #ff, ,buffer
totalchar(buffer) += 1
count = count + 1
Next
For i = 0 To 255
If totalchar(i) = 0 Then Continue For
prop = totalchar(i) / count
entropy = entropy - (prop * Log (prop) / Log(2))
Next
' next lines are only compiled when compiling for Windows OS (32/64)
#Ifdef __FB_WIN32__
slash = chr(92)
print "Windows version"
#endif
#Ifdef __FB_LINUX__
slash = chr(47)
print "LINUX version"
#EndIf
i = InStrRev(filename, slash)
If i <> 0 Then filename = Right(filename, Len(filename)-i)
Print "My name is "; filename
Print : Print "The Entropy of myself is"; entropy
Print
' empty keyboard buffer
While InKey <> "" : Wend
Print : Print "hit any key to end program"
Sleep
End
- Output:
Windows version My name is entropy_narcissist.exe The Entropy of myself is 6.142286625408597 LINUX version My name is entropy_narcissist The Entropy of myself is 5.450343613062795
Go
package main
import (
"fmt"
"io/ioutil"
"log"
"math"
"os"
"runtime"
)
func main() {
_, src, _, _ := runtime.Caller(0)
fmt.Println("Source file entropy:", entropy(src))
fmt.Println("Binary file entropy:", entropy(os.Args[0]))
}
func entropy(file string) float64 {
d, err := ioutil.ReadFile(file)
if err != nil {
log.Fatal(err)
}
var f [256]float64
for _, b := range d {
f[b]++
}
hm := 0.
for _, c := range f {
if c > 0 {
hm += c * math.Log2(c)
}
}
l := float64(len(d))
return math.Log2(l) - hm/l
}
- Output:
Source file entropy: 5.038501725029859 Binary file entropy: 5.388171194771937
Haskell
import qualified Data.ByteString as BS
import Data.List
import System.Environment
(>>>) = flip (.)
main = getArgs >>= head >>> BS.readFile >>= BS.unpack >>> entropy >>> print
entropy = sort >>> group >>> map genericLength >>> normalize >>> map lg >>> sum
where lg c = -c * logBase 2 c
normalize c = let sc = sum c in map (/ sc) c
- Output:
In a shell
$ ghc --make -O3 Narcissist.hs
Entropy of the source
$ ./Narcissist Narcissist.hs 4.452645183154108
Entropy of the binary
$ ./Narcissist Narcissist 5.525417236346172
J
Solution:
entropy=: +/@:-@(* 2&^.)@(#/.~ % #)
1!:2&2 entropy 1!:1 (4!:4 <'entropy') { 4!:3''
Example:
load 'entropy.ijs'
4.73307
Java
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
public class EntropyNarcissist {
private static final String FILE_NAME = "src/EntropyNarcissist.java";
public static void main(String[] args) {
System.out.printf("Entropy of file \"%s\" = %.12f.%n", FILE_NAME, getEntropy(FILE_NAME));
}
private static double getEntropy(String fileName) {
Map<Character,Integer> characterCount = new HashMap<>();
int length = 0;
try (BufferedReader reader = new BufferedReader(new FileReader(new File(fileName)));) {
int c = 0;
while ( (c = reader.read()) != -1 ) {
characterCount.merge((char) c, 1, (v1, v2) -> v1 + v2);
length++;
}
}
catch ( IOException e ) {
throw new RuntimeException(e);
}
double entropy = 0;
for ( char key : characterCount.keySet() ) {
double fraction = (double) characterCount.get(key) / length;
entropy -= fraction * Math.log(fraction);
}
return entropy / Math.log(2);
}
}
- Output:
Entropy of file "src/EntropyNarcissist.java" = 4.691381977073.
jq
Works with jq, the C implementation of jq
Works with gojq, the Go implementation of jq
Works with jaq, the Rust implementation of jq
The program assumes it will be presented to itself using an invocation of jq with the -sR options, along the lines of:
jq -sR -f entropy-narcissist.jq < entropy-narcissist.jq
If your jq supports `keys_unsorted`, feel free to use it instead of `keys`.
def chars: explode[] | [.] | implode;
def bow(stream):
reduce stream as $word ({}; .[($word|tostring)] += 1);
def sum(s): reduce s as $x (0; .+$x);
length as $l
| bow(chars)
| sum(keys[] as $k | .[$k] as $c | $c * ($c|log2) )
| ($l|log2) - ./$l
- Output:
4.796499915496963
Julia
using DataStructures
entropy(s) = -sum(x -> x / length(s) * log2(x / length(s)), values(counter(s)))
println("self-entropy: ", entropy(read(Base.source_path(), String)))
- Output:
self-entropy: 4.716527560525572
Kotlin
// version 1.1.0 (entropy_narc.kt)
fun log2(d: Double) = Math.log(d) / Math.log(2.0)
fun shannon(s: String): Double {
val counters = mutableMapOf<Char, Int>()
for (c in s) {
if (counters.containsKey(c)) counters[c] = counters[c]!! + 1
else counters.put(c, 1)
}
val nn = s.length.toDouble()
var sum = 0.0
for (key in counters.keys) {
val term = counters[key]!! / nn
sum += term * log2(term)
}
return -sum
}
fun main(args: Array<String>) {
val prog = java.io.File("entropy_narc.kt").readText()
println("This program's entropy is ${"%18.16f".format(shannon(prog))}")
}
- Output:
This program's entropy is 4.8471803665906705
Lua
arg[0] gives the path of the script currently being executed
function getFile (filename)
local inFile = io.open(filename, "r")
local fileContent = inFile:read("*all")
inFile:close()
return fileContent
end
function log2 (x) return math.log(x) / math.log(2) end
function entropy (X)
local N, count, sum, i = X:len(), {}, 0
for char = 1, N do
i = X:sub(char, char)
if count[i] then
count[i] = count[i] + 1
else
count[i] = 1
end
end
for n_i, count_i in pairs(count) do
sum = sum + count_i / N * log2(count_i / N)
end
return -sum
end
print(entropy(getFile(arg[0])))
- Output:
4.3591214356783
Nim
As we have compiled without specific options to change the way the executable is named, we can retrieve the source file name by adding the suffix “.nim” to the executable file name. We suppose also that the source file is in the same directory as the executable (which is true in our environment).
import os, math, strutils, tables
let execName = getAppFilename().splitPath().tail
let srcName = execName & ".nim"
func entropy(str: string): float =
var counts: CountTable[char]
for ch in str:
counts.inc(ch)
for count in counts.values:
result -= count / str.len * log2(count / str.len)
echo "Source file entropy: ", srcName.readFile().entropy().formatFloat(ffDecimal, 5)
echo "Binary file entropy: ", execName.readFile().entropy().formatFloat(ffDecimal, 5)
- Output:
Source file entropy: 4.75555 Binary file entropy: 5.97036
PARI/GP
entropy(s)=s=Vec(s);my(v=vecsort(s,,8));-sum(i=1,#v,(x->x*log(x))(sum(j=1,#s,v[i]==s[j])/#s))/log(2);
entropy(Str(entropy))
- Output:
%1 = 4.54978213
Perl
#!/usr/bin/perl
use strict ;
use warnings ;
use feature 'say' ;
sub log2 {
my $number = shift ;
return log( $number ) / log( 2 ) ;
}
open my $fh , "<" , $ARGV[ 0 ] or die "Can't open $ARGV[ 0 ]$!\n" ;
my %frequencies ;
my $totallength = 0 ;
while ( my $line = <$fh> ) {
chomp $line ;
next if $line =~ /^$/ ;
map { $frequencies{ $_ }++ } split( // , $line ) ;
$totallength += length ( $line ) ;
}
close $fh ;
my $infocontent = 0 ;
for my $letter ( keys %frequencies ) {
my $content = $frequencies{ $letter } / $totallength ;
$infocontent += $content * log2( $content ) ;
}
$infocontent *= -1 ;
say "The information content of the source file is $infocontent !" ;
- Output:
The information content of the source file is 4.6487923749222 !
Phix
Minor edit to the Entropy answer, if compiled assumes source code is in the same directory.
without js -- command_line, file i/o

-- Returns the Shannon entropy of sequence s, in bits per element.
-- symbols holds each distinct element in order of first appearance and
-- counts holds the matching number of occurrences.
function entropy(sequence s)
    sequence symbols = {},
             counts = {}
    integer N = length(s)
    for i=1 to N do
        object si = s[i]
        integer k = find(si,symbols)
        if k=0 then
            symbols = append(symbols,si)
            counts = append(counts,1)
        else
            counts[k] += 1
        end if
    end for
    atom H = 0
    for i=1 to length(counts) do
        atom ci = counts[i]/N
        H -= ci*log2(ci)
    end for
    return H
end function

-- command_line()[2] with ".exe" replaced by ".exw" names the source file.
?entropy(get_text(open(substitute(command_line()[2],".exe",".exw")),"rb"))
Output is e.g. 4.993666233, but that may vary with Windows/Linux line endings, tabs vs spaces, trailing returns, BOM headers, etc.
PHP
<?php
$h = 0;
$s = file_get_contents(__FILE__);
$l = strlen($s);
foreach ( count_chars($s, 1) as $c )
$h -=
( $c / $l ) *
log( $c / $l, 2 );
echo $h;
- Output:
2.9339128173013
Picat
entropy(File) = E =>
Bytes = read_file_bytes(File),
F = [0: I in 1..256],
foreach (B in Bytes)
B1 := B + 1,
F[B1] := F[B1] + 1
end,
HM = 0,
foreach (C in F)
if (C > 0) then
HM := HM + C * log(2, C)
end
end,
L = Bytes.length,
E = log(2, L) - HM / L.
main(Args) =>
printf("Entropy: %f\n", entropy(Args[1])).
- Output:
$ picat entropy.pi entropy.pi Entropy: 4.384622
PicoLisp
(scl 8)
(load "@lib/math.l")
(setq LN2 0.693147180559945309417)
(setq Me
(let F (file)
(pack (car F) (cadr F))))
(setq Hist NIL Sz 0)
(in Me
(use Ch
(while (setq Ch (rd 1))
(inc 'Sz)
(if (assoc Ch Hist)
(con @ (inc (cdr @)))
(setq Hist (cons (cons Ch 1) Hist))))))
(prinl "My entropy is "
(format
(*/
(sum
'((Pair)
(let R (*/ (cdr Pair) 1. Sz)
(- (*/ R (log R) 1.))))
Hist)
1. LN2)
*Scl))
(bye)
- Output:
My entropy is 4.12169822
Python
Minor edit to the Entropy answer.
import math
from collections import Counter
def entropy(s):
    """Return the Shannon entropy of the sequence ``s`` in bits per symbol.

    Each distinct element contributes -p * log2(p), where p is its relative
    frequency in ``s``. An empty ``s`` gives 0.
    """
    total = float(len(s))
    shares = [occurrences / total for occurrences in Counter(s).values()]
    return -sum(share * math.log(share, 2) for share in shares)
with open(__file__) as f:
b=f.read()
print(entropy(b))
- Output:
4.575438063744619
Racket
The entropy of the program below is 4.512678555350348.
#lang racket
(require math)
(define (log2 x) (/ (log x) (log 2)))
(define ds (string->list (file->string "entropy.rkt")))
(define n (length ds))
(- (for/sum ([(d c) (in-hash (samples->hash ds))])
(* (/ c n) (log2 (/ c n)))))
Raku
(formerly Perl 6)
say log(2) R/ [+] map -> \p { p * -log p }, $_.comb.Bag.values >>/>> +$_
given slurp($*PROGRAM-NAME).comb
Result should be in the neighborhood of 4.9
- Output:
4.89351613053006
REXX
REXX doesn't have a BIF (built-in function) for log or ln, so the subroutine (function) log2 is included herein.
/*REXX program calculates the "information entropy" for ~this~ REXX program. */
numeric digits length( e() ) % 2 - length(.) /*use 1/2 of the decimal digits of E. */
#= 0; @.= 0; $=; $$=; recs= sourceline() /*define some handy─dandy REXX vars. */
do m=1 for recs; $=$||sourceLine(m) /* [↓] obtain program source and ──► $*/
end /*m*/ /* [↑] $ str won't have any meta chars*/
L=length($) /*the byte length of this REXX program.*/
do j=1 for L; _= substr($, j, 1) /*process each character in $ string.*/
if @._==0 then do; #= # + 1 /*¿Character unique? Bump char counter*/
$$= $$ || _ /*add this character to the $$ list. */
end
@._= @._ + 1 /*keep track of this character's count.*/
end /*j*/ /* [↑] characters are all 8─bit bytes.*/
sum= 0 /*calculate info entropy for each char.*/
do i=1 for #; _= substr($$, i, 1) /*obtain a character from unique list. */
sum= sum - @._ / L * log2(@._ / L) /*add {negatively} the char entropies. */
end /*i*/
say ' program length: ' L /*pgm length doesn't include meta chars*/
say 'program statements: ' recs /*pgm statements are actually pgm lines*/
say ' unique characters: ' #; say /*characters are 8─bit bytes of the pgm*/
say 'The information entropy of this REXX program ──► ' format(sum,,12)
exit /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
e: e= 2.718281828459045235360287471352662497757247093699959574966967627724076630; return e
/*──────────────────────────────────────────────────────────────────────────────────────*/
log2: procedure; parse arg x 1 ox; ig= x>1.5; ii= 0; is= 1 - 2 * (ig\==1)
numeric digits digits()+5; call e /*the precision of E must be≥digits(). */
do while ig & ox>1.5 | \ig&ox<.5; _= e; do j=-1; iz= ox * _ ** -is
if j>=0 & (ig & iz<1 | \ig&iz>.5) then leave; _= _ * _; izz= iz; end /*j*/
ox=izz; ii=ii+is*2**j; end /*while*/; x= x * e** -ii -1; z= 0; _= -1; p= z
do k=1; _= -_ * x; z= z+_/k; if z=p then leave; p= z; end /*k*/
r= z + ii; if arg()==2 then return r; return r / log2(2,.)
- output when using this REXX program as input:
program length: 2631 program statements: 31 unique characters: 79 The information entropy of this REXX program ──► 4.362691425984
Ruby
# Shannon entropy of the string +s+, in bits per character.
# Folds -p * log2(p) over the relative frequency p of each distinct
# character; an empty string gives 0.
def entropy(s)
  total = s.size.to_f
  s.each_char.tally.each_value.inject(0) do |acc, occurrences|
    share = occurrences / total
    acc - share * Math.log2(share)
  end
end
s = File.read(__FILE__)
p entropy(s)
- Output:
4.653607496799478
Rust
use std::fs::File;
use std::io::{Read, BufReader};
/// Shannon entropy, in bits per byte, of the bytes yielded by `iter`.
///
/// Bytes are tallied into a 256-bin table; every non-empty bin with
/// relative frequency `p` contributes `-p * log2(p)` to the result.
fn entropy<I: IntoIterator<Item = u8>>(iter: I) -> f32 {
    let mut bins = [0u64; 256];
    let mut total = 0u64;
    iter.into_iter().for_each(|byte| {
        bins[usize::from(byte)] += 1;
        total += 1;
    });
    bins.iter()
        .filter(|&&hits| hits > 0)
        .map(|&hits| {
            let ratio = hits as f32 / total as f32;
            -ratio * ratio.log2()
        })
        .sum()
}
fn main() {
let name = std::env::args().nth(0).expect("Could not get program name.");
let file = BufReader::new(File::open(name).expect("Could not read file."));
println!("Entropy is {}.", entropy(file.bytes().flatten()));
}
- Output:
Entropy is 5.7108583.
Sidef
func entropy(s) {
[0,
s.chars.freq.values.map {|c|
var f = c/s.len
f * f.log2
}...
]«-»
}
say entropy(File(__FILE__).open_r.slurp)
- Output:
4.27307750866434915713432109186549
Tcl
Note that this code doesn't bother to close the open handle on the script; it is only suitable as a demonstration program.
# Returns the Shannon entropy of str in bits per character.
# An empty string yields 0.0.
proc entropy {str} {
    # Braced expressions are compiled once and not substituted twice.
    set log2 [expr {log(2)}]
    # The length does not change inside the loop, so compute it once.
    set total [expr {double([string length $str])}]
    # Start from an empty dict so that empty input does not fail with
    # "can't read counts" in the second loop.
    set counts [dict create]
    foreach char [split $str ""] {dict incr counts $char}
    set entropy 0.0
    foreach count [dict values $counts] {
        set freq [expr {$count / $total}]
        set entropy [expr {$entropy - $freq * log($freq)/$log2}]
    }
    return $entropy
}
puts [format "entropy = %.5f" [entropy [read [open [info script]]]]]
- Output:
entropy = 4.59099
V (Vlang)
import os
import math
fn main() {
println("Binary file entropy: ${entropy(os.args[0])?}")
}
fn entropy(file string) ?f64 {
d := os.read_bytes(file)?
mut f := [256]f64{}
for b in d {
f[b]++
}
mut hm := 0.0
for c in f {
if c > 0 {
hm += c * math.log2(c)
}
}
l := f64(d.len)
return math.log2(l) - hm/l
}
- Output:
Binary file entropy: 5.676177202237735
Wren
Basically an amalgam of the code in the Print_itself#Wren and Entropy#Wren tasks.
import "os" for Process
import "io" for File
var args = Process.allArguments
var s = File.read(args[1]).trim()
var m = {}
for (c in s) {
var d = m[c]
m[c] = (d) ? d + 1 : 1
}
var hm = 0
for (k in m.keys) {
var c = m[k]
hm = hm + c * c.log2
}
var l = s.count
System.print(l.log2 - hm/l)
- Output:
4.6302314663
XPL0
To run: entropy < entropy.xpl
int Count(128), I, Len, Ch;
real Sum, Prob;
[for I:= 0 to 127 do Count(I):= 0;
Len:= 0;
loop [Ch:= ChIn(1);
if Ch = $1A\EOF\ then quit;
Count(Ch):= Count(Ch)+1;
Len:= Len+1;
];
Sum:= 0.;
for I:= 0 to 127 do
if Count(I) then
[Prob:= float(Count(I)) / float(Len);
Sum:= Sum + Prob*Ln(Prob);
];
RlOut(0, -Sum/Ln(2.));
]
- Output:
4.63457
zkl
Minor edit to the Entropy answer.
fcn entropy(text){
text.pump(Void,fcn(c,freq){ c=c.toAsc(); freq[c]=freq[c]+1; freq }
.fp1((0).pump(256,List,(0.0).create.fp(0)).copy()))
.filter() // remove all zero entries
.apply('/(text.len())) // (num of char)/len
.apply(fcn(p){-p*p.log()}) // |p*ln(p)|
.sum(0.0)/(2.0).log(); // sum * ln(e)/ln(2) to convert to log2
}
entropy(File("entropy.zkl").read().text).println();
- Output:
4.8422