Entropy/Narcissist: Difference between revisions

(→‎{{header|jq}}: def sum():)
(16 intermediate revisions by 13 users not shown)
Line 1:
Line 10:
:*   [[Entropy]]
<syntaxhighlight lang="ada">with Ada.Text_Io;
with Ada.Command_Line;
with Ada.Numerics.Elementary_Functions;
procedure Entropy is
use Ada.Text_Io;
type Hist_Type is array (Character) of Natural;
function Log_2 (V : Float) return Float is
use Ada.Numerics.Elementary_Functions;
return Log (V) / Log (2.0);
end Log_2;
procedure Read_File (Name : String; Hist : out Hist_Type) is
File : File_Type;
Char : Character;
Hist := (others => 0);
Open (File, In_File, Name);
while not End_Of_File (File) loop
Get (File, Char);
Hist (Char) := Hist (Char) + 1;
end loop;
Close (File);
end Read_File;
function Length_Of (Hist : Hist_Type) return Natural is
Sum : Natural := 0;
for V of Hist loop
Sum := Sum + V;
end loop;
return Sum;
end Length_Of;
function Entropy_Of (Hist : Hist_Type) return Float is
Length : constant Float := Float (Length_Of (Hist));
Sum : Float := 0.0;
for V of Hist loop
if V > 0 then
Sum := Sum + Float (V) / Length * Log_2 (Float (V) / Length);
end if;
end loop;
return -Sum;
end Entropy_Of;
package Float_Io is new Ada.Text_Io.Float_Io (Float);
Name : constant String := Ada.Command_Line.Argument (1);
Hist : Hist_Type;
Entr : Float;
Float_Io.Default_Exp := 0;
Float_Io.Default_Aft := 6;
Read_File (Name, Hist);
Entr := Entropy_Of (Hist);
Put ("Entropy of '");
Put (Name);
Put ("' is ");
Float_Io.Put (Entr);
end Entropy;</syntaxhighlight>
<pre>Entropy of 'entropy.adb' is 4.559854</pre>
=={{header|ALGOL 68}}==
Line 16 ⟶ 86:
Note that the source here uses spaces, not tabs, hence the low entropy; replacing all runs of four spaces with a single space
results in an entropy of +4.64524532762062e +0.
<langsyntaxhighlight lang="algol68">BEGIN
# calculate the shannon entropy of a string #
PROC shannon entropy = ( STRING s )REAL:
Line 74 ⟶ 144:
print( ( shannon entropy( file contents ), newline ) )
Line 82 ⟶ 152:
{{works with|AutoHotkey 1.1}}
<syntaxhighlight lang="autohotkey">FileRead, var, *C %A_ScriptFullPath%
MsgBox, % Entropy(var)
Line 97 ⟶ 167:
return, e
The record separator RS is set to end of file. So getline reads the whole file in one line.
<syntaxhighlight lang="awk">
for(i in H)E-=(h=H[i]/NF)*log(h)
print "bytes ",NF," entropy ",E/log(2)
<pre>bytes 158 entropy 5.2802</pre>
=={{header|BBC BASIC}}==
{{works with|BBC BASIC for Windows}}
<syntaxhighlight lang="bbcbasic"> DIM Freq%(255)
FOR I%=0 TO 255
IF Freq%(I%) Entropy+=Freq%(I%) / Size * LN(Freq%(I%) / Size) / LN(2)
PRINT "My size is ";Size " bytes and my entropy is ";-Entropy "!"
<pre>My size is 224 bytes and my entropy is 5.11257089!</pre>
Line 106 ⟶ 202:
Assumes that the source file is stored in the working directory as "entropy.c".
<langsyntaxhighlight lang="c">#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
Line 153 ⟶ 249:
return 0;
<syntaxhighlight lang="text">5.195143</syntaxhighlight>
<langsyntaxhighlight lang="cpp">#include <iostream>
#include <fstream>
#include <cmath>
Line 198 ⟶ 294:
return 0;
Line 204 ⟶ 300:
<langsyntaxhighlight lang="ruby">def entropy(s)
counts = s.chars.each_with_object(Hash(Char, Float64).new(0.0)) { |c, h| h[c] += 1 }
counts.values.sum do |count|
Line 212 ⟶ 308:
puts entropy File.read(__FILE__)</langsyntaxhighlight>
Line 219 ⟶ 315:
<langsyntaxhighlight lang="d">void main(in string[] args) {
import std.stdio, std.algorithm, std.math, std.file;
Line 229 ⟶ 325:
<langsyntaxhighlight lang="elixir">File.open(__ENV__.file, [:read], fn(file) ->
text = IO.read(file, :all)
leng = String.length(text)
Line 245 ⟶ 341:
|> IO.puts
Line 253 ⟶ 349:
=={{header|Emacs Lisp}}==
<langsyntaxhighlight lang="lisp">(defun shannon-entropy (input)
(let ((freq-table (make-hash-table))
(entropy 0)
Line 272 ⟶ 368:
(shannon-entropy (with-temp-buffer
(insert-file-contents "U:/rosetta/narcissist.el")
<langsyntaxhighlight lang="lisp">(narcissist)
<langsyntaxhighlight lang="erlang">#! /usr/bin/escript
-define(LOG2E, 1.44269504088896340735992).
Line 302 ⟶ 398:
_ -> count(Data, I+1, Frq #{Chr => 1})
Line 309 ⟶ 405:
<langsyntaxhighlight lang="factor">USING: assocs io io.encodings.utf8 io.files kernel math
math.functions math.statistics prettyprint sequences ;
IN: rosetta-code.entropy-narcissist
Line 320 ⟶ 416:
"entropy-narcissist.factor" utf8 [
contents entropy .
] with-file-reader</langsyntaxhighlight>
Line 327 ⟶ 423:
<syntaxhighlight lang="freebasic">' version 01-06-2016
' compile with: fbc -s console
' modified code from ENTROPY entry
Line 379 ⟶ 475:
Print : Print "hit any key to end program"
<pre>Windows version
Line 392 ⟶ 488:
<langsyntaxhighlight lang="go">package main
import (
Line 426 ⟶ 522:
l := float64(len(d))
return math.Log2(l) - hm/l
Line 434 ⟶ 530:
<langsyntaxhighlight lang="haskell">import qualified Data.ByteString as BS
import Data.List
import System.Environment
Line 444 ⟶ 540:
entropy = sort >>> group >>> map genericLength >>> normalize >>> map lg >>> sum
where lg c = -c * logBase 2 c
normalize c = let sc = sum c in map (/ sc) c</langsyntaxhighlight>
{{out}} In a shell
Line 461 ⟶ 557:
'''Solution''':<syntaxhighlight lang="j"> entropy=: +/@:-@(* 2&^.)@(#/.~ % #)
1!:2&2 entropy 1!:1 (4!:4 <'entropy') { 4!:3''</syntaxhighlight>
'''Example''':<langsyntaxhighlight lang="j"> load 'entropy.ijs'
<langsyntaxhighlight lang="java">
import java.io.BufferedReader;
import java.io.File;
Line 509 ⟶ 605:
<pre>Entropy of file "src/EntropyNarcissist.java" = 4.691381977073.</pre>
'''Works with jq, the C implementation of jq'''
'''Works with gojq, the Go implementation of jq'''
'''Works with jaq, the Rust implementation of jq'''
The program assumes it will be presented to itself using
an invocation of jq with the -sR options, along the lines of:
jq -sR -f entropy-narcissist.jq < entropy-narcissist.jq
If your jq supports `keys_unsorted`, feel free to use it instead of `keys`.
<syntaxhighlight lang="jq">
# Emit each character (Unicode code point) of the input string as a
# separate one-character string.
def chars:
  explode
  | .[]
  | [.]
  | implode;
# "Bag of words": tally the items produced by `stream`.
# The result is an object that maps each item, converted to a string,
# to the number of times it occurred.
def bow(stream):
  reduce stream as $item ({};
    ($item | tostring) as $key
    | .[$key] += 1);
# Add up the numbers produced by the stream `s`; an empty stream sums to 0.
def sum(s):
  reduce s as $term (0; . + $term);
# Main filter: the input is the whole program text as a single string.
# With N characters in total and c_i occurrences of each distinct
# character, the Shannon entropy in bits per character is
#   H = log2(N) - (1/N) * sum_i( c_i * log2(c_i) )
length as $total
| bow(chars) as $freq
| sum($freq | keys[] as $sym | $freq[$sym] | . * log2) as $weighted
| ($total | log2) - ($weighted / $total)
<syntaxhighlight lang="julia">using DataStructures
"""
    entropy(s)

Return the Shannon entropy, in bits per element, of the collection `s`.
Each distinct element contributes `-p * log2(p)`, where `p` is its count
(as reported by `counter`) divided by `length(s)`.
"""
function entropy(s)
    n = length(s)
    total = sum(values(counter(s))) do c
        p = c / n
        p * log2(p)
    end
    return -total
end
println("self-entropy: ", entropy(read(Base.source_path(), String)))</syntaxhighlight>
Line 524 ⟶ 654:
<langsyntaxhighlight lang="scala">// version 1.1.0 (entropy_narc.kt)
fun log2(d: Double) = Math.log(d) / Math.log(2.0)
Line 546 ⟶ 676:
val prog = java.io.File("entropy_narc.kt").readText()
println("This program's entropy is ${"%18.16f".format(shannon(prog))}")
Line 555 ⟶ 685:
arg[0] gives the path of the script currently being executed
<syntaxhighlight lang="lua">function getFile (filename)
local inFile = io.open(filename, "r")
local fileContent = inFile:read("*all")
Line 580 ⟶ 710:
Because we compiled without any option that changes the executable's name, we can recover the source file name by appending the suffix “.nim” to the executable file name. We also assume that the source file is in the same directory as the executable (which is true in our environment).
<lang Nim>import os, math, strutils, tables
<syntaxhighlight lang="nim">import os, math, strutils, tables
let execName = getAppFilename().splitPath().tail
Line 598 ⟶ 730:
echo "Source file entropy: ", srcName.readFile().entropy().formatFloat(ffDecimal, 5)
echo "Binary file entropy: ", execName.readFile().entropy().formatFloat(ffDecimal, 5)</langsyntaxhighlight>
Line 605 ⟶ 737:
<langsyntaxhighlight lang="parigp">entropy(s)=s=Vec(s);my(v=vecsort(s,,8));-sum(i=1,#v,(x->x*log(x))(sum(j=1,#s,v[i]==s[j])/#s))/log(2);
<pre>%1 = 4.54978213</pre>
<syntaxhighlight lang="perl">#!/usr/bin/perl
use strict ;
use warnings ;
Line 637 ⟶ 769:
$infocontent *= -1 ;
say "The information content of the source file is $infocontent !" ;</langsyntaxhighlight>
<pre>The information content of the source file is 4.6487923749222 !</pre>
Line 643 ⟶ 775:
Minor edit to the [[Entropy#Phix|Entropy]] answer; if compiled, it assumes the source code is in the same directory.
<!--<syntaxhighlight lang="phix">(notonline)-->
<lang Phix>function log2(atom v)
<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- command_line, file i/o</span>
return log(v)/log(2)
<span style="color: #008080;">function</span> <span style="color: #000000;">entropy</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span>
end function
<span style="color: #004080;">sequence</span> <span style="color: #000000;">symbols</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">counts</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
function entropy(sequence s)
<span style="color: #004080;">integer</span> <span style="color: #000000;">N</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span>
sequence symbols = {},
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">N</span> <span style="color: #008080;">do</span>
counts = {}
<span style="color: #004080;">object</span> <span style="color: #000000;">si</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
integer N = length(s)
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">si</span><span style="color: #0000FF;">,</span><span style="color: #000000;">symbols</span><span style="color: #0000FF;">)</span>
for i=1 to N do
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
object si = s[i]
<span style="color: #000000;">symbols</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">symbols</span><span style="color: #0000FF;">,</span><span style="color: #000000;">si</span><span style="color: #0000FF;">)</span>
integer k = find(si,symbols)
<span style="color: #000000;">counts</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">counts</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
if k=0 then
symbols = append(symbols,si)
<span style="color: #008080;">else</span>
<span style="color: #000000;">counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
counts = append(counts,1)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
counts[k] += 1
<span style="color: #004080;">atom</span> <span style="color: #000000;">H</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
end if
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">counts</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
end for
<span style="color: #004080;">atom</span> <span style="color: #000000;">ci</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]/</span><span style="color: #000000;">N</span>
atom H = 0
<span style="color: #000000;">H</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">ci</span><span style="color: #0000FF;">*</span><span style="color: #7060A8;">log2</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ci</span><span style="color: #0000FF;">)</span>
integer n = length(counts)
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
for i=1 to n do
<span style="color: #008080;">return</span> <span style="color: #000000;">H</span>
atom ci = counts[i]/N
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
H -= ci*log2(ci)
end for
<span style="color: #0000FF;">?</span><span style="color: #000000;">entropy</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">get_text</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">open</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">substitute</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">command_line</span><span style="color: #0000FF;">()[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],</span><span style="color: #008000;">".exe"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">".exw"</span><span style="color: #0000FF;">)),</span><span style="color: #008000;">"rb"</span><span style="color: #0000FF;">))</span>
return H
end function
Output is e.g. 4.993666233, but that may vary with Windows/Linux line endings, tabs vs. spaces, trailing returns, BOM headers, etc.
<syntaxhighlight lang="php"><?php
$h = 0;
$s = file_get_contents(__FILE__);
Line 686 ⟶ 813:
( $c / $l ) *
log( $c / $l, 2 );
echo $h;</langsyntaxhighlight>
{{works with|Picat}}
<syntaxhighlight lang="picat">
entropy(File) = E =>
Bytes = read_file_bytes(File),
F = [0: I in 1..256],
foreach (B in Bytes)
B1 := B + 1,
F[B1] := F[B1] + 1
HM = 0,
foreach (C in F)
if (C > 0) then
HM := HM + C * log(2, C)
L = Bytes.length,
E = log(2, L) - HM / L.
main(Args) =>
printf("Entropy: %f\n", entropy(Args[1])).
$ picat entropy.pi entropy.pi
Entropy: 4.384622
<syntaxhighlight lang="picolisp">
<lang PicoLisp>
(scl 8)
(load "@lib/math.l")
Line 723 ⟶ 879:
My entropy is 4.12169822
{{works with|Python 3.4}}
Line 733 ⟶ 890:
Minor edit to the [[Entropy#Python|Entropy]] answer.
<syntaxhighlight lang="python">import math
from collections import Counter
Line 743 ⟶ 900:
Line 749 ⟶ 906:
The entropy of the program below is 4.512678555350348.
<langsyntaxhighlight lang="racket">
#lang racket
(require math)
Line 757 ⟶ 914:
(- (for/sum ([(d c) (in-hash (samples->hash ds))])
(* (/ c n) (log2 (/ c n)))))
(formerly Perl 6)
{{Works with|rakudo|2016.05}}
<syntaxhighlight lang="raku" line>say log(2) R/ [+] map -> \p { p * -log p }, $_.comb.Bag.values >>/>> +$_
given slurp($*PROGRAM-NAME).comb</syntaxhighlight>
Result should be in the neighborhood of 4.9
Line 770 ⟶ 927:
REXX doesn't have a BIF (built-in function) for &nbsp; '''log''' &nbsp; or &nbsp; '''ln''', &nbsp; so the subroutine (function) &nbsp; '''log2''' &nbsp; is included herein.
<langsyntaxhighlight lang="rexx">/*REXX program calculates the "information entropy" for ~this~ REXX program. */
numeric digits length( e() ) % 2 - length(.) /*use 1/2 of the decimal digits of E. */
#= 0; @.= 0; $=; $$=; recs= sourceline() /*define some handy─dandy REXX vars. */
Line 800 ⟶ 957:
ox=izz; ii=ii+is*2**j; end /*while*/; x= x * e** -ii -1; z= 0; _= -1; p= z
do k=1; _= -_ * x; z= z+_/k; if z=p then leave; p= z; end /*k*/
r= z + ii; if arg()==2 then return r; return r / log2(2,.)</langsyntaxhighlight>
{{out|output|text=&nbsp; when using this REXX program as input:}}
Line 811 ⟶ 968:
<syntaxhighlight lang="ruby">def entropy(s)
  counts = s.each_char.tally
  size = s.size.to_f
  counts.values.reduce(0) do |entropy, count|
    freq = count / size
    entropy - freq * Math.log2(freq)
  end
end

s = File.read(__FILE__)
p entropy(s)</syntaxhighlight>
<syntaxhighlight lang="rust">use std::fs::File;
use std::io::{Read, BufReader};
Line 852 ⟶ 1,010:
let file = BufReader::new(File::open(name).expect("Could not read file."));
println!("Entropy is {}.", entropy(file.bytes().flatten()));
<pre>Entropy is 5.7108583.</pre>
<langsyntaxhighlight lang="ruby">func entropy(s) {
s.chars.freq.values.map {|c|
Line 866 ⟶ 1,024:
say entropy(File(__FILE__).open_r.slurp)</langsyntaxhighlight>
Line 874 ⟶ 1,032:
Note that this code doesn't bother to close the open handle on the script; it is only suitable as a demonstration program.
<langsyntaxhighlight lang="tcl">proc entropy {str} {
set log2 [expr log(2)]
foreach char [split $str ""] {dict incr counts $char}
Line 885 ⟶ 1,043:
puts [format "entropy = %.5f" [entropy [read [open [info script]]]]]</langsyntaxhighlight>
entropy = 4.59099
=={{header|V (Vlang)}}==
<syntaxhighlight lang="go">import os
import math
fn main() {
println("Binary file entropy: ${entropy(os.args[0])?}")
fn entropy(file string) ?f64 {
d := os.read_bytes(file)?
mut f := [256]f64{}
for b in d {
mut hm := 0.0
for c in f {
if c > 0 {
hm += c * math.log2(c)
l := f64(d.len)
return math.log2(l) - hm/l
Binary file entropy: 5.676177202237735
Basically an amalgam of the code in the [[Print_itself#Wren]] and [[Entropy#Wren]] tasks.
<syntaxhighlight lang="wren">import "os" for Process
import "io" for File
import "/math" for Math
var args = Process.allArguments
Line 908 ⟶ 1,094:
for (k in m.keys) {
var c = m[k]
hm = hm + c * c.log2
var l = s.count
System.print(l.log2 - hm/l)</syntaxhighlight>
To run: entropy < entropy.xpl
<syntaxhighlight lang="XPL0">int Count(128), I, Len, Ch;
real Sum, Prob;
[for I:= 0 to 127 do Count(I):= 0;
Len:= 0;
loop [Ch:= ChIn(1);
if Ch = $1A\EOF\ then quit;
Count(Ch):= Count(Ch)+1;
Len:= Len+1;
Sum:= 0.;
for I:= 0 to 127 do
if Count(I) then
[Prob:= float(Count(I)) / float(Len);
Sum:= Sum + Prob*Ln(Prob);
RlOut(0, -Sum/Ln(2.));
Minor edit to the [[Entropy#zkl|Entropy]] answer.
<langsyntaxhighlight lang="zkl">fcn entropy(text){
text.pump(Void,fcn(c,freq){ c=c.toAsc(); freq[c]=freq[c]+1; freq }
Line 929 ⟶ 1,138:
