Entropy/Narcissist: Difference between revisions
Line 274: | Line 274: | ||
$ ./Narcissist Narcissist.hs |
$ ./Narcissist Narcissist.hs |
||
4.452645183154108 |
|||
4.444689880591661 |
|||
Entropy of the binary |
Entropy of the binary |
Revision as of 04:25, 16 August 2016
- Task
Write a computer program that computes and shows its own entropy.
- Related Tasks
AutoHotkey
<lang AutoHotkey>FileRead, var, *C %A_ScriptFullPath% MsgBox, % Entropy(var)
Entropy(n) {
a := [], len := StrLen(n), m := n while StrLen(m) { s := SubStr(m, 1, 1) m := RegExReplace(m, s, "", c) a[s] := c } for key, val in a { m := Log(p := val / len) e -= p * m / Log(2) } return, e
}</lang>
- Output:
5.942956
C
Minor edit to the Entropy answer.
Assumes that the source file is stored in the working directory as "entropy.c". <lang c>#include <stdio.h>
- include <stdlib.h>
- include <stdbool.h>
- include <string.h>
- include <math.h>
- define MAXLEN 961 //maximum string length
int makehist(char *S,int *hist,int len){ int wherechar[256]; int i,histlen; histlen=0; for(i=0;i<256;i++)wherechar[i]=-1; for(i=0;i<len;i++){ if(wherechar[(int)S[i]]==-1){ wherechar[(int)S[i]]=histlen; histlen++; } hist[wherechar[(int)S[i]]]++; } return histlen; }
double entropy(int *hist,int histlen,int len){ int i; double H; H=0; for(i=0;i<histlen;i++){ H-=(double)hist[i]/len*log2((double)hist[i]/len); } return H; }
int main(void){ char S[MAXLEN]; int len,*hist,histlen; double H; FILE *f; f=fopen("entropy.c","r"); for(len=0;!feof(f);len++)S[len]=fgetc(f); S[--len]='\0'; hist=(int*)calloc(len,sizeof(int)); histlen=makehist(S,hist,len); //hist now has no order (known to the program) but that doesn't matter H=entropy(hist,histlen,len); printf("%lf\n",H); return 0; }</lang>
- Output:
<lang>5.195143</lang>
D
<lang d>void main(in string[] args) {
import std.stdio, std.algorithm, std.math, std.file;
auto data = sort(cast(ubyte[])args[0].read); return data .group .map!(g => g[1] / double(data.length)) .map!(p => -p * p.log2) .sum .writeln;
}</lang>
- Output:
6.29803
Elixir
<lang elixir>File.open(__ENV__.file, [:read], fn(file) ->
text = IO.read(file, :all) leng = String.length(text) String.codepoints(text) |> Enum.group_by(&(&1)) |> Enum.map(fn{_,value} -> length(value) end) |> Enum.reduce(0, fn count, entropy -> freq = count / leng entropy - freq * :math.log2(freq) end) |> IO.puts
end)</lang>
- Output:
4.848342673395324
Emacs Lisp
<lang lisp>(defun shannon-entropy (input)
(let ((freq-table (make-hash-table))
(entropy 0) (length (+ (length input) 0.0)))
(mapcar (lambda (x)
(puthash x (+ 1 (gethash x freq-table 0)) freq-table)) input)
(maphash (lambda (k v)
(set 'entropy (+ entropy (* (/ v length) (log (/ v length) 2))))) freq-table)
(- entropy)))
(defun narcissist ()
(shannon-entropy (with-temp-buffer
(insert-file-contents "U:/rosetta/narcissist.el") (buffer-string))))</lang>
- Output:
<lang lisp>(narcissist) 4.5129548515535785</lang>
FreeBASIC
<lang FreeBASIC>' version 01-06-2016 ' compile with: fbc -s console ' modified code from ENTROPY entry
Dim As Integer i, count, totalchar(255) Dim As UByte buffer Dim As Double prop, entropy ' command (0) returns the name of this program (including the path) Dim As String slash, filename = Command(0) Dim As Integer ff = FreeFile ' find first free filenumber Open filename For Binary As #ff
If Err > 0 Then ' should not happen
Print "Error opening the file" Beep : Sleep 5000, 1 End
End If
' will read 1 UByte from the file until it reaches the end of the file For i = 1 To Lof(ff)
Get #ff, ,buffer totalchar(buffer) += 1 count = count + 1
Next
For i = 0 To 255
If totalchar(i) = 0 Then Continue For prop = totalchar(i) / count entropy = entropy - (prop * Log (prop) / Log(2))
Next
' next lines are only compiled when compiling for Windows OS (32/64)
- Ifdef __FB_WIN32__
slash = chr(92) print "Windows version"
- endif
- Ifdef __FB_LINUX__
slash = chr(47) print "LINUX version"
- EndIf
i = InStrRev(filename, slash) If i <> 0 Then filename = Right(filename, Len(filename)-i)
Print "My name is "; filename Print : Print "The Entropy of myself is"; entropy Print
' empty keyboard buffer While InKey <> "" : Wend Print : Print "hit any key to end program" Sleep End</lang>
- Output:
Windows version My name is entropy_narcissist.exe The Entropy of myself is 6.142286625408597 LINUX version My name is entropy_narcissist The Entropy of myself is 5.450343613062795
Go
<lang go>package main
import (
"fmt" "io/ioutil" "log" "math" "os" "runtime"
)
func main() {
_, src, _, _ := runtime.Caller(0) fmt.Println("Source file entropy:", entropy(src)) fmt.Println("Binary file entropy:", entropy(os.Args[0]))
}
func entropy(file string) float64 {
d, err := ioutil.ReadFile(file) if err != nil { log.Fatal(err) } var f [256]float64 for _, b := range d { f[b]++ } hm := 0. for _, c := range f { if c > 0 { hm += c * math.Log2(c) } } l := float64(len(d)) return math.Log2(l) - hm/l
}</lang>
- Output:
Source file entropy: 5.038501725029859 Binary file entropy: 5.388171194771937
Haskell
<lang haskell>import qualified Data.ByteString as BS import Data.List import System.Environment
(>>>) = flip (.)
main = getArgs >>= head >>> BS.readFile >>= BS.unpack >>> entropy >>> print
entropy = sort >>> group >>> map genericLength >>> normalize >>> map lg >>> sum
where lg c = -c * logBase 2 c normalize c = let sc = sum c in map (/ sc) c</lang>
- Output:
In a shell
$ ghc --make -O3 Narcissist.hs
Entropy of the source
$ ./Narcissist Narcissist.hs 4.452645183154108
Entropy of the binary
$ ./Narcissist Narcissist 5.525417236346172
J
Solution:<lang j> entropy=: +/@:-@(* 2&^.)@(#/.~ % #)
1!:2&2 entropy 1!:1 (4!:4 <'entropy') { 4!:3</lang>
Example:<lang j> load 'entropy.ijs' 4.73307</lang>
Lua
arg[0] gives the path of the script currently being executed <lang Lua>function getFile (filename)
local inFile = io.open(filename, "r") local fileContent = inFile:read("*all") inFile:close() return fileContent
end
function log2 (x) return math.log(x) / math.log(2) end
function entropy (X)
local N, count, sum, i = X:len(), {}, 0 for char = 1, N do i = X:sub(char, char) if count[i] then count[i] = count[i] + 1 else count[i] = 1 end end for n_i, count_i in pairs(count) do sum = sum + count_i / N * log2(count_i / N) end return -sum
end
print(entropy(getFile(arg[0])))</lang>
- Output:
4.3591214356783
PARI/GP
<lang parigp>entropy(s)=s=Vec(s);my(v=vecsort(s,,8));-sum(i=1,#v,(x->x*log(x))(sum(j=1,#s,v[i]==s[j])/#s))/log(2); entropy(Str(entropy))</lang>
- Output:
%1 = 4.54978213
Perl
<lang Perl>#!/usr/bin/perl use strict ; use warnings ; use feature 'say' ;
sub log2 {
my $number = shift ; return log( $number ) / log( 2 ) ;
}
open my $fh , "<" , $ARGV[ 0 ] or die "Can't open $ARGV[ 0 ]$!\n" ; my %frequencies ; my $totallength = 0 ; while ( my $line = <$fh> ) {
chomp $line ; next if $line =~ /^$/ ; map { $frequencies{ $_ }++ } split( // , $line ) ; $totallength += length ( $line ) ;
} close $fh ; my $infocontent = 0 ; for my $letter ( keys %frequencies ) {
my $content = $frequencies{ $letter } / $totallength ; $infocontent += $content * log2( $content ) ;
} $infocontent *= -1 ; say "The information content of the source file is $infocontent !" ;</lang>
- Output:
The information content of the source file is 4.6487923749222 !
Perl 6
<lang Perl 6>say log(2) R/ [+] map -> \p { p * -log p }, $_.comb.Bag.values >>/>> +$_
given slurp($*PROGRAM-NAME).comb</lang>
Result should be in the neighborhood of 4.9
- Output:
4.89351613053006
Python
Minor edit to the Entropy answer.
<lang Python>import math from collections import Counter
def entropy(s):
p, lns = Counter(s), float(len(s)) return -sum( count/lns * math.log(count/lns, 2) for count in p.values())
with open('c:/E.py') as f:
b=f.read()
print(entropy(b))</lang>
- Output:
4.57672378235371
Racket
The entropy of the program below is 4.512678555350348. <lang racket>
- lang racket
(require math) (define (log2 x) (/ (log x) (log 2))) (define ds (string->list (file->string "entropy.rkt"))) (define n (length ds)) (- (for/sum ([(d c) (in-hash (samples->hash ds))])
(* (/ c n) (log2 (/ c n)))))
</lang>
REXX
REXX doesn't have a BIF (built-in function) for log or ln, so the subroutine (function) log2 is included herein. <lang rexx>/*REXX program calculates the "information entropy" for ~this~ REXX program. */ numeric digits 50 /*use 50 decimal digits for precision. */
- =0; @.=0; $=; $$=; recs=sourceline() /*define some handy─dandy REXX vars. */
do m=1 for recs /* [↓] obtain program source and ──► $*/ $=$ || sourceline(m) /*get a sourceLine of this REXX program*/ end /*m*/ /* [↑] $ str won't have any meta chars*/
L=length($) /*the byte length of this REXX program.*/
do j=1 for L; _=substr($,j,1) /*process each character in $ string.*/ if @._==0 then do; #=#+1 /*¿Character unique? Bump char counter*/ $$=$$ || _ /*add this character to the $$ list. */ end @._=@._+1 /*keep track of this character's count.*/ end /*j*/ /* [↑] characters are all 8─bit bytes.*/
sum=0 /*calculate info entropy for each char.*/
do i=1 for #; _=substr($$,i,1) /*obtain a character from unique list. */ sum=sum - @._/L * log2(@._/L) /*add {negatively} the char entropies. */ end /*i*/
say ' program length: ' L /*pgm length doesn't include meta chars*/ say 'program statements: ' recs /*pgm statements are actually pgm lines*/ say ' unique characters: ' #; say /*characters are 8─bit bytes of the pgm*/ say 'The information entropy of this REXX program ──► ' format(sum,,12) exit /*stick a fork in it, we're all done. */ /*──────────────────────────────────────────────────────────────────────────────────────*/ log2: procedure; parse arg x 1 ox; ig= x>1.5; ii=0; is=1 - 2 * (ig\==1)
numeric digits digits()+5 /* [↓] precision of E must be≥digits()*/ e=2.71828182845904523536028747135266249775724709369995957496696762772407663035354759 do while ig & ox>1.5 | \ig&ox<.5; _=e; do j=-1; iz=ox* _**-is if j>=0 & (ig & iz<1 | \ig&iz>.5) then leave; _=_*_; izz=iz; end /*j*/ ox=izz; ii=ii+is*2**j; end; x=x* e**-ii-1; z=0; _=-1; p=z do k=1; _=-_*x; z=z+_/k; if z=p then leave; p=z; end /*k*/ r=z+ii; if arg()==2 then return r; return r/log2(2,.)</lang>
output
program length: 2612 program statements: 34 unique characters: 78 The information entropy of this REXX program ──► 4.284631866395
Ruby
<lang ruby>def entropy(s)
counts = s.each_char.with_object(Hash.new(0.0)) {|c,h| h[c] += 1} counts.values.reduce(0) do |entropy, count| freq = count / s.size entropy - freq * Math.log2(freq) end
end s = File.read(__FILE__) p entropy(s)</lang>
- Output:
4.885234973253878
Sidef
<lang ruby>func entropy(s) {
[0, s.chars.freq.values.map {|c| var f = c/s.len f * f.log2 }... ]«-»
}
say entropy(File(__FILE__).open_r.slurp)</lang>
- Output:
4.27307750866434915713432109186549
Tcl
Note that this code doesn't bother to close the open handle on the script; it is only suitable as a demonstration program. <lang tcl>proc entropy {str} {
set log2 [expr log(2)] foreach char [split $str ""] {dict incr counts $char} set entropy 0.0 foreach count [dict values $counts] {
set freq [expr {$count / double([string length $str])}] set entropy [expr {$entropy - $freq * log($freq)/$log2}]
} return $entropy
}
puts [format "entropy = %.5f" [entropy [read [open [info script]]]]]</lang>
- Output:
entropy = 4.59099
zkl
Minor edit to the Entropy answer. <lang zkl>fcn entropy(text){
text.pump(Void,fcn(c,freq){ c=c.toAsc(); freq[c]=freq[c]+1; freq } .fp1((0).pump(256,List,(0.0).create.fp(0)).copy())) .filter() // remove all zero entries .apply('/(text.len())) // (num of char)/len .apply(fcn(p){-p*p.log()}) // |p*ln(p)| .sum(0.0)/(2.0).log(); // sum * ln(e)/ln(2) to convert to log2
}
entropy(File("entropy.zkl").read().text).println();</lang>
- Output:
4.8422