Entropy/Narcissist: Difference between revisions

From Rosetta Code
Content added Content deleted
(Added Lua version)
m (→‎{{header|FreeBASIC}}: fix the highlighting problem, made de linux part working)
Line 147: Line 147:


=={{header|FreeBASIC}}==
=={{header|FreeBASIC}}==
<lang FreeBASIC>' version 01-06-2016
highlighting is a little messed up
<lang FreeBASIC>' version 26-06-2015
' compile with: fbc -s console
' compile with: fbc -s console
' modified code from ENTROPY entry
' modified code from ENTROPY entry

Dim As Integer i, count, totalchar(255)
Dim As Integer i, count, totalchar(255)
Dim As UByte buffer
Dim As UByte buffer
Dim As Double prop, entropy
Dim As Double prop, entropy
' command (0) returns the name of this program (including the path)
' command (0) returns the name of this program (including the path)
Dim As String filename = Command(0)
Dim As String slash, filename = Command(0)
Dim As Integer ff = FreeFile ' find first free filenumber
Dim As Integer ff = FreeFile ' find first free filenumber
Open filename For Binary As #ff
Open filename For Binary As #ff

If Err > 0 Then ' should not happen
If Err > 0 Then ' should not happen
Print "Error opening the file"
Print "Error opening the file"
Line 165: Line 164:
End
End
End If
End If

' will read 1 UByte from the file until it reaches the end of the file
' will read 1 UByte from the file until it reaches the end of the file
For i = 1 To Lof(ff)
For i = 1 To Lof(ff)
Line 172: Line 171:
count = count + 1
count = count + 1
Next
Next

For i = 0 To 255
For i = 0 To 255
If totalchar(i) = 0 Then Continue For
If totalchar(i) = 0 Then Continue For
Line 178: Line 177:
entropy = entropy - (prop * Log (prop) / Log(2))
entropy = entropy - (prop * Log (prop) / Log(2))
Next
Next

' next lines are only compiled when compiling for Windows OS (32/64)
' next lines are only compiled when compiling for Windows OS (32/64)
#Ifdef __FB_WIN32__
#Ifdef __FB_WIN32__
i = InStrRev(filename, "\")
slash = chr(92)
print "Windows version"
If i <> 0 Then filename = Right(filename, Len(filename)-i)
#endif
#ElseIf __FB_LINUX__
#Ifdef __FB_LINUX__
' sorry, I have no idea how to do that for Linux
slash = chr(47)
print "LINUX version"
#EndIf
#EndIf


i = InStrRev(filename, slash)
If i <> 0 Then filename = Right(filename, Len(filename)-i)
Print "My name is "; filename
Print "My name is "; filename
Print : Print "The Entropy of myself is"; entropy
Print : Print "The Entropy of myself is"; entropy
Print
Print

' empty keyboard buffer
' empty keyboard buffer
While InKey <> "" : Var _key_ = InKey : Wend
While InKey <> "" : Wend
Print : Print "hit any key to end program"
Print : Print "hit any key to end program"
Sleep
Sleep
End</lang>
End</lang>
{{out}}
{{out}}
<pre>Windows version
<pre>My name is entropy_narcissist.exe
My name is entropy_narcissist.exe

The Entropy of myself is 6.142286625408597

LINUX version
My name is entropy_narcissist


The Entropy of myself is 6.143551738549207</pre>
The Entropy of myself is 5.450343613062795</pre>


=={{header|Go}}==
=={{header|Go}}==

Revision as of 19:46, 2 June 2016

Entropy/Narcissist is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Write a program that computes and shows its own entropy.

Related Tasks:


AutoHotkey

Works with: AutoHotkey 1.1

<lang AutoHotkey>FileRead, var, *C %A_ScriptFullPath%
MsgBox, % Entropy(var)

; Entropy(n) - Shannon entropy of the string n, in bits per character.
; Returns 0 for an empty string.
Entropy(n) {
    a := [], len := StrLen(n), e := 0
    ; Tally by character code: object string keys are case-insensitive, so
    ; keying by the character itself would merge "a" and "A". Parsing the
    ; string directly also avoids using each character as a regex needle,
    ; which misbehaves for ".", "(", "\" and other metacharacters.
    Loop, Parse, n
    {
        k := Asc(A_LoopField)
        a[k] := (a.HasKey(k) ? a[k] : 0) + 1
    }
    for key, val in a {
        p := val / len
        ; Log() is base 10; dividing by Log(2) converts the result to base 2.
        e -= p * Log(p) / Log(2)
    }
    return e
}</lang>

Output:
5.942956

C

Minor edit to the Entropy answer.

Assumes that the source file is stored in the working directory as "entropy.c". <lang c>#include <stdio.h>

#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>
#define MAXLEN 961 //maximum string length

/*
 * Build a histogram of the first len bytes of S.
 *
 * hist must be zero-initialised by the caller and have room for one counter
 * per distinct byte value in S. Counters are assigned in order of first
 * appearance, so hist[0] belongs to the first byte seen, and so on.
 * Returns the number of distinct byte values found (the used length of hist).
 */
int makehist(char *S,int *hist,int len){
	int wherechar[256];	/* byte value -> slot in hist, or -1 if unseen */
	int i,histlen;
	histlen=0;
	for(i=0;i<256;i++)wherechar[i]=-1;
	for(i=0;i<len;i++){
		/* plain char may be signed: go through unsigned char so bytes
		   >= 0x80 cannot produce a negative array index */
		int c=(unsigned char)S[i];
		if(wherechar[c]==-1){
			wherechar[c]=histlen;
			histlen++;
		}
		hist[wherechar[c]]++;
	}
	return histlen;
}

double entropy(int *hist,int histlen,int len){ int i; double H; H=0; for(i=0;i<histlen;i++){ H-=(double)hist[i]/len*log2((double)hist[i]/len); } return H; }

int main(void){ char S[MAXLEN]; int len,*hist,histlen; double H; FILE *f; f=fopen("entropy.c","r"); for(len=0;!feof(f);len++)S[len]=fgetc(f); S[--len]='\0'; hist=(int*)calloc(len,sizeof(int)); histlen=makehist(S,hist,len); //hist now has no order (known to the program) but that doesn't matter H=entropy(hist,histlen,len); printf("%lf\n",H); return 0; }</lang>

Output:

<lang>5.195143</lang>

D

<lang d>void main(in string[] args) {
    import std.stdio, std.algorithm, std.math, std.file;

    // args[0] is the path this program was started with; the bytes of that
    // file are sorted so that equal values form runs.
    auto data = sort(cast(ubyte[]) read(args[0]));
    immutable total = double(data.length);

    // group yields (value, run length) pairs; run length / total is the
    // relative frequency of that byte value.
    auto probabilities = data.group.map!(g => g[1] / total);
    auto terms = probabilities.map!(p => -p * log2(p));

    writeln(terms.sum);
}</lang>

Output:
6.29803

Elixir

<lang elixir>File.open(__ENV__.file, [:read], fn(file) ->
  # Read this source file and print its Shannon entropy in bits per codepoint.
  text = IO.read(file, :all)
  # Use codepoints for both the tallies and the total; String.length counts
  # graphemes, which can differ from the number of codepoints being tallied.
  chars = String.codepoints(text)
  leng = length(chars)
  chars
  |> Enum.group_by(&(&1))
  |> Enum.map(fn {_, value} -> length(value) end)
  |> Enum.reduce(0, fn count, entropy ->
       freq = count / leng
       entropy - freq * :math.log2(freq)
     end)
  |> IO.puts
end)</lang>

Output:
4.848342673395324

Emacs Lisp

<lang lisp>(defun shannon-entropy (input)
  "Return the Shannon entropy of INPUT, in bits per element.
INPUT is a sequence such as a string; its elements are compared with
`eql', the default test of `make-hash-table'."
  (let ((freq-table (make-hash-table))
        (entropy 0)
        (length (+ (length input) 0.0))) ; float, so the divisions below are not integer divisions
    ;; `mapc' rather than `mapcar': only the side effect on FREQ-TABLE is wanted.
    (mapc (lambda (x)
            (puthash x (+ 1 (gethash x freq-table 0)) freq-table))
          input)
    ;; `setq' updates the `let'-bound variable under both dynamic and lexical
    ;; binding; (set 'entropy ...) would assign the global value when
    ;; lexical binding is in effect.
    (maphash (lambda (_k v)
               (setq entropy (+ entropy (* (/ v length) (log (/ v length) 2)))))
             freq-table)
    (- entropy)))

(defun narcissist ()
  "Return the Shannon entropy of the file \"U:/rosetta/narcissist.el\"."
  (let ((source (with-temp-buffer
                  (insert-file-contents "U:/rosetta/narcissist.el")
                  (buffer-string))))
    (shannon-entropy source)))</lang>

Output:

<lang lisp>(narcissist) 4.5129548515535785</lang>

FreeBASIC

<lang FreeBASIC>' version 01-06-2016
' compile with: fbc -s console
' modified code from ENTROPY entry
' Computes the Shannon entropy (bits per byte) of this program's own
' executable file and prints it together with the executable's name.

Dim As Integer i, count, totalchar(255)
Dim As UByte buffer
Dim As Double prop, entropy
' command (0) returns the name of this program (including the path)
Dim As String slash, filename = Command(0)
Dim As Integer ff = FreeFile ' find first free filenumber
Open filename For Binary As #ff

If Err > 0 Then ' should not happen
    Print "Error opening the file"
    Beep : Sleep 5000, 1
    End
End If

' will read 1 UByte from the file until it reaches the end of the file
For i = 1 To Lof(ff)
    Get #ff, ,buffer
    totalchar(buffer) += 1
    count = count + 1
Next

' all bytes have been tallied, the file is no longer needed
Close #ff

For i = 0 To 255
    ' byte values that never occur contribute nothing
    If totalchar(i) = 0 Then Continue For
    prop = totalchar(i) / count
    entropy = entropy - (prop * Log (prop) / Log(2))
Next

' next lines are only compiled when compiling for Windows OS (32/64)
#Ifdef __FB_WIN32__
    slash = chr(92)
    print "Windows version"
#endif
' next lines are only compiled when compiling for Linux
#Ifdef __FB_LINUX__
    slash = chr(47)
    print "LINUX version"
#EndIf

' strip the path, keep only what follows the last path separator
i = InStrRev(filename, slash)
If i <> 0 Then filename = Right(filename, Len(filename)-i)

Print "My name is "; filename
Print : Print "The Entropy of myself is"; entropy
Print

' empty keyboard buffer
While InKey <> "" : Wend
Print : Print "hit any key to end program"
Sleep
End</lang>

Output:
Windows version
My name is entropy_narcissist.exe

The Entropy of myself is 6.142286625408597

LINUX version
My name is entropy_narcissist

The Entropy of myself is 5.450343613062795

Go

<lang go>package main

import (

   "fmt"
   "io/ioutil"
   "log"
   "math"
   "os"
   "runtime"

)

func main() {

   _, src, _, _ := runtime.Caller(0)
   fmt.Println("Source file entropy:", entropy(src))
   fmt.Println("Binary file entropy:", entropy(os.Args[0]))

}

func entropy(file string) float64 {

   d, err := ioutil.ReadFile(file)
   if err != nil {
       log.Fatal(err)
   }
   var f [256]float64
   for _, b := range d {
       f[b]++
   }
   hm := 0.
   for _, c := range f {
       if c > 0 {
           hm += c * math.Log2(c)
       }
   }
   l := float64(len(d))
   return math.Log2(l) - hm/l

}</lang>

Output:
Source file entropy: 5.038501725029859
Binary file entropy: 5.388171194771937

J

Solution:<lang j>   NB. entropy: relative frequency of each distinct item (#/.~ % #),
   NB. each multiplied by its base-2 log, negated, then summed.
   entropy=:  +/@:-@(* 2&^.)@(#/.~ % #)

   NB. 4!:3'' lists the loaded scripts and 4!:4 gives the index of the script
   NB. that defined 'entropy'; that file is read (1!:1), measured, and the
   NB. result written to the screen (1!:2&2).
   1!:2&2 entropy 1!:1 (4!:4 <'entropy') { 4!:3''</lang>

Example:<lang j>   load 'entropy.ijs'
4.73307</lang>

Lua

arg[0] gives the path of the script currently being executed <lang Lua>function getFile (filename)
    -- Returns the whole content of the file `filename` as one string.
    -- io.open returns nil plus a message on failure; assert turns that into
    -- an error carrying the message instead of a later "index a nil value".
    local inFile = assert(io.open(filename, "r"))
    local fileContent = inFile:read("*all")
    inFile:close()
    return fileContent
end

-- Base-2 logarithm of x, computed from natural logarithms.
function log2 (x)
    local numerator = math.log(x)
    local denominator = math.log(2)
    return numerator / denominator
end

-- Shannon entropy of the string X, in bits per character.
function entropy (X)
    local total = #X
    local tally = {}
    -- Count how often each single-character substring occurs.
    for pos = 1, total do
        local ch = X:sub(pos, pos)
        tally[ch] = (tally[ch] or 0) + 1
    end
    -- Accumulate p * log2(p) over the distinct characters.
    local acc = 0
    local ln2 = math.log(2)
    for _, occurrences in pairs(tally) do
        local p = occurrences / total
        acc = acc + p * (math.log(p) / ln2)
    end
    return -acc
end

-- arg[0] is the path of the running script, so this prints the entropy of
-- the program's own source text.
print(entropy(getFile(arg[0])))</lang>

Output:
4.3591214356783

PARI/GP

<lang parigp>\\ entropy(s): Shannon entropy, in bits, of the characters of the string s.
\\ The call sits on its own line: a definition's body runs to the end of the
\\ line, so a call on the same line would become part of the function itself.
entropy(s)=s=Vec(s);my(v=vecsort(s,,8));-sum(i=1,#v,(x->x*log(x))(sum(j=1,#s,v[i]==s[j])/#s))/log(2);
entropy(Str(entropy))</lang>

Output:
%1 = 4.54978213

Perl

<lang Perl>#!/usr/bin/perl
use strict ;
use warnings ;
use feature 'say' ;

# Base-2 logarithm of a positive number.
sub log2 {
   my $number = shift ;
   return log( $number ) / log( 2 ) ;
}

# Measure the file named on the command line; with no argument, measure this
# script itself ($0).
my $file = @ARGV ? $ARGV[ 0 ] : $0 ;
open my $fh , "<" , $file or die "Can't open $file: $!\n" ;
my %frequencies ;
my $totallength = 0 ;
while ( my $line = <$fh> ) {
   chomp $line ;               # line terminators are not counted
   next if $line =~ /^$/ ;     # skip empty lines
   $frequencies{ $_ }++ for split( // , $line ) ;
   $totallength += length ( $line ) ;
}
close $fh ;
my $infocontent = 0 ;
for my $letter ( keys %frequencies ) {
   my $content = $frequencies{ $letter } / $totallength ;
   $infocontent += $content * log2( $content ) ;
}
$infocontent *= -1 ;
say "The information content of the source file is $infocontent !" ;</lang>

Output:
The information content of the source file is 4.6487923749222 !

Perl 6

Works with: rakudo version 2016.05

<lang Perl 6># Reads this program's own file (slurp of $*PROGRAM-NAME), splits it into
# characters with .comb and makes that list the topic $_ via "given".
# Bag counts each distinct character; ">>/>> +$_" divides every count by the
# number of characters, giving probabilities p. Each p is mapped to
# p * -log p, "[+]" sums the terms, and "log(2) R/" (reversed division)
# divides that sum by log(2) to express the result in bits.
say log(2) R/ [+] map -> \p { p * -log p }, $_.comb.Bag.values >>/>> +$_

   given slurp($*PROGRAM-NAME).comb</lang>

Result should be in the neighborhood of 4.9

Output:
4.89351613053006

Python

Works with: Python 3.4

Minor edit to the Entropy answer.

<lang Python>import math
from collections import Counter

def entropy(s):
    """Return the Shannon entropy of the sequence ``s`` in bits per element.

    Each distinct element contributes ``-p * log2(p)``, where ``p`` is its
    relative frequency. An empty sequence yields 0.
    """
    tallies = Counter(s)
    size = float(len(s))
    terms = []
    for occurrences in tallies.values():
        share = occurrences / size
        terms.append(share * math.log(share, 2))
    return -sum(terms)

with open('c:/E.py') as f:

   b=f.read()
   

print(entropy(b))</lang>

Output:
4.57672378235371

Racket

The entropy of the program below is 4.512678555350348. <lang racket>

  1. lang racket

(require math) (define (log2 x) (/ (log x) (log 2))) (define ds (string->list (file->string "entropy.rkt"))) (define n (length ds)) (- (for/sum ([(d c) (in-hash (samples->hash ds))])

    (* (/ c n) (log2 (/ c n)))))

</lang>

REXX

REXX doesn't have a BIF (built-in function) for   log   or   ln,   so the subroutine (function)   log2   is included herein. <lang rexx>/*REXX program calculates the "information entropy" for ~this~ REXX program. */
numeric digits 50                                /*use 50 decimal digits for precision. */
#=0;  @.=0;  $=;  $$=;  recs=sourceline()        /*define some handy─dandy REXX vars. */
             do m=1  for recs                   /* [↓]  obtain program source and ──► $*/
             $=$ || sourceline(m)               /*get a sourceLine of this REXX program*/
             end   /*m*/                        /* [↑]  $ str won't have any meta chars*/

L=length($)                                      /*the byte length of this REXX program.*/

     do j=1  for L;  _=substr($,j,1)            /*process each character in  $  string.*/
     if @._==0  then do;  #=#+1                 /*¿Character unique?  Bump char counter*/
                          $$=$$ || _            /*add this character to the  $$  list. */
                     end
     @._=@._+1                                  /*keep track of this character's count.*/
     end   /*j*/                                /* [↑]  characters are all 8─bit bytes.*/

sum=0                                            /*calculate info entropy for each char.*/

     do i=1  for #;  _=substr($$,i,1)           /*obtain a character from unique list. */
     sum=sum  -  @._/L  * log2(@._/L)           /*add {negatively} the char entropies. */
     end   /*i*/

say '    program length: '  L                    /*pgm length doesn't include meta chars*/
say 'program statements: '  recs                 /*pgm statements are actually pgm lines*/
say ' unique characters: '  #;     say           /*characters are 8─bit bytes of the pgm*/
say 'The information entropy of this REXX program ──► '  format(sum,,12)
exit                                             /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*log2:  returns the base─2 logarithm of the 1st argument;  when a 2nd argument is      */
/*       given, the value is returned before the final division by  log2(2,.)           */
log2: procedure; parse arg x 1 ox; ig= x>1.5; ii=0; is=1 - 2 * (ig\==1)

     numeric digits digits()+5                  /* [↓]  precision of E must be≥digits()*/
     e=2.71828182845904523536028747135266249775724709369995957496696762772407663035354759
          do  while  ig & ox>1.5 | \ig&ox<.5;      _=e;         do j=-1;  iz=ox* _**-is
          if j>=0 & (ig & iz<1 | \ig&iz>.5)  then leave;     _=_*_;   izz=iz;  end  /*j*/
          ox=izz;  ii=ii+is*2**j;  end;           x=x* e**-ii-1;  z=0;   _=-1;    p=z
              do k=1;   _=-_*x;   z=z+_/k;        if z=p  then leave;  p=z;    end  /*k*/
          r=z+ii;  if arg()==2  then return r;    return r/log2(2,.)</lang>

output

    program length:  2612
program statements:  34
 unique characters:  78

The information entropy of this REXX program ──►  4.284631866395

Ruby

<lang ruby>def entropy(s)

 counts = s.each_char.with_object(Hash.new(0.0)) {|c,h| h[c] += 1}
 counts.values.reduce(0) do |entropy, count|
   freq = count / s.size
   entropy - freq * Math.log2(freq)
 end

end s = File.read(__FILE__) p entropy(s)</lang>

Output:
4.885234973253878

Tcl

Note that this code doesn't bother to close the open handle on the script; it is only suitable as a demonstration program. <lang tcl>proc entropy {str} {
    # Shannon entropy of the string str, in bits per character.
    set log2 [expr {log(2)}]
    # Start from an empty dictionary so an empty string yields 0.0 instead of
    # an unset-variable error.
    set counts [dict create]
    foreach char [split $str ""] {dict incr counts $char}
    set entropy 0.0
    foreach count [dict values $counts] {
        set freq [expr {$count / double([string length $str])}]
        set entropy [expr {$entropy - $freq * log($freq)/$log2}]
    }
    return $entropy
}

puts [format "entropy = %.5f" [entropy [read [open [info script]]]]]</lang>

Output:
entropy = 4.59099

zkl

Minor edit to the Entropy answer. <lang zkl>fcn entropy(text){
   // Shannon entropy of text, in bits; each pipeline stage is described on
   // its own line below.

   // The first stage tallies characters into a table indexed by character
   // code (c.toAsc()); presumably the 256-element list of 0.0 values built
   // by the inner pump is that table -- TODO confirm.
  text.pump(Void,fcn(c,freq){ c=c.toAsc(); freq[c]=freq[c]+1; freq }
      .fp1((0).pump(256,List,(0.0).create.fp(0)).copy()))
  .filter()		      // remove all zero entries
  .apply('/(text.len()))     // (num of char)/len
  .apply(fcn(p){-p*p.log()}) // |p*ln(p)|
  .sum(0.0)/(2.0).log();     // sum * ln(e)/ln(2) to convert to log2

}

// Measures the file "entropy.zkl"; presumably that is this script's own file
// name and it is run from the directory containing it -- TODO confirm.
entropy(File("entropy.zkl").read().text).println();</lang>

Output:
4.8422