Entropy/Narcissist: Difference between revisions

From Rosetta Code
Content added Content deleted
(→‎{{header|AutoHotkey}}: Created entry)
(Added a Python approach.)
Line 168: Line 168:
{{out}}
{{out}}
<pre>4.89351613053006</pre>
<pre>4.89351613053006</pre>

=={{header|Python}}==
{{works with|Python 3.4}}

Minor edit to the [[Entropy#Python|Entropy]] answer.

<lang Python>import math
from collections import Counter

def e(s):
p, l = Counter(s), float(len(s))
return -sum( c/l * math.log(c/l, 2) for c in p.values())

with open('c:/E.py') as f:
b=f.read()
print(e(b))</lang>
{{Output}}
<pre>4.5783626637295045</pre>


=={{header|Racket}}==
=={{header|Racket}}==

Revision as of 18:17, 15 October 2014

Entropy/Narcissist is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Write a program that computes and shows its own entropy.

AutoHotkey

Works with: AutoHotkey 1.1

<lang AutoHotkey>FileRead, var, *C %A_ScriptFullPath% MsgBox, % Entropy(var)

Entropy(n) {

   a := [], len := StrLen(n), m := n
   while StrLen(m) {
       s := SubStr(m, 1, 1)
       m := RegExReplace(m, s, "", c)
       a[s] := c
   }
   for key, val in a {
       m := Log(p := val / len)
       e -= p * m / Log(2)
   }
   return, e

}</lang>

Output:
5.942956

C

Minor edit to the Entropy answer.

Assumes that the source file is stored in the working directory as "entropy.c". <lang c>#include <stdio.h>

  1. include <stdlib.h>
  2. include <stdbool.h>
  3. include <string.h>
  4. include <math.h>
  1. define MAXLEN 961 //maximum string length

int makehist(char *S,int *hist,int len){ int wherechar[256]; int i,histlen; histlen=0; for(i=0;i<256;i++)wherechar[i]=-1; for(i=0;i<len;i++){ if(wherechar[(int)S[i]]==-1){ wherechar[(int)S[i]]=histlen; histlen++; } hist[wherechar[(int)S[i]]]++; } return histlen; }

double entropy(int *hist,int histlen,int len){ int i; double H; H=0; for(i=0;i<histlen;i++){ H-=(double)hist[i]/len*log2((double)hist[i]/len); } return H; }

int main(void){ char S[MAXLEN]; int len,*hist,histlen; double H; FILE *f; f=fopen("entropy.c","r"); for(len=0;!feof(f);len++)S[len]=fgetc(f); S[--len]='\0'; hist=(int*)calloc(len,sizeof(int)); histlen=makehist(S,hist,len); //hist now has no order (known to the program) but that doesn't matter H=entropy(hist,histlen,len); printf("%lf\n",H); return 0; }</lang>

Output:

<lang>5.195143</lang>

D

<lang d>void main(in string[] args) {

   import std.stdio, std.algorithm, std.math, std.file;
   auto data = sort(cast(ubyte[])args[0].read);
   return data
          .group
          .map!(g => g[1] / double(data.length))
          .map!(p => -p * p.log2)
          .sum
          .writeln;

}</lang>

Output:
6.29803

Emacs Lisp

<lang lisp>(defun shannon-entropy (input)

 (let ((freq-table (make-hash-table))

(entropy 0) (length (+ (length input) 0.0)))

   (mapcar (lambda (x)

(puthash x (+ 1 (gethash x freq-table 0)) freq-table)) input)

   (maphash (lambda (k v)

(set 'entropy (+ entropy (* (/ v length) (log (/ v length) 2))))) freq-table)

 (- entropy)))

(defun narcissist ()

 (shannon-entropy (with-temp-buffer

(insert-file-contents "U:/rosetta/narcissist.el") (buffer-string))))</lang>

Output:

<lang lisp>(narcissist) 4.5129548515535785</lang>

J

Solution:<lang j> entropy=: +/@:-@(* 2&^.)@(#/.~ % #)

  1!:2&2 entropy 1!:1 (4!:4 <'entropy') { 4!:3</lang>

Example:<lang j> load 'entropy.ijs' 4.73307</lang>

PARI/GP

<lang parigp>entropy(s)=s=Vec(s);my(v=vecsort(s,,8));-sum(i=1,#v,(x->x*log(x))(sum(j=1,#s,v[i]==s[j])/#s))/log(2); entropy(Str(entropy))</lang>

Output:
%1 = 4.54978213

Perl

<lang Perl>#!/usr/bin/perl use strict ; use warnings ; use feature 'say' ;

sub log2 {

  my $number = shift ;
  return log( $number ) / log( 2 ) ;

}

open my $fh , "<" , $ARGV[ 0 ] or die "Can't open $ARGV[ 0 ]$!\n" ; my %frequencies ; my $totallength = 0 ; while ( my $line = <$fh> ) {

  chomp $line ;
  next if $line =~ /^$/ ;
  map { $frequencies{ $_ }++ } split( // , $line ) ;
  $totallength += length ( $line ) ;

} close $fh ; my $infocontent = 0 ; for my $letter ( keys %frequencies ) {

  my $content = $frequencies{ $letter } / $totallength ;
  $infocontent += $content * log2( $content ) ;

} $infocontent *= -1 ; say "The information content of the source file is $infocontent !" ;</lang>

Output:
The information content of the source file is 4.6487923749222 !

Perl 6

<lang Perl 6>say log(2) R/ [+] map -> \p { p * -log p }, .bag.values >>/>> +$_

   given slurp($*PROGRAM_NAME).comb</lang>
Output:
4.89351613053006

Python

Works with: Python 3.4

Minor edit to the Entropy answer.

<lang Python>import math from collections import Counter

def e(s):

   p, l = Counter(s), float(len(s))
   return -sum( c/l * math.log(c/l, 2) for c in p.values())

with open('c:/E.py') as f:

   b=f.read()
   

print(e(b))</lang>

Output:
4.5783626637295045

Racket

The entropy of the program below is 4.512678555350348. <lang racket>

  1. lang racket

(require math) (define (log2 x) (/ (log x) (log 2))) (define ds (string->list (file->string "entropy.rkt"))) (define n (length ds)) (- (for/sum ([(d c) (in-hash (samples->hash ds))])

    (* (/ c n) (log2 (/ c n)))))

</lang>

REXX

REXX doesn't have a BIF for LOG or LN,   so the subroutine (function) LOG2 is included herein. <lang rexx>/*REXX program calculates the information entropy for this REXX program.*/ numeric digits 30 /*use thirty digits for precision*/ n=0; @.=0; $$=; $=

             do m=1  for sourceline() /*obtain program source and ──► $*/
             $=$ || sourceline(m)     /*get a sourceLine of this pgm.  */
             end   /*m*/

L=length($)

     do j=1  for L;  _=substr($,j,1)  /*process each character in $ str*/
     if @._==0  then do;   n=n+1      /*if unique,  bump char counter. */
                     $$=$$ || _       /*add this character to the list.*/
                     end
     @._ = @._+1                      /*keep track of this char count. */
     end   /*j*/

sum=0 /*calc info entropy for each char*/

     do i=1  for n;  _=substr($$,i,1) /*obtain a char from unique list.*/
     sum=sum  -  @._/L  * log2(@._/L) /*add (negatively) the entropies.*/
     end   /*i*/

say 'program length: ' L say ' unique chars: ' n ; say say 'the information entropy of the string ──► ' format(sum,,12) " bits." exit /*stick a fork in it, we're done.*/ /*──────────────────────────────────LOG2 subroutine─────────────────────*/ log2: procedure; parse arg x 1 xx; ig= x>1.5; is=1-2*(ig\==1); ii=0 numeric digits digits()+5 /* [↓] precision of E must be > digits().*/ e=2.7182818284590452353602874713526624977572470936999595749669676277240766303535

   do  while  ig & xx>1.5 | \ig&xx<.5;   _=e;     do k=-1;  iz=xx* _**-is
   if k>=0 & (ig & iz<1 | \ig&iz>.5)     then leave;  _=_*_;  izz=iz; end
   xx=izz;  ii=ii+is*2**k;  end;         x=x* e**-ii-1;  z=0;  _=-1;  p=z
     do k=1;  _=-_*x;  z=z+_/k;  if z=p  then leave;  p=z;  end  /*k*/
   r=z+ii;  if arg()==2  then return r;  return r/log2(2,0)</lang>

output

program length:  1827
  unique chars:  69

the information entropy of this REXX program ──►  4.649522956153  bits.

Tcl

Note that this code doesn't bother to close the open handle on the script; it is only suitable as a demonstration program. <lang tcl>proc entropy {str} {

   set log2 [expr log(2)]
   foreach char [split $str ""] {dict incr counts $char}
   set entropy 0.0
   foreach count [dict values $counts] {

set freq [expr {$count / double([string length $str])}] set entropy [expr {$entropy - $freq * log($freq)/$log2}]

   }
   return $entropy

}

puts [format "entropy = %.5f" [entropy [read [open [info script]]]]]</lang>

Output:
entropy = 4.59099

zkl

Minor edit to the Entropy answer. <lang zkl>fcn entropy(text){

  text.pump(Void,fcn(c,freq){ c=c.toAsc(); freq[c]=freq[c]+1; freq }
      .fp1((0).pump(256,List,(0.0).create.fp(0)).copy()))
  .filter()		      // remove all zero entries
  .apply('/(text.len()))     // (num of char)/len
  .apply(fcn(p){-p*p.log()}) // |p*ln(p)|
  .sum(0.0)/(2.0).log();     // sum * ln(e)/ln(2) to convert to log2

}

entropy(File("entropy.zkl").read().text).println();</lang>

Output:
4.8422