Cumulative standard deviation

From Rosetta Code
Revision as of 21:48, 16 June 2009 by rosettacode>Paddy3118 (Add Python)
Task
Cumulative standard deviation
You are encouraged to solve this task according to the task description, using any language you may know.

Write a stateful function, class, generator or coroutine that takes a series of floating point numbers, one at a time, and returns the running standard deviation of the series. The task implementation should use the most natural programming style of those listed for the function in the implementation language; the task must state which is being used. Do not apply Bessel's Correction; the returned standard deviation should always be computed as if the sample seen so far is the entire population.

Use this to compute the standard deviation of the demonstration set:

C

Of course this function is not thread-safe nor it can be used to compute the standard deviation just for a set of values per time.

<lang c>#include <stdio.h>

  1. include <stdlib.h>
  2. include <math.h>

enum Action { VALUE, STDDEV, MEAN, VAR, COUNT, RESET }; double stat_object(double v, enum Action action) {

 static double sum = 0.0;
 static double sum2 = 0.0;
 static size_t num = 0;
 double m;
 switch(action) {
 case VALUE:
   num++;
   sum += v;
   sum2 += v*v;
   return stat_object(0.0, STDDEV);
 case STDDEV:
   return sqrt(stat_object(0.0, VAR));
 case MEAN:
   return (num>0) ? sum/(double)num : 0.0;
 case VAR:
   m = stat_object(0.0, MEAN);
   return (num>0) ? (sum2/(double)num - m*m) : 0.0;
 case COUNT:
   return num;
 case RESET:
   sum = sum2 = 0.0; num = 0;
   return 0.0;
 }

}</lang>

<lang c>double v[] = { 2,4,4,4,5,5,7,9 };

int main() {

 int i;
 double sd;
 for(i=0; i < sizeof(v)/sizeof(double) ; i++)
   sd = stat_object(v[i], VALUE);
 printf("standard dev = %lf\n", sd);
 return 0;

}</lang>

Forth

<lang forth>

st-count ( stats -- n ) f@ ;
st-sum ( stats -- sum ) float+ f@ ;
st-sumsq ( stats -- sum*sum ) 2 floats + f@ ;
st-add ( fnum stats -- )
         dup f@    1e f+ dup f!  float+
         dup f@ fover f+ dup f!  float+
 fdup f* dup f@       f+     f! ;
st-mean ( stats -- mean )
 dup st-sum st-count f/ ;
st-variance ( stats -- var )
 dup st-sumsq
 dup st-mean fdup f* dup st-count f*  f-
 st-count f/ ;
st-stddev ( stats -- stddev )
 st-variance fsqrt ;

create stats 0e f, 0e f, 0e f,

2e stats st-add 4e stats st-add 4e stats st-add 4e stats st-add 5e stats st-add 5e stats st-add 7e stats st-add 9e stats st-add

stats st-stddev f. \ 2. </lang>

Fortran

Translation of: C
Works with: Fortran version 95 and later

This one imitates C and suffers the same problems: the function is not thread-safe and must be used to compute the stddev for one set per time.

<lang fortran>program Test_Stddev

 implicit none
 real, dimension(8) :: v = (/ 2,4,4,4,5,5,7,9 /)
 integer :: i
 real :: sd
 do i = 1, size(v)
    sd = stat_object(v(i))
 end do
 print *, "std dev = ", sd

contains

 recursive function stat_object(a, cmd) result(stddev)
   real :: stddev
   real, intent(in) :: a
   character(len=*), intent(in), optional :: cmd
   real, save :: summa = 0.0, summa2 = 0.0
   integer, save :: num = 0
   real :: m
   if ( .not. present(cmd) ) then
      num = num + 1
      summa = summa + a
      summa2 = summa2 + a*a
      stddev = stat_object(0.0, "stddev")
   else
      select case(cmd)
      case("stddev")
         stddev = sqrt(stat_object(0.0, "variance"))
      case("variance")
         m = stat_object(0.0, "mean")
         if ( num > 0 ) then
            stddev = summa2/real(num) - m*m
         else
            stddev = 0.0
         end if
      case("count")
         stddev = real(num)
      case("mean")
         if ( num > 0 ) then
            stddev = summa/real(num)
         else
            stddev = 0.0
         end if
      case("reset")
         summa = 0.0
         summa2 = 0.0
         num = 0
      case default
         stddev = 0.0
      end select
   end if
 end function stat_object

end program Test_Stddev</lang>

Objective-C

<lang objc>#import <stdio.h>

  1. import <math.h>
  2. import <objc/Object.h>

@interface SDAccum : Object {

 double sum, sum2;
 unsigned int num;

} -(id)init; -(double)value: (double)v; -(unsigned int)count; -(double)mean; -(double)variance; -(double)stddev; @end

@implementation SDAccum -(id)init {

 sum = sum2 = 0.0;
 num = 0;
 return self;

} -(double)value: (double)v {

 sum = sum + v;
 sum2 = sum2 + v*v;
 num++;
 return [self stddev];

} -(unsigned int)count {

 return num;

} -(double)mean {

 return (num>0) ? sum/(double)num : 0.0;

} -(double)variance {

 double m = [self mean];
 return (num>0) ? (sum2/(double)num - m*m) : 0.0;

} -(double)stddev {

 return sqrt([self variance]);

} @end</lang>

<lang objc>double v[] = { 2,4,4,4,5,5,7,9 };

int main() {

 int i;
 double sd;
 id sdacc = [SDAccum new];
 for(i=0; i < sizeof(v)/sizeof(double) ; i++)
   sd = [sdacc value: v[i]];
 printf("std dev = %lf\n", sd);
 return 0;

}</lang>

Perl

<lang perl>{

   package SDAccum;
   sub new {

my $class = shift; my $self = {}; $self->{sum} = 0.0; $self->{sum2} = 0.0; $self->{num} = 0; bless $self, $class; return $self;

   }
   sub count {

my $self = shift; return $self->{num};

   }
   sub mean {

my $self = shift; return ($self->{num}>0) ? $self->{sum}/$self->{num} : 0.0;

   }
   sub variance {

my $self = shift; my $m = $self->mean; return ($self->{num}>0) ? $self->{sum2}/$self->{num} - $m * $m : 0.0;

   }
   sub stddev {

my $self = shift; return sqrt($self->variance);

   }
   sub value {

my $self = shift; my $v = shift; $self->{sum} += $v; $self->{sum2} += $v * $v; $self->{num}++; return $self->stddev;

   }

}</lang>

<lang perl>my $sdacc = SDAccum->new; my $sd;

foreach my $v ( 2,4,4,4,5,5,7,9 ) {

   $sd = $sdacc->value($v);

} print "std dev = $sd\n";</lang>

Python

The program should work with Python 2.x and 3.x, although the output would not be a tuple in 3.x <lang python>>>> from math import sqrt >>> def sd(x):

   sd.sum  += x
   sd.sum2 += x*x
   sd.n    += 1.0
   sum, sum2, n = sd.sum, sd.sum2, sd.n
   return sqrt(sum2/n - sum*sum/n/n)

>>> sd.sum = sd.sum2 = sd.n = 0 >>> for value in (2,4,4,4,5,5,7,9):

   print (value, sd(value))


(2, 0.0) (4, 1.0) (4, 0.94280904158206258) (4, 0.8660254037844386) (5, 0.97979589711327075) (5, 1.0) (7, 1.3997084244475311) (9, 2.0) >>> </lang>

REXX

Works with: oorexx

This is indeed Object REXX.

<lang orexx>sdacc = .SDAccum~new x = .array~of(2,4,4,4,5,5,7,9) sd = 0 do i = 1 to x~size

  sd = sdacc~value(x[i])

end

say "std dev = "sd


class SDAccum
method sum attribute
method sum2 attribute
method count attribute
method init
 self~sum = 0.0
 self~sum2 = 0.0
 self~count = 0
method value
 expose sum sum2 count
 parse arg x
 sum = sum + x
 sum2 = sum2 + x*x
 count = count + 1
 return self~stddev
method mean
 expose sum count
 return sum/count
method variance
 expose sum2  count
 m = self~mean
 return sum2/count - m*m
method stddev
 return self~sqrt(self~variance)
method sqrt
 arg n
 if n = 0 then return 0
 ans = n / 2
 prev = n
 do until prev = ans
   prev = ans
   ans = ( prev + ( n / prev ) ) / 2
 end
 return ans</lang>

Ruby

Uses an object to keep state.

"Simplification of the formula [...] for standard deviation [...] can be memorized as taking the square root of (the average of the squares less the square of the average)." [1]

<lang ruby>class StdDevAccumulator

 def initialize
   @n, @sum, @sumofsquares = 0, 0.0, 0.0
 end
 
 def <<(num)
   # return self to make this possible:  sd << 1 << 2 << 3 # => 0.816496580927726
   @n += 1
   @sum += num
   @sumofsquares += num**2
   self
 end
 
 def stddev
   Math.sqrt( (@sumofsquares / @n) - (@sum / @n)**2 )
 end
 
 def to_s
   stddev.to_s
 end

end

sd = StdDevAccumulator.new i = 0 [2,4,4,4,5,5,7,9].each {|n| puts "adding #{n}: stddev of #{i+=1} samples is #{sd << n}" }</lang>

adding 2: stddev of 1 samples is 0.0
adding 4: stddev of 2 samples is 1.0
adding 4: stddev of 3 samples is 0.942809041582063
adding 4: stddev of 4 samples is 0.866025403784439
adding 5: stddev of 5 samples is 0.979795897113272
adding 5: stddev of 6 samples is 1.0
adding 7: stddev of 7 samples is 1.39970842444753
adding 9: stddev of 8 samples is 2.0

Smalltalk

Works with: GNU Smalltalk

<lang smalltalk>Object subclass: SDAccum [

   |sum sum2 num|
   SDAccum class >> new [  |o| 
       o := super basicNew.
       ^ o init.
   ]
   init [ sum := 0. sum2 := 0. num := 0 ]
   value: aValue [ 
     sum := sum + aValue.
     sum2 := sum2 + ( aValue * aValue ).
     num := num + 1.
     ^ self stddev
   ]
   count [ ^ num ]
   mean [ num>0 ifTrue: [^ sum / num] ifFalse: [ ^ 0.0 ] ]
   variance [ |m| m := self mean.
              num>0 ifTrue: [^ (sum2/num) - (m*m) ] ifFalse: [ ^ 0.0 ]
            ]
   stddev [ ^ (self variance) sqrt ] 

].</lang>

<lang smalltalk>|sdacc sd| sdacc := SDAccum new.

  1. ( 2 4 4 4 5 5 7 9 ) do: [ :v | sd := sdacc value: v ].

('std dev = %1' % { sd }) displayNl.</lang>

Tcl

With a Class

Works with: Tcl version 8.6

<lang tcl>oo::class create SDAccum {

   variable sum sum2 num
   constructor {} {
       set sum 0.0
       set sum2 0.0
       set num 0
   }
   method value {x} {
       set sum2 [expr {$sum2 + $x**2}]
       set sum [expr {$sum + $x}]
       incr num
       return [my stddev]
   }
   method count {} {
       return $num
   }
   method mean {} {
       expr {$sum / double($num)}
   }
   method variance {} {
       expr {($sum2 - double($num)*[my mean]**2)/double($num)}
   }
   method stddev {} {
       expr {sqrt([my variance])}
   }

}

  1. Demonstration

set sdacc [SDAccum new] foreach val {2 4 4 4 5 5 7 9} {

   set sd [$sdacc value $val]

} puts "the standard deviation is: $sd"</lang> which produces the output:

the standard deviation is: 2.0