Welch's t-test: Difference between revisions

Content added Content deleted

Inline

Revision as of 07:33, 28 May 2015

Given two lists of data, calculate the p-Value used for null hypothesis testing.

C

Works with: C99

This shows how pvalue can be calculated from any two arrays, using Welch's 2-sided t-test, which doesn't assume equal variance. The high "n" value ensures roughly 17 digits convergence with Simpson's integral approximation. <lang C>#include <stdio.h>

include <math.h>

double calculate_pvalue (const double *array1, const size_t array1_size, const double *array2, const size_t array2_size) { double mean1 = 0.0, mean2 = 0.0; for (size_t x = 0; x < array1_size; x++) {//get array1 mean mean1 += array1[x]; } for (size_t x = 0; x < array2_size; x++) { mean2 += array2[x];//get array2 mean } mean1 /= array1_size; mean2 /= array2_size; double variance1 = 0.0, variance2 = 0.0; for (size_t x = 0; x < array1_size; x++) { variance1 += (array1[x]-mean1)*(array1[x]-mean1); } for (size_t x = 0; x < array2_size; x++) { variance2 += (array2[x]-mean2)*(array2[x]-mean2); } if ((mean1 == mean2) && (variance1 == variance2)) { return 1.0;//not worth calculating } variance1 = variance1/(array1_size-1); variance2 = variance2/(array2_size-1); const double WELCH_T_STATISTIC = (mean1-mean2)/sqrt(variance1/array1_size+variance2/array2_size); const double DEGREES_OF_FREEDOM = pow((variance1/array1_size+variance2/array2_size),2.0)//numerator / ( (variance1*variance1)/(array1_size*array1_size*(array1_size-1))+ (variance2*variance2)/(array2_size*array2_size*(array2_size-1)) ); // printf("Welch's t statistic is %lf, degrees of freedom is %lf\n",WELCH_T_STATISTIC,DEGREES_OF_FREEDOM); //modified implementation of Simpson's rule from http://rosettacode.org/wiki/Numerical_integration#C //this function Simpson-integrates from 0 to x, so there isn't any "from" term const double a = DEGREES_OF_FREEDOM/2, x = DEGREES_OF_FREEDOM/(WELCH_T_STATISTIC*WELCH_T_STATISTIC+DEGREES_OF_FREEDOM); const double n = 11000, h = x/n; double sum1 = 0.0, sum2 = 0.0; for(unsigned int i = 0;i < n; i++) { // printf("a = %.2f, b = %.2f => t = %lf, sum1 = %lf\n",a,b,t,sum1);

     sum1 += (pow(h * i + h / 2.0,a-1))*(pow(1-(h * i + h / 2.0),-0.5));

}

  for(unsigned int i = 1;i < n;i++) {
     sum2 += (pow(h * i,a-1))*(pow(1-h * i,-0.5));

} // printf("Sum1 = %lf; sum2 = %lf\n",sum1,sum2); return ((h / 6.0) * ((pow(x,a-1))*(pow(1-x,-0.5)) + 4.0 * sum1 + 2.0 * sum2))/(tgamma(a)*1.77245385090551610/tgamma(a+0.5));//heavily simplified to speed computation }

int main(void) { //examples of how to use function const double dset1[] = {27.5,21.0,19.0,23.6,17.0,17.9,16.9,20.1,21.9,22.6,23.1,19.6,19.0,21.7,21.4}; const double dset2[] = {27.1,22.0,20.8,23.4,23.4,23.5,25.8,22.0,24.8,20.2,21.9,22.1,22.9,20.5,24.4}; printf("%.17f\n",calculate_pvalue(dset1,sizeof(dset1)/sizeof(*dset1),dset2,sizeof(dset2)/sizeof(*dset2))); const double dset3[] = {17.2,20.9,22.6,18.1,21.7,21.4,23.5,24.2,14.7,21.8}; const double dset4[] = {21.5,22.8,21.0,23.0,21.6,23.6,22.5,20.7,23.4,21.8,20.7,21.7,21.5,22.5,23.6,21.5,22.5,23.5,21.5,21.8}; printf("%.17f\n",calculate_pvalue(dset3,sizeof(dset3)/sizeof(*dset3),dset4,sizeof(dset4)/sizeof(*dset4))); const double dset5[] = {19.8,20.4,19.6,17.8,18.5,18.9,18.3,18.9,19.5,22.0}; const double dset6[] = {28.2,26.6,20.1,23.3,25.2,22.1,17.7,27.6,20.6,13.7,23.2,17.5,20.6,18.0,23.9,21.6,24.3,20.4,24.0,13.2}; printf("%.17f\n",calculate_pvalue(dset5,sizeof(dset5)/sizeof(*dset5),dset6,sizeof(dset6)/sizeof(*dset6))); const double dset7[] = {30.02,29.99,30.11,29.97,30.01,29.99}; const double dset8[] = {29.89,29.93,29.72,29.98,30.02,29.98}; printf("%.17f\n",calculate_pvalue(dset7,sizeof(dset7)/sizeof(*dset7),dset8,sizeof(dset8)/sizeof(*dset8)));/**/ const double x[] = {3.0,4.0,1.0,2.1}; const double y[] = {490.2,340.0,433.9}; printf("%.17f\n",calculate_pvalue(x,sizeof(x)/sizeof(*x),y,sizeof(y)/sizeof(*y))); return 0; } </lang>

Racket

Translation of: C

, producing the same output.

<lang racket>#lang racket (require math/statistics math/special-functions)

(define (p-value S1 S2 #:n (n 11000))

 (define σ²1 (variance S1 #:bias #t))
 (define σ²2 (variance S2 #:bias #t))
 (define N1 (sequence-length S1))
 (define N2 (sequence-length S2))
 (define σ²/sz1 (/ σ²1 N1))
 (define σ²/sz2 (/ σ²2 N2))
 
 (define degrees-of-freedom
   (/ (sqr (+ σ²/sz1 σ²/sz2))
      (+ (/ (sqr σ²1) (* (sqr N1) (sub1 N1)))
         (/ (sqr σ²2) (* (sqr N2) (sub1 N2))))))
 
 (define a (/ degrees-of-freedom 2))
 (define a-1 (sub1 a))
 (define x (let ((welch-t-statistic (/ (- (mean S1) (mean S2)) (sqrt (+ σ²/sz1 σ²/sz2)))))
             (/ degrees-of-freedom (+ (sqr welch-t-statistic) degrees-of-freedom))))
 (define h (/ x n))
 
 (/ (* (/ h 6)
       (+ (* (expt x a-1)
             (expt (- 1 x) -1/2))
          (* 4 (for/sum ((i (in-range 0 n)))
                 (* (expt (+ (* h i) (/ h 2)) a-1)
                    (expt (- 1 (+ (* h i) (/ h 2))) -1/2))))
          (* 2  (for/sum ((i (in-range 0 n)))
                  (* (expt (* h i) a-1) (expt (- 1 (* h i)) -1/2))))))
    (* (gamma a) 1.77245385090551610 (/ (gamma (+ a 1/2))))))

(module+ test

 (list
  (p-value (list 27.5 21.0 19.0 23.6 17.0 17.9 16.9 20.1 21.9 22.6 23.1 19.6 19.0 21.7 21.4)
           (list 27.1 22.0 20.8 23.4 23.4 23.5 25.8 22.0 24.8 20.2 21.9 22.1 22.9 20.5 24.4))
  
  (p-value (list 17.2 20.9 22.6 18.1 21.7 21.4 23.5 24.2 14.7 21.8)
           (list 21.5 22.8 21.0 23.0 21.6 23.6 22.5 20.7 23.4 21.8
                 20.7 21.7 21.5 22.5 23.6 21.5 22.5 23.5 21.5 21.8))
  
  (p-value (list 19.8 20.4 19.6 17.8 18.5 18.9 18.3 18.9 19.5 22.0)
           (list 28.2 26.6 20.1 23.3 25.2 22.1 17.7 27.6 20.6 13.7
                 23.2 17.5 20.6 18.0 23.9 21.6 24.3 20.4 24.0 13.2))
  
  (p-value (list 30.02 29.99 30.11 29.97 30.01 29.99)
           (list 29.89 29.93 29.72 29.98 30.02 29.98))
  
  (p-value (list 3.0 4.0 1.0 2.1)
           (list 490.2 340.0 433.9))))</lang>

Output:

(0.021378001462867013 0.14884169660532798 0.035972271029796624 0.09077332428567102 0.01075139991904718)