Welch's t-test: Difference between revisions

From Rosetta Code
Content added Content deleted
(change to draft; change description; link to WP)
Line 4: Line 4:


=={{header|C}}==
=={{header|C}}==
{{works with|C99}}
This shows how pvalue can be calculated from any two arrays, using Welch's 2-sided t-test, which doesn't assume equal variance.
This shows how pvalue can be calculated from any two arrays, using Welch's 2-sided t-test, which doesn't assume equal variance.
The high "n" value ensures roughly 17 digits convergence with Simpson's integral approximation.
The high "n" value ensures roughly 17 digits convergence with Simpson's integral approximation.

Revision as of 21:15, 26 May 2015

Welch's t-test is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Given two lists of data, calculate the p-Value used for null hypothesis testing.

C

Works with: C99

This shows how pvalue can be calculated from any two arrays, using Welch's 2-sided t-test, which doesn't assume equal variance. The high "n" value ensures roughly 17 digits convergence with Simpson's integral approximation. <lang C>#include <stdio.h>

  1. include <math.h>

double calculate_pvalue (const double *array1, const size_t array1_size, const double *array2, const size_t array2_size) { double mean1 = 0.0, mean2 = 0.0; // printf("array1:"); for (size_t x = 0; x < array1_size; x++) { mean1 += array1[x]; // printf("\t%.1f",array1[x]); // printf("array[%d] = %lf\n",x,array1[x]); } // printf("\narray2:"); for (size_t x = 0; x < array2_size; x++) { mean2 += array2[x]; // printf("\t%.1f",array2[x]); // printf("array[%d] = %lf\n",x,array2[x]); } mean1 /= array1_size; mean2 /= array2_size; double variance1 = 0.0, variance2 = 0.0; for (size_t x = 0; x < array1_size; x++) { variance1 += (array1[x]-mean1)*(array1[x]-mean1); } for (size_t x = 0; x < array2_size; x++) { variance2 += (array2[x]-mean2)*(array2[x]-mean2); } if ((mean1 == mean2) && (variance1 == variance2)) { return 1701.1864;//not worth calculating } variance1 = variance1/(array1_size-1); variance2 = variance2/(array2_size-1); const double WELCH_T_STATISTIC = (mean1-mean2)/sqrt(variance1/array1_size+variance2/array2_size); const double DEGREES_OF_FREEDOM = pow((variance1/array1_size+variance2/array2_size),2.0)//numerator / ( (variance1*variance1)/(array1_size*array1_size*(array1_size-1))+ (variance2*variance2)/(array2_size*array2_size*(array2_size-1)) ); // printf("Welch's t statistic is %lf, degrees of freedom is %lf\n",WELCH_T_STATISTIC,DEGREES_OF_FREEDOM); //modified implementation of Simpson's rule from http://rosettacode.org/wiki/Numerical_integration#C //this function Simpson-integrates from 0 to x, so there isn't any "from" term const double a = DEGREES_OF_FREEDOM/2, x = DEGREES_OF_FREEDOM/(WELCH_T_STATISTIC*WELCH_T_STATISTIC+DEGREES_OF_FREEDOM); const double n = 11000, h = x/n; double sum1 = 0.0, sum2 = 0.0; for(unsigned int i = 0;i < n; i++) { // printf("a = %.2f, b = %.2f => t = %lf, sum1 = %lf\n",a,b,t,sum1);

     sum1 += (pow(h * i + h / 2.0,a-1))*(pow(1-(h * i + h / 2.0),-0.5));

}

  for(unsigned int i = 1;i < n;i++) {
     sum2 += (pow(h * i,a-1))*(pow(1-h * i,-0.5));

} // printf("Sum1 = %lf; sum2 = %lf\n",sum1,sum2); return ((h / 6.0) * ((pow(x,a-1))*(pow(1-x,-0.5)) + 4.0 * sum1 + 2.0 * sum2))/(tgamma(a)*1.77245385090551610/tgamma(a+0.5));//heavily simplified to speed computation }

int main(void) { //examples of how to use function const double dset1[] = {27.5,21.0,19.0,23.6,17.0,17.9,16.9,20.1,21.9,22.6,23.1,19.6,19.0,21.7,21.4}; const double dset2[] = {27.1,22.0,20.8,23.4,23.4,23.5,25.8,22.0,24.8,20.2,21.9,22.1,22.9,20.5,24.4}; printf("%.17f\n",calculate_pvalue(dset1,sizeof(dset1)/sizeof(*dset1),dset2,sizeof(dset2)/sizeof(*dset2))); const double dset3[] = {17.2,20.9,22.6,18.1,21.7,21.4,23.5,24.2,14.7,21.8}; const double dset4[] = {21.5,22.8,21.0,23.0,21.6,23.6,22.5,20.7,23.4,21.8,20.7,21.7,21.5,22.5,23.6,21.5,22.5,23.5,21.5,21.8}; printf("%.17f\n",calculate_pvalue(dset3,sizeof(dset3)/sizeof(*dset3),dset4,sizeof(dset4)/sizeof(*dset4))); const double dset5[] = {19.8,20.4,19.6,17.8,18.5,18.9,18.3,18.9,19.5,22.0}; const double dset6[] = {28.2,26.6,20.1,23.3,25.2,22.1,17.7,27.6,20.6,13.7,23.2,17.5,20.6,18.0,23.9,21.6,24.3,20.4,24.0,13.2}; printf("%.17f\n",calculate_pvalue(dset5,sizeof(dset5)/sizeof(*dset5),dset6,sizeof(dset6)/sizeof(*dset6))); const double dset7[] = {30.02,29.99,30.11,29.97,30.01,29.99}; const double dset8[] = {29.89,29.93,29.72,29.98,30.02,29.98}; printf("%.17f\n",calculate_pvalue(dset7,sizeof(dset7)/sizeof(*dset7),dset8,sizeof(dset8)/sizeof(*dset8)));/**/ const double x[] = {3.0,4.0,1.0,2.1}; const double y[] = {490.2,340.0,433.9}; printf("%.17f\n",calculate_pvalue(x,sizeof(x)/sizeof(*x),y,sizeof(y)/sizeof(*y))); return 0; } </lang>