# Diversity prediction theorem

Diversity prediction theorem is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

The wisdom of the crowd is the collective opinion of a group of individuals rather than that of a single expert.

Wisdom-of-the-crowds research routinely attributes the superiority of crowd averages over individual judgments to the elimination of individual noise, an explanation that assumes independence of the individual judgments from each other. Thus the crowd tends to make its best decisions if it is made up of diverse opinions and ideologies.

Scott E. Page introduced the diversity prediction theorem: "The squared error of the collective prediction equals the average squared error minus the predictive diversity". Therefore, when the diversity in a group is large, the error of the crowd is small.

- Average Individual Error: Average of the individual squared errors

- Collective Error: Squared error of the collective prediction

- Prediction Diversity: Average squared distance from the individual predictions to the collective prediction

So, the Diversity Prediction Theorem states that, given a crowd of predictive models:

Collective Error = Average Individual Error - Prediction Diversity

## C

Accepts inputs from command line, prints out usage on incorrect invocation.

#include <string.h>
#include <stdlib.h>
#include <stdio.h>

/*
 * Arithmetic mean of the first `size` elements of `arr`.
 * The caller must pass size > 0; a size of zero divides by zero.
 */
float mean(float* arr, int size){
	float sum = 0;
	int i;

	for(i = 0; i < size; i++)
		sum += arr[i];

	return sum/size;
}

/*
 * Mean squared distance between `reference` and each of the `size`
 * values in `arr`.
 *
 * The squares are accumulated on the fly, so no scratch buffer has to
 * be allocated (and later released).
 */
float variance(float reference, float* arr, int size){
	float sum = 0;
	int i;

	for(i = 0; i < size; i++){
		const float diff = reference - arr[i];
		sum += diff*diff;
	}

	return sum/size;
}

/*
 * Parses a comma-separated list of numbers out of `str`.
 *
 * `str` is modified in place by strtok(). On success the function
 * returns a malloc'd array that the caller must free() and stores the
 * number of values actually parsed in *len. If the allocation fails it
 * returns NULL and sets *len to 0.
 *
 * strtok() skips empty fields ("1,,2" or a trailing comma), so *len is
 * the parsed count, which can be smaller than "number of commas + 1".
 */
float* extractData(char* str, int *len){
	float* arr;
	char* token;
	int i, capacity = 1, count = 0;

	/* Number of commas + 1 is an upper bound on the number of tokens. */
	for(i = 0; str[i] != '\0'; i++){
		if(str[i] == ',')
			capacity++;
	}

	*len = 0;
	arr = (float*)malloc(capacity*sizeof(float));
	if(arr == NULL)
		return NULL;

	for(token = strtok(str, ","); token != NULL; token = strtok(NULL, ","))
		arr[count++] = atof(token);

	*len = count;
	return arr;
}

/*
 * Usage: <program> <reference value> <observations separated by commas>
 *
 * Prints the average individual error, the crowd error and the
 * prediction diversity of the observations against the reference.
 */
int main(int argC, char* argV[]){
	float *arr, reference, meanVal;
	int len;

	if(argC != 3){
		/* %s needs the program name; guard against an empty argv. */
		printf("Usage : %s <reference value> <observations separated by commas>",
		       argC > 0 ? argV[0] : "diversityTheorem");
		return 0;
	}

	arr = extractData(argV[2], &len);
	if(arr == NULL || len == 0){
		/* Without at least one observation every mean would be 0/0. */
		fprintf(stderr, "No observations could be read\n");
		free(arr);
		return EXIT_FAILURE;
	}

	reference = atof(argV[1]);
	meanVal = mean(arr, len);

	printf("Average Error : %.9f\n", variance(reference, arr, len));
	printf("Crowd Error : %.9f\n", (reference - meanVal)*(reference - meanVal));
	printf("Diversity : %.9f", variance(meanVal, arr, len));

	free(arr);
	return 0;
}

Invocation and Output :

```
C:\rosettaCode>diversityTheorem.exe 49 48,47,51
Average Error : 3.000000000
Crowd Error : 0.111110263
Diversity : 2.888888597
C:\rosettaCode>diversityTheorem.exe 49 48,47,51,42
Average Error : 14.500000000
Crowd Error : 4.000000000
Diversity : 10.500000000
```

## C++

#include <iostream>
#include <vector>
#include <numeric>

// Adds up every element. The running total is kept in double (the 0.0
// seed) and narrowed to float when returned.
float sum(const std::vector<float> &array)
{
    const double seed = 0.0;
    return std::accumulate(array.cbegin(), array.cend(), seed);
}

// x multiplied by itself.
float square(float x)
{
    return x * x;
}

// Arithmetic mean of the elements; an empty vector yields 0/0.
float mean(const std::vector<float> &array)
{
    const float total = sum(array);
    return total / array.size();
}

// Mean of the squared differences between `a` and each prediction.
// Each float square is added to a double running total, which is then
// narrowed to float before the division.
float averageSquareDiff(float a, const std::vector<float> &predictions)
{
    const double total = std::accumulate(
        predictions.cbegin(), predictions.cend(), 0.0,
        [a](double acc, float x) { return acc + square(x - a); });
    return static_cast<float>(total) / predictions.size();
}

// Prints the average individual error, the crowd error and the
// prediction diversity of `predictions` with respect to `truth`.
void diversityTheorem(float truth, const std::vector<float> &predictions)
{
    const float crowdGuess = mean(predictions);
    const float averageError = averageSquareDiff(truth, predictions);
    const float crowdError = square(truth - crowdGuess);
    const float diversity = averageSquareDiff(crowdGuess, predictions);

    std::cout << "average-error: " << averageError << "\n";
    std::cout << "crowd-error: " << crowdError << "\n";
    std::cout << "diversity: " << diversity << std::endl;
}

int main()
{
    const float truth = 49;
    const std::vector<std::vector<float>> crowds{{48, 47, 51}, {48, 47, 51, 42}};

    for (const auto &crowd : crowds)
        diversityTheorem(truth, crowd);

    return 0;
}
Output:
```
average-error: 3
crowd-error: 0.11111
diversity: 2.88889
average-error: 14.5
crowd-error: 4
diversity: 10.5
```

## C#

using System;
using System.Linq;
using System.Collections.Generic;

public class MainClass {
    /// <summary>Returns <paramref name="x"/> multiplied by itself.</summary>
    static double Square(double x) => x * x;

    /// <summary>
    /// Mean of the squared differences between <paramref name="a"/> and
    /// each element of <paramref name="predictions"/>.
    /// </summary>
    static double AverageSquareDiff(double a, IEnumerable<double> predictions)
        => predictions.Select(x => Square(x - a)).Average();

    /// <summary>
    /// Prints the average individual error, the crowd error and the
    /// prediction diversity of <paramref name="predictions"/> against
    /// <paramref name="truth"/>, one result per line.
    /// </summary>
    static void DiversityTheorem(double truth, IEnumerable<double> predictions)
    {
        var average = predictions.Average();

        // One WriteLine per result keeps each value on its own line.
        Console.WriteLine($"average-error: {AverageSquareDiff(truth, predictions)}");
        Console.WriteLine($"crowd-error: {Square(truth - average)}");
        Console.WriteLine($"diversity: {AverageSquareDiff(average, predictions)}");
    }

    public static void Main() {
        DiversityTheorem(49, new []{48d,47,51});
        DiversityTheorem(49, new []{48d,47,51,42});
    }
}
Output:
```
average-error: 3
crowd-error: 0.11111
diversity: 2.88889
average-error: 14.5
crowd-error: 4
diversity: 10.5
```

## Clojure

John Lawrence Aspden's code posted on Diversity Prediction Theorem.

(defn diversity-theorem
  "Returns a map holding the :average-error, :crowd-error and :diversity
  of `predictions` measured against `truth`."
  [truth predictions]
  (letfn [(sq [x] (* x x))
          (avg [xs] (/ (reduce + xs) (count xs)))
          (mean-sq-dist [center]
            (avg (map #(sq (- % center)) predictions)))]
    (let [crowd (avg predictions)]
      {:average-error (mean-sq-dist truth)
       :crowd-error   (sq (- truth crowd))
       :diversity     (mean-sq-dist crowd)})))

(println (diversity-theorem 49 '(48 47 51)))
(println (diversity-theorem 49 '(48 47 51 42)))
Output:
```
{:average-error 3, :crowd-error 1/9, :diversity 26/9}
{:average-error 29/2, :crowd-error 4, :diversity 21/2}
```

## Go

`package main import "fmt" func averageSquareDiff(f float64, preds []float64) (av float64) {    for _, pred := range preds {        av += (pred - f) * (pred - f)    }    av /= float64(len(preds))    return} func diversityTheorem(truth float64, preds []float64) (float64, float64, float64) {    av := 0.0    for _, pred := range preds {        av += pred    }    av /= float64(len(preds))    avErr := averageSquareDiff(truth, preds)    crowdErr := (truth - av) * (truth - av)    div := averageSquareDiff(av, preds)    return avErr, crowdErr, div} func main() {    predsArray := [2][]float64{{48, 47, 51}, {48, 47, 51, 42}}    truth := 49.0    for _, preds := range predsArray {        avErr, crowdErr, div := diversityTheorem(truth, preds)        fmt.Printf("Average-error : %6.3f\n", avErr)        fmt.Printf("Crowd-error   : %6.3f\n", crowdErr)        fmt.Printf("Diversity     : %6.3f\n\n", div)    }}`
Output:
```
Average-error :  3.000
Crowd-error   :  0.111
Diversity     :  2.889

Average-error : 14.500
Crowd-error   :  4.000
Diversity     : 10.500
```

## JavaScript

### ES5

'use strict';

/**
 * Adds up every number in `array`.
 * The reduction is seeded with 0 so that an empty array sums to 0
 * instead of making `reduce` throw a TypeError.
 * @param {number[]} array
 * @returns {number}
 */
function sum(array) {
    return array.reduce(function (a, b) {
        return a + b;
    }, 0);
}

/**
 * @param {number} x
 * @returns {number} x multiplied by itself
 */
function square(x) {
    return x * x;
}

/**
 * Arithmetic mean of `array`; NaN (0 / 0) for an empty array.
 * @param {number[]} array
 * @returns {number}
 */
function mean(array) {
    return sum(array) / array.length;
}

/**
 * Mean of the squared differences between `a` and each prediction.
 * @param {number} a
 * @param {number[]} predictions
 * @returns {number}
 */
function averageSquareDiff(a, predictions) {
    return mean(predictions.map(function (x) {
        return square(x - a);
    }));
}

/**
 * Computes the three quantities of the diversity prediction theorem.
 * @param {number} truth
 * @param {number[]} predictions
 * @returns {Object} object with the keys 'average-error', 'crowd-error'
 *     and 'diversity'
 */
function diversityTheorem(truth, predictions) {
    var average = mean(predictions);
    return {
        'average-error': averageSquareDiff(truth, predictions),
        'crowd-error': square(truth - average),
        'diversity': averageSquareDiff(average, predictions)
    };
}

console.log(diversityTheorem(49, [48, 47, 51]));
console.log(diversityTheorem(49, [48, 47, 51, 42]));
Output:
```
{ 'average-error': 3,
'crowd-error': 0.11111111111111269,
diversity: 2.888888888888889 }
{ 'average-error': 14.5, 'crowd-error': 4, diversity: 10.5 }
```

### ES6

(() => {
    'use strict';

    // mean :: [Number] -> Number | undefined
    // Arithmetic mean of xs, or undefined when xs is empty.
    const mean = xs => {
        if (!(xs.length > 0)) {
            return undefined;
        }
        let total = 0;
        xs.forEach(x => {
            total = total + x;
        });
        return total / xs.length;
    };

    // meanErrorSquared :: Number -> [Number] -> Number
    // Mean of the squared differences between each prediction and `observed`.
    const meanErrorSquared = (observed, predictions) => {
        const squaredErrors = predictions.map(x => Math.pow(x - observed, 2));
        return mean(squaredErrors);
    };

    // diversityValues :: Number -> [Number] ->
    //     {mean-error :: Number, crowd-error :: Number, diversity :: Number}
    const diversityValues = (observed, predictions) => {
        const predictionMean = mean(predictions);
        const meanError = meanErrorSquared(observed, predictions);
        const crowdError = Math.pow(observed - predictionMean, 2);
        const diversity = meanErrorSquared(predictionMean, predictions);

        return {
            'mean-error': meanError,
            'crowd-error': crowdError,
            'diversity': diversity
        };
    };

    // TEST

    // show :: a -> String
    const show = x => JSON.stringify(x, null, 2);

    const testCases = [{
        observed: 49,
        predictions: [48, 47, 51]
    }, {
        observed: 49,
        predictions: [48, 47, 51, 42]
    }];

    // Each value is rendered with three significant digits.
    const reports = testCases.map(({observed, predictions}) => {
        const values = diversityValues(observed, predictions);
        return Object.keys(values).reduce((rounded, key) => {
            rounded[key] = values[key].toPrecision(3);
            return rounded;
        }, {});
    });

    return show(reports);
})();
Output:
```
[
{
"mean-error": "3.00",
"crowd-error": "0.111",
"diversity": "2.89"
},
{
"mean-error": "14.5",
"crowd-error": "4.00",
"diversity": "10.5"
}
]
```

## Julia

Works with: Julia version 0.6
"""
    diversitytheorem(truth, pred) -> (avgerr, crderr, divers)

Return the average individual error, the crowd error and the prediction
diversity of the predictions `pred` with respect to `truth`.
"""
function diversitytheorem(truth::T, pred::Vector{T}) where T<:Number
    # Means are written as sum/length so the function does not depend on
    # a `mean` binding being available in the running Julia version.
    n      = length(pred)
    avg    = sum(pred) / n
    avgerr = sum((pred .- truth) .^ 2) / n
    crderr = (avg - truth) ^ 2
    divers = sum((pred .- avg) .^ 2) / n
    return avgerr, crderr, divers
end

for (t, s) in [(49, [48, 47, 51]),
               (49, [48, 47, 51, 42])]
    avgerr, crderr, divers = diversitytheorem(t, s)
    println("""
    average-error : $avgerr
    crowd-error   : $crderr
    diversity     : $divers
    """)
end
Output:
```
average-error : 3.0
crowd-error   : 0.11111111111111269
diversity     : 2.888888888888889

average-error : 14.5
crowd-error   : 4.0
diversity     : 10.5
```

## Kotlin

Translation of: TypeScript
// version 1.1.4-3

/** Returns [d] multiplied by itself. */
fun square(d: Double) = d * d

/** Mean of the squared differences between [d] and each element of [predictions]. */
fun averageSquareDiff(d: Double, predictions: DoubleArray) =
    predictions.map { square(it - d) }.average()

/**
 * Builds a three-line report with the average individual error, the crowd
 * error and the prediction diversity of [predictions] against [truth].
 * Every value is formatted with the pattern "%6.3f".
 */
fun diversityTheorem(truth: Double, predictions: DoubleArray): String {
    val average = predictions.average()
    val f = "%6.3f"
    return "average-error : ${f.format(averageSquareDiff(truth, predictions))}\n" +
           "crowd-error   : ${f.format(square(truth - average))}\n" +
           "diversity     : ${f.format(averageSquareDiff(average, predictions))}\n"
}

fun main(args: Array<String>) {
    println(diversityTheorem(49.0, doubleArrayOf(48.0, 47.0, 51.0)))
    println(diversityTheorem(49.0, doubleArrayOf(48.0, 47.0, 51.0, 42.0)))
}
Output:
```
average-error :  3.000
crowd-error   :  0.111
diversity     :  2.889

average-error : 14.500
crowd-error   :  4.000
diversity     : 10.500
```

## Perl

use strict;
use warnings;

# diversity($truth, @pred)
# Returns a three-line report (average-error, crowd-error, diversity) for
# the predictions @pred measured against $truth.
sub diversity {
    my($truth, @pred) = @_;
    my($ae,$ce,$pd,$stats);
    my $cp = 0;

    $cp += $_/@pred for @pred;      # collective prediction
    $ae = avg_error($truth, @pred); # average individual error
    $ce = ($cp - $truth)**2;        # collective error
    $pd = avg_error($cp, @pred);    # prediction diversity

    my $fmt = "%13s: %6.3f\n";
    $stats  = sprintf $fmt, 'average-error', $ae;
    $stats .= sprintf $fmt, 'crowd-error',   $ce;
    $stats .= sprintf $fmt, 'diversity',     $pd;
    return $stats;
}

# avg_error($m, @v)
# Mean of the squared differences between $m and each element of @v.
sub avg_error {
    my($m, @v) = @_;
    my $avg_err = 0;
    $avg_err += ($_ - $m)**2 for @v;
    return $avg_err/@v;
}

print diversity(49, qw<48 47 51>) . "\n";
print diversity(49, qw<48 47 51 42>);
Output:
```
average-error:  3.000
crowd-error:  0.111
diversity:  2.889

average-error: 14.500
crowd-error:  4.000
diversity: 10.500
```

## Perl 6

# Returns (average individual error, collective error, prediction diversity)
# for the predictions @pred measured against $truth.
sub diversity-calc($truth, @pred) {
    my $ae = avg-error($truth, @pred); # average individual error
    my $cp = ([+] @pred)/+@pred;       # collective prediction
    my $ce = ($cp - $truth)**2;        # collective error
    my $pd = avg-error($cp, @pred);    # prediction diversity
    return $ae, $ce, $pd;
}

# Mean of the squared differences between $m and each element of @v.
sub avg-error ($m, @v) { ([+] (@v X- $m) X**2) / +@v }

# Pairs each statistic with its label and yields one formatted line per pair.
sub diversity-format (@stats) {
    gather {
        for <average-error crowd-error diversity> Z @stats -> ($label,$value) {
            take $label.fmt("%13s") ~ ':' ~ $value.fmt("%7.3f");
        }
    }
}

.say for diversity-format diversity-calc(49, <48 47 51>);
.say for diversity-format diversity-calc(49, <48 47 51 42>);
Output:
```
average-error:  3.000
crowd-error:  0.111
diversity:  2.889
average-error: 14.500
crowd-error:  4.000
diversity: 10.500
```

## Phix

function mean(sequence s)
    -- arithmetic mean of the elements of s
    atom total = sum(s)
    return total/length(s)
end function

function variance(sequence s, atom d)
    -- mean of the squared differences between each element of s and d
    sequence deviations = sq_sub(s,d)
    return mean(sq_power(deviations,2))
end function

function diversity_theorem(atom reference, sequence observations)
    -- returns {label,value} pairs for the average error, the crowd error
    -- and the diversity of observations against reference
    atom crowd = mean(observations)
    sequence report = {{"average_error",variance(observations,reference)},
                       {"crowd_error",power(reference-crowd,2)},
                       {"diversity",variance(observations,crowd)}}
    return report
end function

procedure test(atom reference, sequence observations)
    -- prints one line per {label,value} pair
    sequence res = diversity_theorem(reference, observations)
    for i=1 to length(res) do
        printf(1," %14s : %g\n",res[i])
    end for
end procedure

test(49, {48, 47, 51})
test(49, {48, 47, 51, 42})
Output:
```
  average_error : 3
crowd_error : 0.111111
diversity : 2.88889
average_error : 14.5
crowd_error : 4
diversity : 10.5
```

## REXX

### version 1

/* REXX */
Numeric Digits 20
Call diversityTheorem 49,'48 47 51'
Say '--------------------------------------'
Call diversityTheorem 49,'48 47 51 42'
Exit

/* Show the three diversity-theorem figures for a true value and a   */
/* blank-separated list of predictions.                              */
diversityTheorem:
  Parse Arg truth,list
  crowd=average(list)
  avgErr=averageSquareDiff(truth,list)
  crowdErr=(truth-crowd)**2
  divers=averageSquareDiff(crowd,list)
  Say 'average-error='avgErr
  Say 'crowd-error='crowdErr
  Say 'diversity='divers
  Return

/* Arithmetic mean of the words in the list.                         */
average: Procedure
  Parse Arg rest
  count=0
  total=0
  Do While rest<>''
    Parse Var rest item rest    /* peel off the next list element    */
    total=total+item
    count=count+1
  End
  Return total/count

/* Mean of the squared differences between a and each list element.  */
averageSquareDiff: Procedure
  Parse Arg a,rest
  count=0
  total=0
  Do While rest<>''
    Parse Var rest item rest    /* peel off the next list element    */
    total=total+(item-a)**2
    count=count+1
  End
  Return total/count
Output:
```
average-error=3
crowd-error=0.11111111111111111089
diversity=2.8888888888888888889
--------------------------------------
average-error=14.5
crowd-error=4
diversity=10.5
```

### version 2

/*REXX program calculates:   average error,   crowd error,   and   prediction diversity.*/
numeric digits 50                                /*set precision at fifty decimal digits*/
call diversity 49,     48  47  51                /*true value,  and  crowd predictions. */
call diversity 49,     48  47  51  42            /*  "    "      "     "        "       */
exit                                             /*stick a fork in it,  we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*AVG:    mean of the estimates.  Reads the variables  ESTS  and  COUNT  that are set   */
/*        by  DIVERSITY  (these routines share one variable pool).                      */
avg:   total=0
       do k=1  for count;   total=total +  word(ests,k);                 end
       return total/count
/*AVGSD:  mean of the squared differences between each estimate and the 1st argument.   */
avgSD: total=0
       do j=1  for count;   total=total + (word(ests,j) - arg(1))**2;    end
       return total/count
/*──────────────────────────────────────────────────────────────────────────────────────*/
diversity: parse arg true, ests                  /*get the true value and the estimates.*/
           count=words(ests)                     /*count the number of estimates.       */
           say '   the  true   value: '  true  copies('═', 20)  'crowd estimates: '   ests
           avg=avg()                             /* [↓]  avgSD=avg of squared difference*/
           say '   the average error: '      format( avgSD(true)   , , 6) / 1
           say '   the  crowd  error: '      format( (true-avg)**2 , , 6) / 1
           say 'prediction diversity: '      format( avgSD(avg)    , , 6) / 1
           say                                   /*two blank lines follow each report.  */
           say
           return                                /*only show up to 6 decimal digits;    */
                                                 /*the  /1  drops any trailing zeros.   */
Output when using the default inputs:
```
   the  true   value:  49 ════════════════════ crowd estimates:  48 47 51
the average error:  3
the  crowd  error:  0.111111
prediction diversity:  2.888889

the  true   value:  49 ════════════════════ crowd estimates:  48 47 51 42
the average error:  14.5
the  crowd  error:  4
prediction diversity:  10.5
```

## Sidef

Translation of: Perl 6
# Mean of the squared differences between `m` and each element of `v`.
func avg_error(m, v) {
    v.map { (_ - m)**2 }.sum / v.len
}

# Returns [average error, crowd error, diversity] for the predictions
# `pred` measured against `truth`.
func diversity_calc(truth, pred) {
    var ae = avg_error(truth, pred)    # average individual error
    var cp = pred.sum/pred.len         # collective prediction (mean of pred)
    var ce = (cp - truth)**2           # collective error
    var pd = avg_error(cp, pred)       # prediction diversity
    return [ae, ce, pd]
}

# Pairs each statistic with its label and collects one formatted string
# per pair.
func diversity_format(stats) {
    gather {
        for t,v in (%w(average-error crowd-error diversity) ~Z stats) {
            take(("%13s" % t) + ':' + ('%7.3f' % v))
        }
    }
}

# Print the report for the two sample crowds.
diversity_format(diversity_calc(49, [48, 47, 51])).each{.say}
diversity_format(diversity_calc(49, [48, 47, 51, 42])).each{.say}
Output:
```
average-error:  3.000
crowd-error:  0.111
diversity:  2.889
average-error: 14.500
crowd-error:  4.000
diversity: 10.500
```

## TypeScript

/**
 * Adds up every number in `array`.
 * The reduction is seeded with 0 so that an empty array sums to 0
 * instead of making `reduce` throw a TypeError.
 */
function sum(array: Array<number>): number {
    return array.reduce((a, b) => a + b, 0);
}

/** Returns `x` multiplied by itself. */
function square(x: number): number {
    return x * x;
}

/** Arithmetic mean of `array`; NaN (0 / 0) for an empty array. */
function mean(array: Array<number>): number {
    return sum(array) / array.length;
}

/** Mean of the squared differences between `a` and each prediction. */
function averageSquareDiff(a: number, predictions: Array<number>): number {
    return mean(predictions.map(x => square(x - a)));
}

/**
 * Computes the three quantities of the diversity prediction theorem.
 * @returns an object with the keys "average-error", "crowd-error" and
 *     "diversity"
 */
function diversityTheorem(truth: number, predictions: Array<number>): Object {
    const average: number = mean(predictions);
    return {
        "average-error": averageSquareDiff(truth, predictions),
        "crowd-error": square(truth - average),
        "diversity": averageSquareDiff(average, predictions)
    };
}

console.log(diversityTheorem(49, [48, 47, 51]));
console.log(diversityTheorem(49, [48, 47, 51, 42]));
Output:
```
{ 'average-error': 3,
'crowd-error': 0.11111111111111269,
diversity: 2.888888888888889 }
{ 'average-error': 14.5, 'crowd-error': 4, diversity: 10.5 }
```

## zkl

Translation of: Sidef
// Mean of the squared differences between m and each element of v.
fcn avgError(m,v){ v.apply('wrap(n){ (n - m).pow(2) }).sum(0.0)/v.len() }

// Returns (average error, crowd error, diversity) for pred against truth.
fcn diversityCalc(truth,pred){  //(Float,List of Float)
   // ae: average individual error, cp: collective prediction (mean of pred)
   ae,cp := avgError(truth,pred), pred.sum(0.0)/pred.len();
   // ce: collective error, pd: prediction diversity
   ce,pd := (cp - truth).pow(2),  avgError(cp, pred);
   return(ae,ce,pd)
}

// Pairs each statistic with its label and renders the pairs through the
// "%13s :%7.3f\n" format into one String.
fcn diversityFormat(stats){  // ( (averageError,crowdError,diversity) )
   T("average-error","crowd-error","diversity").zip(stats)
   .pump(String,Void.Xplode,"%13s :%7.3f\n".fmt)
}

// Print the report for the two sample crowds.
diversityCalc(49.0, T(48.0,47.0,51.0)) : diversityFormat(_).println();
diversityCalc(49.0, T(48.0,47.0,51.0,42.0)) : diversityFormat(_).println();
Output:
```
average-error :  3.000
crowd-error :  0.111
diversity :  2.889

average-error : 14.500
crowd-error :  4.000
diversity : 10.500
```