Probabilistic choice

Given a mapping between items and their required probability of occurrence, generate a million items randomly subject to the given probabilities and compare the target propability of occurrence versus the generated values.

The total of all the probabilities should equal one. (Because floating point arithmetic is involved this is subject to rounding errors).

Use the following mapping to test your programs:

aleph   1/5.0
beth    1/6.0
gimel   1/7.0
daleth  1/8.0
he      1/9.0
waw     1/10.0
zayin   1/11.0
heth    0.063455988455988432 # adjust so that probabilities add to 1

Python

Two different algorithms are coded. <python> import random

def probchoice(items, probs):

 \
 Splits the interval 0.0-1.0 in proportion to probs
 then finds where each random.random() choice lies
 
 
 prob_accumulator = 0
 accumulator = []
 for p in probs:
   prob_accumulator += p
   accumulator.append(prob_accumulator)
   
 accumZitems = zip(accumulator, items)[:-1]
 last_item = items[-1]
 while True:
   r = random.random()
   for prob_accumulator, item in accumZitems:
     if r <= prob_accumulator:
       yield item
       break
   else:
     # last range handled by else clause
     yield last_item

def probchoice2(items, probs, bincount=10000):

 \
 Puts items in bins in proportion to probs
 then uses random.choice() to select items.
 
 Larger bincount for more memory use but
 higher accuracy (on avarage).
 
 
 prob_accumulator = 0
 bins = []
 for item,prob in zip(items, probs):
   bins += [item]*int(bincount*prob)
 while True:
   yield random.choice(bins)

def tester(func=probchoice, items='good bad ugly'.split(),

                   probs=[0.5, 0.3, 0.2],
                   trials = 100000
                   ):
 def problist2string(probs):
   \
   Turns a list of probabilities into a string
   Also rounds FP values
   
   return ",".join('%8.6f' % (p,) for p in probs)
 
 from collections import defaultdict
  
 counter = defaultdict(int)
 it = func(items, probs)
 for dummy in xrange(trials):
   counter[it.next()] += 1
 print "\n##\n## %s\n##" % func.func_name.upper()  
 print "Trials:              ", trials
 print "Items:               ", ' '.join(items)
 print "Target probability:  ", problist2string(probs)
 print "Attained probability:", problist2string(
   counter[x]/float(trials) for x in items)

if __name__ == '__main__':

 items = 'aleph beth gimel daleth he waw zayin heth'.split()
 probs = [1/(float(n)+5) for n in range(len(k))]
 probs[-1] = 1-sum(probs[:-1])
 tester(probchoice, items, probs, 1000000)
 tester(probchoice2, items, probs, 1000000)</python>

Sample output:

##
## PROBCHOICE
##
Trials:               1000000
Items:                aleph beth gimel daleth he waw zayin heth
Target probability:   0.200000,0.166667,0.142857,0.125000,0.111111,0.100000,0.090909,0.063456
Attained probability: 0.200050,0.167109,0.143364,0.124690,0.111237,0.099661,0.090338,0.063551

##
## PROBCHOICE2
##
Trials:               1000000
Items:                aleph beth gimel daleth he waw zayin heth
Target probability:   0.200000,0.166667,0.142857,0.125000,0.111111,0.100000,0.090909,0.063456
Attained probability: 0.199720,0.166424,0.142474,0.124561,0.111511,0.100313,0.091316,0.063681