Average loop length

Let f be a randomly chosen mapping from the numbers 1..N to the numbers 1..N. At some point, the sequence 1, f(1), f(f(1))... will contain a repetition, a number that occurring for the second time in the sequence.

Write a program or a script that estimates, for each N, the average length until the first such repetition.

Also calculate this expected length using an analytical formula, and optionally compare the simulated result with the theoretical one.

This problem comes from the end of Donald Knuth's Christmas tree lecture 2011.

Example of expected output:

 N    average    analytical    (error)
===  =========  ============  =========
  1     1.0000        1.0000  (  0.00%)
  2     1.4992        1.5000  (  0.05%)
  3     1.8784        1.8889  (  0.56%)
  4     2.2316        2.2188  (  0.58%)
  5     2.4982        2.5104  (  0.49%)
  6     2.7897        2.7747  (  0.54%)
  7     3.0153        3.0181  (  0.09%)
  8     3.2429        3.2450  (  0.07%)
  9     3.4536        3.4583  (  0.14%)
 10     3.6649        3.6602  (  0.13%)
 11     3.8091        3.8524  (  1.12%)
 12     3.9986        4.0361  (  0.93%)
 13     4.2074        4.2123  (  0.12%)
 14     4.3711        4.3820  (  0.25%)
 15     4.5275        4.5458  (  0.40%)
 16     4.6755        4.7043  (  0.61%)
 17     4.8877        4.8579  (  0.61%)
 18     4.9951        5.0071  (  0.24%)
 19     5.1312        5.1522  (  0.41%)
 20     5.2699        5.2936  (  0.45%)

C

<lang c>#include <stdio.h>

include <stdlib.h>
include <math.h>
include <time.h>

define MAX_N 20
define TIMES 1000000

double factorial(int n) { double f = 1; int i; for (i = 1; i <= n; i++) f *= i; return f; }

double expected(int n) { double sum = 0; int i; for (i = 1; i <= n; i++) sum += factorial(n) / pow(n, i) / factorial(n - i); return sum; }

int randint(int n) { int r, rmax = RAND_MAX / n * n; while ((r = rand()) >= rmax); return r / (RAND_MAX / n); }

int test(int n, int times) { int i, count = 0; for (i = 0; i < times; i++) { int x = 1, bits = 0; while (!(bits & x)) { count++; bits |= x; x = 1 << randint(n); } } return count; }

int main(void) { srand(time(0)); puts(" n\tavg\texp.\tdiff\n-------------------------------");

int n; for (n = 1; n <= MAX_N; n++) { int cnt = test(n, TIMES); double avg = (double)cnt / TIMES; double theory = expected(n); double diff = (avg / theory - 1) * 100; printf("%2d %8.4f %8.4f %6.3f%%\n", n, avg, theory, diff); } return 0; }</lang>

Output:

 n      avg     exp.    diff
-------------------------------
 1   1.0000   1.0000  0.000%
 2   1.4998   1.5000 -0.015%
 3   1.8879   1.8889 -0.051%
 4   2.2181   2.2188 -0.029%
 5   2.5107   2.5104  0.012%
 6   2.7741   2.7747 -0.021%
 7   3.0168   3.0181 -0.044%
 8   3.2455   3.2450  0.014%
 9   3.4591   3.4583  0.023%
10   3.6596   3.6602 -0.017%
11   3.8519   3.8524 -0.013%
12   4.0384   4.0361  0.059%
13   4.2106   4.2123 -0.042%
14   4.3840   4.3820  0.044%
15   4.5449   4.5458 -0.020%
16   4.7058   4.7043  0.033%
17   4.8549   4.8579 -0.060%
18   5.0084   5.0071  0.026%
19   5.1479   5.1522 -0.084%
20   5.2957   5.2936  0.040%

D

Translation of: Perl 6

<lang d>import std.stdio, std.random, std.math, std.algorithm, std.range;

real analytical(in int n) /*pure nothrow*/ {

   real total = 0.0;
   foreach (immutable k; 1 .. n + 1) {
       immutable x = reduce!q{a * b}(1.0L, iota(n - k + 1, n + 1))
                     * k ^^ 2;
       total += x / ((cast(real)n) ^^ (k + 1));
   }
   return total;

}

size_t loopLength(size_t maxN)(in int size, ref Xorshift rng) {

   __gshared bool[maxN + 1] seen;
   seen[0 .. size + 1] = false;
   int current = 1;
   size_t steps = 0;
   while (!seen[current]) {
       seen[current] = true;
       current = uniform(1, size + 1, rng);
       steps++;
   }
   return steps;

}

void main() {

   enum maxN  = 40;
   enum nTrials = 300_000;
   auto rng = Xorshift(unpredictableSeed);
   writeln(" n    average    analytical     (error)");
   writeln("===  =========  ============  ==========");

   foreach (immutable n; 1 .. maxN + 1) {
       long total = 0;
       foreach (immutable _; 0 .. nTrials)
           total += loopLength!maxN(n, rng);
       immutable average = total / cast(real)nTrials;
       immutable an = analytical(n);
       immutable percentError = abs(an - average) / an * 100;
       immutable errorS = format("%2.4f", percentError);
       writefln("%3d  %9.5f  %12.5f  (%7s%%)",
                n, average, an, errorS);
   }

}</lang>

Output:

 n    average    analytical     (error)
===  =========  ============  ==========
  1    1.00000       1.00000  ( 0.0000%)
  2    1.50017       1.50000  ( 0.0111%)
  3    1.88932       1.88889  ( 0.0226%)
  4    2.21795       2.21875  ( 0.0362%)
  5    2.51159       2.51040  ( 0.0474%)
  6    2.77373       2.77469  ( 0.0345%)
  7    3.01894       3.01814  ( 0.0264%)
  8    3.24734       3.24502  ( 0.0716%)
  9    3.45876       3.45832  ( 0.0127%)
 10    3.66595       3.66022  ( 0.1567%)
 11    3.85000       3.85237  ( 0.0616%)
 12    4.03532       4.03607  ( 0.0187%)
 13    4.20879       4.21235  ( 0.0843%)
 14    4.37664       4.38203  ( 0.1230%)
 15    4.54986       4.54581  ( 0.0892%)
 16    4.70431       4.70426  ( 0.0010%)
 17    4.85640       4.85787  ( 0.0302%)
 18    5.01359       5.00706  ( 0.1303%)
 19    5.15487       5.15220  ( 0.0519%)
 20    5.29486       5.29358  ( 0.0241%)
 21    5.43276       5.43150  ( 0.0231%)
 22    5.56570       5.56620  ( 0.0088%)
 23    5.70611       5.69788  ( 0.1443%)
 24    5.82618       5.82675  ( 0.0098%)
 25    5.94846       5.95298  ( 0.0759%)
 26    6.07440       6.07672  ( 0.0381%)
 27    6.20717       6.19811  ( 0.1461%)
 28    6.31546       6.31729  ( 0.0290%)
 29    6.44201       6.43437  ( 0.1187%)
 30    6.54592       6.54946  ( 0.0540%)
 31    6.65818       6.66265  ( 0.0671%)
 32    6.77215       6.77405  ( 0.0279%)
 33    6.88381       6.88372  ( 0.0013%)
 34    6.99790       6.99175  ( 0.0880%)
 35    7.10990       7.09820  ( 0.1648%)
 36    7.20391       7.20316  ( 0.0104%)
 37    7.30085       7.30667  ( 0.0796%)
 38    7.40366       7.40880  ( 0.0693%)
 39    7.51864       7.50959  ( 0.1204%)
 40    7.60255       7.60911  ( 0.0863%)

Mathematica

<lang mathematica>Grid@Prepend[

 Table[{n, #1, #2, 
     Row[{Round[10000 Abs[#1 - #2]/#2]/100., "%"}]} &@
   N[{Mean[Array[
       Length@NestWhileList[#, 1, UnsameQ[##] &, All] - 1 &[# /. 
           MapIndexed[#21 -> #1 &, 
            RandomInteger[{1, n}, n]] &] &, 10000]], 
     Sum[n! n^(n - k - 1)/(n - k)!, {k, n}]/n^(n - 1)}, 5], {n, 1, 
   20}], {"N", "average", "analytical", "error"}]</lang>

Output:

N	average	analytical	error
1	1.0000	1.0000		0.%
2	1.5017	1.5000		0.11%
3	1.8910	1.8889		0.11%
4	2.2334	2.2188		0.66%
5	2.5090	2.5104		0.06%
6	2.8092	2.7747		1.24%
7	3.0468	3.0181		0.95%
8	3.2253	3.2450		0.61%
9	3.4695	3.4583		0.32%
10	3.6661	3.6602		0.16%
11	3.8662	3.8524		0.36%
12	4.0393	4.0361		0.08%
13	4.2232	4.2123		0.26%
14	4.3496	4.3820		0.74%
15	4.5706	4.5458		0.55%
16	4.6963	4.7043		0.17%
17	4.8548	4.8579		0.06%
18	5.0671	5.0071		1.2%
19	5.1702	5.1522		0.35%
20	5.2264	5.2936		1.27%

Perl 6

Runs on Rakudo Warszawa (2012.12).

<lang perl6>constant MAX_N = 20; constant TRIALS = 100;

for 1 .. MAX_N -> $N {

   my $empiric = TRIALS R/ [+] find-loop(random-mapping($N)).elems xx TRIALS;
   my $theoric = [+]
       map -> $k { $N ** ($k + 1) R/ [*] $k**2, $N - $k + 1 .. $N }, 1 .. $N;

   FIRST say " N    empiric      theoric      (error)";
   FIRST say "===  =========  ============  =========";

   printf "%3d  %9.4f  %12.4f    (%4.2f%%)\n",
           $N,  $empiric,
                       $theoric, 100 * abs($theoric - $empiric) / $theoric;

}

sub random-mapping($size) { hash .list Z=> .roll($size) given ^$size } sub find-loop(%mapping) {

   my %seen;
   gather loop (
       my $current = 0;
       !%seen{$current}++;
       $current = %mapping{$current}
   ) { take $current }

}</lang>

Example:

 N    empiric      theoric      (error)
===  =========  ============  =========
  1     1.0000        1.0000    (0.00%)
  2     1.5600        1.5000    (4.00%)
  3     1.7800        1.8889    (5.76%)
  4     2.1800        2.2188    (1.75%)
  5     2.6200        2.5104    (4.37%)
  6     2.8300        2.7747    (1.99%)
  7     3.1200        3.0181    (3.37%)
  8     3.1400        3.2450    (3.24%)
  9     3.4500        3.4583    (0.24%)
 10     3.6700        3.6602    (0.27%)
 11     3.8300        3.8524    (0.58%)
 12     4.3600        4.0361    (8.03%)
 13     3.9000        4.2123    (7.42%)
 14     4.4900        4.3820    (2.46%)
 15     4.9500        4.5458    (8.89%)
 16     4.9800        4.7043    (5.86%)
 17     4.9100        4.8579    (1.07%)
 18     4.9700        5.0071    (0.74%)
 19     5.1000        5.1522    (1.01%)
 20     5.2300        5.2936    (1.20%)

Python

Translation of: C

<lang python>from __future__ import division from math import factorial from random import randint

MAX_N = 20 TIMES = 1000000

def analytical(n): return sum(factorial(n) / pow(n, i) / factorial(n -i) for i in range(1, n+1))

def test(n, times):

   count = 0
   for i in range(times):
       x, bits = 1, 0
       while not (bits & x):
           count += 1
           bits |= x
           x = 1 << randint(0, n-1)
   return count / times

if __name__ == '__main__':

   print(" n\tavg\texp.\tdiff\n-------------------------------")
   for n in range(1, MAX_N+1):
       avg = test(n, TIMES)
       theory = analytical(n)
       diff = (avg / theory - 1) * 100
       print("%2d %8.4f %8.4f %6.3f%%" % (n, avg, theory, diff))</lang>

Output:

 n	avg	exp.	diff
-------------------------------
 1   1.0000   1.0000  0.000%
 2   1.5006   1.5000  0.037%
 3   1.8887   1.8889 -0.012%
 4   2.2190   2.2188  0.011%
 5   2.5101   2.5104 -0.012%
 6   2.7750   2.7747  0.012%
 7   3.0158   3.0181 -0.076%
 8   3.2447   3.2450 -0.009%
 9   3.4586   3.4583  0.009%
10   3.6598   3.6602 -0.010%
11   3.8510   3.8524 -0.036%
12   4.0368   4.0361  0.017%
13   4.2099   4.2123 -0.058%
14   4.3784   4.3820 -0.083%
15   4.5484   4.5458  0.058%
16   4.7045   4.7043  0.006%
17   4.8611   4.8579  0.067%
18   5.0074   5.0071  0.007%
19   5.1534   5.1522  0.024%
20   5.2927   5.2936 -0.017%