Average loop length
Let f
be a uniformly-randomly chosen mapping from the numbers 1..N to the numbers 1..N (note: not necessarily a permutation of 1..N; the mapping could produce a number in more than one way or not at all). At some point, the sequence 1, f(1), f(f(1))...
will contain a repetition, a number that occurring for the second time in the sequence.
You are encouraged to solve this task according to the task description, using any language you may know.
Write a program or a script that estimates, for each N
, the average length until the first such repetition.
Also calculate this expected length using an analytical formula, and optionally compare the simulated result with the theoretical one.
This problem comes from the end of Donald Knuth's Christmas tree lecture 2011.
Example of expected output:
N average analytical (error) === ========= ============ ========= 1 1.0000 1.0000 ( 0.00%) 2 1.4992 1.5000 ( 0.05%) 3 1.8784 1.8889 ( 0.56%) 4 2.2316 2.2188 ( 0.58%) 5 2.4982 2.5104 ( 0.49%) 6 2.7897 2.7747 ( 0.54%) 7 3.0153 3.0181 ( 0.09%) 8 3.2429 3.2450 ( 0.07%) 9 3.4536 3.4583 ( 0.14%) 10 3.6649 3.6602 ( 0.13%) 11 3.8091 3.8524 ( 1.12%) 12 3.9986 4.0361 ( 0.93%) 13 4.2074 4.2123 ( 0.12%) 14 4.3711 4.3820 ( 0.25%) 15 4.5275 4.5458 ( 0.40%) 16 4.6755 4.7043 ( 0.61%) 17 4.8877 4.8579 ( 0.61%) 18 4.9951 5.0071 ( 0.24%) 19 5.1312 5.1522 ( 0.41%) 20 5.2699 5.2936 ( 0.45%)
Ada
<lang Ada>with Ada.Text_IO; use Ada.Text_IO; with Ada.Numerics.Generic_Elementary_Functions; with Ada.Numerics.Discrete_Random; procedure Avglen is
package IIO is new Ada.Text_IO.Integer_IO (Positive); use IIO; package LFIO is new Ada.Text_IO.Float_IO (Long_Float); use LFIO; subtype FactN is Natural range 0..20; TESTS : constant Natural := 1_000_000;
function Factorial (N : FactN) return Long_Float is Result : Long_Float := 1.0; begin for I in 2..N loop Result := Result * Long_Float(I); end loop; return Result; end Factorial;
function Analytical (N : FactN) return Long_Float is Sum : Long_Float := 0.0; begin for I in 1..N loop Sum := Sum + Factorial(N) / Factorial(N - I) / Long_Float(N)**I; end loop; return Sum; end Analytical;
function Experimental (N : FactN) return Long_Float is subtype RandInt is Natural range 1..N; package Random is new Ada.Numerics.Discrete_Random(RandInt); seed : Random.Generator; Num : RandInt; count : Natural := 0; bits : array(RandInt'Range) of Boolean; begin Random.Reset(seed); for run in 1..TESTS loop bits := (others => false); for I in RandInt'Range loop Num := Random.Random(seed); exit when bits(Num); bits(Num) := True; count := count + 1; end loop; end loop; return Long_Float(count)/Long_Float(TESTS); end Experimental;
A, E, err : Long_Float;
begin
Put_Line(" N avg calc %diff"); for I in 1..20 loop A := Analytical(I); E := Experimental(I); err := abs(E-A)/A*100.0; Put(I, Width=>2); Put(E ,Aft=>4, exp=>0); Put(A, Aft=>4, exp=>0); Put(err, Fore=>3, Aft=>3, exp=>0); New_line; end loop;
end Avglen;</lang>
- Output:
N avg calc %diff 1 1.0000 1.0000 0.000 2 1.5000 1.5000 0.003 3 1.8886 1.8889 0.015 4 2.2180 2.2188 0.033 5 2.5104 2.5104 0.000 6 2.7745 2.7747 0.006 7 3.0191 3.0181 0.033 8 3.2433 3.2450 0.052 9 3.4583 3.4583 0.001 10 3.6597 3.6602 0.015 11 3.8524 3.8524 0.001 12 4.0352 4.0361 0.022 13 4.2147 4.2123 0.055 14 4.3853 4.3820 0.075 15 4.5453 4.5458 0.011 16 4.7055 4.7043 0.027 17 4.8592 4.8579 0.028 18 5.0062 5.0071 0.017 19 5.1535 5.1522 0.025 20 5.2955 5.2936 0.035
C
<lang c>#include <stdio.h>
- include <stdlib.h>
- include <math.h>
- include <time.h>
- define MAX_N 20
- define TIMES 1000000
double factorial(int n) { double f = 1; int i; for (i = 1; i <= n; i++) f *= i; return f; }
double expected(int n) { double sum = 0; int i; for (i = 1; i <= n; i++) sum += factorial(n) / pow(n, i) / factorial(n - i); return sum; }
int randint(int n) { int r, rmax = RAND_MAX / n * n; while ((r = rand()) >= rmax); return r / (RAND_MAX / n); }
int test(int n, int times) { int i, count = 0; for (i = 0; i < times; i++) { int x = 1, bits = 0; while (!(bits & x)) { count++; bits |= x; x = 1 << randint(n); } } return count; }
int main(void) { srand(time(0)); puts(" n\tavg\texp.\tdiff\n-------------------------------");
int n; for (n = 1; n <= MAX_N; n++) { int cnt = test(n, TIMES); double avg = (double)cnt / TIMES; double theory = expected(n); double diff = (avg / theory - 1) * 100; printf("%2d %8.4f %8.4f %6.3f%%\n", n, avg, theory, diff); } return 0; }</lang>
- Output:
n avg exp. diff ------------------------------- 1 1.0000 1.0000 0.000% 2 1.4998 1.5000 -0.015% 3 1.8879 1.8889 -0.051% 4 2.2181 2.2188 -0.029% 5 2.5107 2.5104 0.012% 6 2.7741 2.7747 -0.021% 7 3.0168 3.0181 -0.044% 8 3.2455 3.2450 0.014% 9 3.4591 3.4583 0.023% 10 3.6596 3.6602 -0.017% 11 3.8519 3.8524 -0.013% 12 4.0384 4.0361 0.059% 13 4.2106 4.2123 -0.042% 14 4.3840 4.3820 0.044% 15 4.5449 4.5458 -0.020% 16 4.7058 4.7043 0.033% 17 4.8549 4.8579 -0.060% 18 5.0084 5.0071 0.026% 19 5.1479 5.1522 -0.084% 20 5.2957 5.2936 0.040%
D
<lang d>import std.stdio, std.random, std.math, std.algorithm, std.range;
real analytical(in int n) /*pure nothrow*/ {
real total = 0.0; foreach (immutable k; 1 .. n + 1) { immutable x = reduce!q{a * b}(1.0L, iota(n - k + 1, n + 1)) * k ^^ 2; total += x / ((cast(real)n) ^^ (k + 1)); } return total;
}
size_t loopLength(size_t maxN)(in int size, ref Xorshift rng) {
__gshared bool[maxN + 1] seen; seen[0 .. size + 1] = false; int current = 1; size_t steps = 0; while (!seen[current]) { seen[current] = true; current = uniform(1, size + 1, rng); steps++; } return steps;
}
void main() {
enum maxN = 40; enum nTrials = 300_000; auto rng = Xorshift(unpredictableSeed); writeln(" n average analytical (error)"); writeln("=== ========= ============ ==========");
foreach (immutable n; 1 .. maxN + 1) { long total = 0; foreach (immutable _; 0 .. nTrials) total += loopLength!maxN(n, rng); immutable average = total / cast(real)nTrials; immutable an = analytical(n); immutable percentError = abs(an - average) / an * 100; immutable errorS = format("%2.4f", percentError); writefln("%3d %9.5f %12.5f (%7s%%)", n, average, an, errorS); }
}</lang>
- Output:
n average analytical (error) === ========= ============ ========== 1 1.00000 1.00000 ( 0.0000%) 2 1.50017 1.50000 ( 0.0111%) 3 1.88932 1.88889 ( 0.0226%) 4 2.21795 2.21875 ( 0.0362%) 5 2.51159 2.51040 ( 0.0474%) 6 2.77373 2.77469 ( 0.0345%) 7 3.01894 3.01814 ( 0.0264%) 8 3.24734 3.24502 ( 0.0716%) 9 3.45876 3.45832 ( 0.0127%) 10 3.66595 3.66022 ( 0.1567%) 11 3.85000 3.85237 ( 0.0616%) 12 4.03532 4.03607 ( 0.0187%) 13 4.20879 4.21235 ( 0.0843%) 14 4.37664 4.38203 ( 0.1230%) 15 4.54986 4.54581 ( 0.0892%) 16 4.70431 4.70426 ( 0.0010%) 17 4.85640 4.85787 ( 0.0302%) 18 5.01359 5.00706 ( 0.1303%) 19 5.15487 5.15220 ( 0.0519%) 20 5.29486 5.29358 ( 0.0241%) 21 5.43276 5.43150 ( 0.0231%) 22 5.56570 5.56620 ( 0.0088%) 23 5.70611 5.69788 ( 0.1443%) 24 5.82618 5.82675 ( 0.0098%) 25 5.94846 5.95298 ( 0.0759%) 26 6.07440 6.07672 ( 0.0381%) 27 6.20717 6.19811 ( 0.1461%) 28 6.31546 6.31729 ( 0.0290%) 29 6.44201 6.43437 ( 0.1187%) 30 6.54592 6.54946 ( 0.0540%) 31 6.65818 6.66265 ( 0.0671%) 32 6.77215 6.77405 ( 0.0279%) 33 6.88381 6.88372 ( 0.0013%) 34 6.99790 6.99175 ( 0.0880%) 35 7.10990 7.09820 ( 0.1648%) 36 7.20391 7.20316 ( 0.0104%) 37 7.30085 7.30667 ( 0.0796%) 38 7.40366 7.40880 ( 0.0693%) 39 7.51864 7.50959 ( 0.1204%) 40 7.60255 7.60911 ( 0.0863%)
J
First, let's consider an exact, brute force approach.
Since J array indices start at 0, we'll work with 0..N-1 instead of 1..N, dealing with the difference at the boundaries.
We can implement f as {&LIST where LIST is an arbitrary list of N numbers, each picked independently from the range 0..(N-1). We can incrementally build the described sequence using (, f@{:) - here we extend the sequence by applying f to the last element of the sequence. Since we are only concerned with the sequence up to the point of the first repeat, we can select the unique values giving us (~.@, f@{:). This routine stops changing when we reach the desired length, so we can repeatedly apply it forever. For example:
<lang J> (~.@, {&0 0@{:)^:_] 0 0
(~.@, {&0 0@{:)^:_] 1
1 0</lang>
Once we have the sequence, we can count how many elements are in it.
<lang J> 0 0 ([: # (] ~.@, {:@] { [)^:_) 1 2</lang>
Meanwhile, we can also generate all possible values of 1..N by counting out N^N values and breaking out the result as a base N list of digits.
<lang J> (#.inv i.@^~)2 0 0 0 1 1 0 1 1</lang>
All that's left is to count the lengths of all possible sequences for all possible distinct instances of f and average the results:
<lang J> (+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)1 1
(+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)2
1.5
(+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)3
1.88889
(+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)4
2.21875
(+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)5
2.5104
(+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)6
2.77469</lang>
Meanwhile the analytic solution (derived by reading the Ada implementation) looks like this:
<lang J> ana=: +/@(!@[ % !@- * ^) 1+i.
ana"0]1 2 3 4 5 6
1 1.5 1.88889 2.21875 2.5104 2.77469</lang>
To get our simulation, we can take the exact approach and replace the part that generates all possible values for f with a random mechanism. Since the task does not specify how long to run the simulation, and to make this change easy, we'll use N*1e4 tests.
<lang J> sim=: (+/ % #)@,@((]?@$~1e4,]) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)
sim"0]1 2 3 4 5 6
1 1.5034 1.8825 2.22447 2.51298 2.76898</lang>
The simulation approach is noticeably slower than the analytic approach, while being less accurate.
Finally, we can generate our desired results:
<lang J> (;:'N average analytic error'),:,.each(;ana"0 ([;];-|@%[) sim"0)1+i.20 +--+-------+--------+-----------+ |N |average|analytic|error | +--+-------+--------+-----------+ | 1| 1| 1 | 0| | 2| 1.5|1.49955 | 0.0003| | 3|1.88889| 1.8928 | 0.00207059| | 4|2.21875|2.23082 | 0.00544225| | 5| 2.5104|2.52146 | 0.00440567| | 6|2.77469|2.78147 | 0.00244182| | 7|3.01814| 3.0101 | 0.00266346| | 8|3.24502|3.25931 | 0.00440506| | 9|3.45832|3.45314 | 0.00149532| |10|3.66022| 3.6708 | 0.00289172| |11|3.85237|3.84139 | 0.00285049| |12|4.03607|4.03252 |0.000881304| |13|4.21235|4.18358 | 0.00682833| |14|4.38203|4.38791 | 0.00134132| |15|4.54581|4.54443 |0.000302246| |16|4.70426|4.71351 | 0.00196721| |17|4.85787|4.85838 |0.000104089| |18|5.00706|5.00889 |0.000365752| |19| 5.1522|5.14785 |0.000843052| |20|5.29358|5.28587 | 0.00145829| +--+-------+--------+-----------+</lang>
Here, error is the difference between the two values divided by the analytic value.
Mathematica
<lang mathematica>Grid@Prepend[
Table[{n, #1, #2, Row[{Round[10000 Abs[#1 - #2]/#2]/100., "%"}]} &@ N[{Mean[Array[ Length@NestWhileList[#, 1, UnsameQ[##] &, All] - 1 &[# /. MapIndexed[#21 -> #1 &, RandomInteger[{1, n}, n]] &] &, 10000]], Sum[n! n^(n - k - 1)/(n - k)!, {k, n}]/n^(n - 1)}, 5], {n, 1, 20}], {"N", "average", "analytical", "error"}]</lang>
- Output:
N average analytical error 1 1.0000 1.0000 0.% 2 1.5017 1.5000 0.11% 3 1.8910 1.8889 0.11% 4 2.2334 2.2188 0.66% 5 2.5090 2.5104 0.06% 6 2.8092 2.7747 1.24% 7 3.0468 3.0181 0.95% 8 3.2253 3.2450 0.61% 9 3.4695 3.4583 0.32% 10 3.6661 3.6602 0.16% 11 3.8662 3.8524 0.36% 12 4.0393 4.0361 0.08% 13 4.2232 4.2123 0.26% 14 4.3496 4.3820 0.74% 15 4.5706 4.5458 0.55% 16 4.6963 4.7043 0.17% 17 4.8548 4.8579 0.06% 18 5.0671 5.0071 1.2% 19 5.1702 5.1522 0.35% 20 5.2264 5.2936 1.27%
Perl 6
Runs on Rakudo Warszawa (2012.12).
<lang perl6>constant MAX_N = 20; constant TRIALS = 100;
for 1 .. MAX_N -> $N {
my $empiric = TRIALS R/ [+] find-loop(random-mapping($N)).elems xx TRIALS; my $theoric = [+] map -> $k { $N ** ($k + 1) R/ [*] $k**2, $N - $k + 1 .. $N }, 1 .. $N; FIRST say " N empiric theoric (error)"; FIRST say "=== ========= ============ ========="; printf "%3d %9.4f %12.4f (%4.2f%%)\n", $N, $empiric, $theoric, 100 * abs($theoric - $empiric) / $theoric;
}
sub random-mapping { hash .list Z=> .roll given ^$^size } sub find-loop { 0, %^mapping{*} ...^ { (state %){$_}++ } }</lang>
Example:
N empiric theoric (error) === ========= ============ ========= 1 1.0000 1.0000 (0.00%) 2 1.5600 1.5000 (4.00%) 3 1.7800 1.8889 (5.76%) 4 2.1800 2.2188 (1.75%) 5 2.6200 2.5104 (4.37%) 6 2.8300 2.7747 (1.99%) 7 3.1200 3.0181 (3.37%) 8 3.1400 3.2450 (3.24%) 9 3.4500 3.4583 (0.24%) 10 3.6700 3.6602 (0.27%) 11 3.8300 3.8524 (0.58%) 12 4.3600 4.0361 (8.03%) 13 3.9000 4.2123 (7.42%) 14 4.4900 4.3820 (2.46%) 15 4.9500 4.5458 (8.89%) 16 4.9800 4.7043 (5.86%) 17 4.9100 4.8579 (1.07%) 18 4.9700 5.0071 (0.74%) 19 5.1000 5.1522 (1.01%) 20 5.2300 5.2936 (1.20%)
Python
<lang python>from __future__ import division # Only necessary for Python 2.X from math import factorial from random import randrange
MAX_N = 20 TIMES = 1000000
def analytical(n): return sum(factorial(n) / pow(n, i) / factorial(n -i) for i in range(1, n+1))
def test(n, times):
count = 0 for i in range(times): x, bits = 1, 0 while not (bits & x): count += 1 bits |= x x = 1 << randrange(n) return count / times
if __name__ == '__main__':
print(" n\tavg\texp.\tdiff\n-------------------------------") for n in range(1, MAX_N+1): avg = test(n, TIMES) theory = analytical(n) diff = (avg / theory - 1) * 100 print("%2d %8.4f %8.4f %6.3f%%" % (n, avg, theory, diff))</lang>
- Output:
n avg exp. diff ------------------------------- 1 1.0000 1.0000 0.000% 2 1.5006 1.5000 0.037% 3 1.8887 1.8889 -0.012% 4 2.2190 2.2188 0.011% 5 2.5101 2.5104 -0.012% 6 2.7750 2.7747 0.012% 7 3.0158 3.0181 -0.076% 8 3.2447 3.2450 -0.009% 9 3.4586 3.4583 0.009% 10 3.6598 3.6602 -0.010% 11 3.8510 3.8524 -0.036% 12 4.0368 4.0361 0.017% 13 4.2099 4.2123 -0.058% 14 4.3784 4.3820 -0.083% 15 4.5484 4.5458 0.058% 16 4.7045 4.7043 0.006% 17 4.8611 4.8579 0.067% 18 5.0074 5.0071 0.007% 19 5.1534 5.1522 0.024% 20 5.2927 5.2936 -0.017%
REXX
<lang rexx>/*REXX program to read a config file and assign VARs as found within. */ numeric digits 10000 /*be able to calculate !(runs). */ parse arg runs tests seed . if runs ==',' | runs == then runs = 40 /*num of runs. */ if tests ==',' | tests == then tests = 1000000 /*num of trials.*/ if seed\==',' & seed\== then call random ,,seed /*repeatability?*/ numeric digits max(9,length(!(runs))) /*set NUMERIC digits for !(runs).*/ say right( runs, 24) 'runs' /*display # of runs we're using*/ say right( tests, 24) 'tests' /* " " " tests " " */ say right( digits(), 24) 'digits' /* " " " digits " " */ say say ' N average exact % error' /*headers & pad.*/ h= ' ─── ───────── ───────── ─────────'; say h; pad=left(,3)
do #=1 for runs; ##=right(#,9) /*## is used for indenting output*/ a= format(exact(#) ,,4) /*use 4 digits past decimal point*/ e= format(exper(#) ,,4) /* " " " " " " */ err= format(abs(e-a)*100/a ,,4) /* " " " " " " */ if err=0 then err=err/1 /*present a clean & concise zero.*/ say ## pad e pad a pad err /*display a line of statistics. */ end /*#*/
say h /*display the final header bar. */ exit /*stick a fork in it, we're done.*/ /*──────────────────────────────────! subroutine────────────────────────*/ !: procedure; !=1; do j=2 to arg(1); !=!*j; end; return ! /*──────────────────────────────────EXACT subroutine────────────────────*/ exact: parse arg x; s=0; do j=1 for x; s=s+!(x)/!(x-j)/x**j; end; return s /*──────────────────────────────────EXPER subroutine────────────────────*/ exper: parse arg n k=0; do tests; !.=0 /*repeat TESTS times, reset found*/
!.=0 /*stemmed array: expected results*/ do n; r=random(1,n); if !.r then leave !.r=1; k=k+1 /*bump the ctr. */ end /*n*/ end /*tests*/
return k/tests </lang> output when using the default input
40 runs 1000000 tests 48 digits N average exact % error ─── ───────── ───────── ───────── 1 1.0000 1.0000 0 2 1.4964 1.5000 0.2400 3 1.8876 1.8889 0.0688 4 2.2222 2.2188 0.1532 5 2.5104 2.5104 0 6 2.7758 2.7747 0.0396 7 3.0194 3.0181 0.0431 8 3.2608 3.2450 0.4869 9 3.4565 3.4583 0.0520 10 3.6583 3.6602 0.0519 11 3.8513 3.8524 0.0286 12 4.0401 4.0361 0.0991 13 4.2133 4.2123 0.0237 14 4.3835 4.3820 0.0342 15 4.5445 4.5458 0.0286 16 4.6672 4.7043 0.7886 17 4.8575 4.8579 0.0082 18 5.0105 5.0071 0.0679 19 5.1517 5.1522 0.0097 20 5.2903 5.2936 0.0623 21 5.4328 5.4315 0.0239 22 5.5674 5.5662 0.0216 23 5.6990 5.6979 0.0193 24 5.8353 5.8268 0.1459 25 5.9536 5.9530 0.0101 26 6.0801 6.0767 0.0560 27 6.1997 6.1981 0.0258 28 6.3197 6.3173 0.0380 29 6.4328 6.4344 0.0249 30 6.5485 6.5495 0.0153 31 6.6615 6.6627 0.0180 32 6.7102 6.7740 0.9418 33 6.8826 6.8837 0.0160 34 6.9878 6.9917 0.0558 35 7.0996 7.0982 0.0197 36 7.2054 7.2032 0.0305 37 7.3073 7.3067 0.0082 38 7.4089 7.4088 0.0013 39 7.5052 7.5096 0.0586 40 7.6151 7.6091 0.0789 ─── ───────── ───────── ─────────
Unicon
<lang unicon> link printf, factors
$define MAX_N 20 $define TIMES 1000000 $define RAND_MAX 2147483647
procedure expected(n)
local sum := 0 every i := 1 to n do sum +:= factorial(n) / (n ^ i) / factorial(n - i) return sum
end
procedure test(n, times)
local i, count := 0, x, bits every i := 0 to times-1 do {
x := 1 bits := 0 while iand(bits, x)=0 do {
count +:= 1 bits := ior(bits, x) x := ishift(1 , ?n-1) } } return count
end
procedure main(void)
local n, cnt, avg, theory, diff write(" n\tavg\texp.\tdiff\n", repl("-",29)) every n := 1 to MAX_N do { cnt := test(n, TIMES) avg := real(cnt) / TIMES theory := expected(n) diff := (avg / theory - 1) * 100 printf("%2d %8.4r %8.4r %6.3r%%\n", n, avg, theory, diff) } return 0
end</lang>
- Output:
n avg exp. diff ----------------------------- 1 1.0000 1.0000 0.000% 2 1.5008 1.5000 0.056% 3 1.8879 1.8889 -0.051% 4 2.2208 2.2188 0.091% 5 2.5127 2.5104 0.093% 6 2.7759 2.7747 0.044% 7 3.0175 3.0181 -0.023% 8 3.2425 3.2450 -0.079% 9 3.4571 3.4583 -0.034% 10 3.6613 3.6602 0.029% 11 3.8493 3.8524 -0.081% 12 4.0384 4.0361 0.058% 13 4.2133 4.2123 0.023% 14 4.3804 4.3820 -0.037% 15 4.5475 4.5458 0.038% 16 4.7049 4.7043 0.014% 17 4.8575 4.8579 -0.008% 18 5.0088 5.0071 0.035% 19 5.1533 5.1522 0.021% 20 5.2893 5.2936 -0.081%