Average loop length
You are encouraged to solve this task according to the task description, using any language you may know.
Let f
be a uniformly-randomly chosen mapping from the numbers 1..N to the numbers 1..N (note: not necessarily a permutation of 1..N; the mapping could produce a number in more than one way or not at all). At some point, the sequence 1, f(1), f(f(1))...
will contain a repetition, a number that occurring for the second time in the sequence.
Write a program or a script that estimates, for each N
, the average length until the first such repetition.
Also calculate this expected length using an analytical formula, and optionally compare the simulated result with the theoretical one.
This problem comes from the end of Donald Knuth's Christmas tree lecture 2011.
Example of expected output:
N average analytical (error) === ========= ============ ========= 1 1.0000 1.0000 ( 0.00%) 2 1.4992 1.5000 ( 0.05%) 3 1.8784 1.8889 ( 0.56%) 4 2.2316 2.2188 ( 0.58%) 5 2.4982 2.5104 ( 0.49%) 6 2.7897 2.7747 ( 0.54%) 7 3.0153 3.0181 ( 0.09%) 8 3.2429 3.2450 ( 0.07%) 9 3.4536 3.4583 ( 0.14%) 10 3.6649 3.6602 ( 0.13%) 11 3.8091 3.8524 ( 1.12%) 12 3.9986 4.0361 ( 0.93%) 13 4.2074 4.2123 ( 0.12%) 14 4.3711 4.3820 ( 0.25%) 15 4.5275 4.5458 ( 0.40%) 16 4.6755 4.7043 ( 0.61%) 17 4.8877 4.8579 ( 0.61%) 18 4.9951 5.0071 ( 0.24%) 19 5.1312 5.1522 ( 0.41%) 20 5.2699 5.2936 ( 0.45%)
Ada
<lang Ada>with Ada.Text_IO; use Ada.Text_IO; with Ada.Numerics.Generic_Elementary_Functions; with Ada.Numerics.Discrete_Random; procedure Avglen is
package IIO is new Ada.Text_IO.Integer_IO (Positive); use IIO; package LFIO is new Ada.Text_IO.Float_IO (Long_Float); use LFIO; subtype FactN is Natural range 0..20; TESTS : constant Natural := 1_000_000;
function Factorial (N : FactN) return Long_Float is Result : Long_Float := 1.0; begin for I in 2..N loop Result := Result * Long_Float(I); end loop; return Result; end Factorial;
function Analytical (N : FactN) return Long_Float is Sum : Long_Float := 0.0; begin for I in 1..N loop Sum := Sum + Factorial(N) / Factorial(N - I) / Long_Float(N)**I; end loop; return Sum; end Analytical;
function Experimental (N : FactN) return Long_Float is subtype RandInt is Natural range 1..N; package Random is new Ada.Numerics.Discrete_Random(RandInt); seed : Random.Generator; Num : RandInt; count : Natural := 0; bits : array(RandInt'Range) of Boolean; begin Random.Reset(seed); for run in 1..TESTS loop bits := (others => false); for I in RandInt'Range loop Num := Random.Random(seed); exit when bits(Num); bits(Num) := True; count := count + 1; end loop; end loop; return Long_Float(count)/Long_Float(TESTS); end Experimental;
A, E, err : Long_Float;
begin
Put_Line(" N avg calc %diff"); for I in 1..20 loop A := Analytical(I); E := Experimental(I); err := abs(E-A)/A*100.0; Put(I, Width=>2); Put(E ,Aft=>4, exp=>0); Put(A, Aft=>4, exp=>0); Put(err, Fore=>3, Aft=>3, exp=>0); New_line; end loop;
end Avglen;</lang>
- Output:
N avg calc %diff 1 1.0000 1.0000 0.000 2 1.5000 1.5000 0.003 3 1.8886 1.8889 0.015 4 2.2180 2.2188 0.033 5 2.5104 2.5104 0.000 6 2.7745 2.7747 0.006 7 3.0191 3.0181 0.033 8 3.2433 3.2450 0.052 9 3.4583 3.4583 0.001 10 3.6597 3.6602 0.015 11 3.8524 3.8524 0.001 12 4.0352 4.0361 0.022 13 4.2147 4.2123 0.055 14 4.3853 4.3820 0.075 15 4.5453 4.5458 0.011 16 4.7055 4.7043 0.027 17 4.8592 4.8579 0.028 18 5.0062 5.0071 0.017 19 5.1535 5.1522 0.025 20 5.2955 5.2936 0.035
C
<lang c>#include <stdio.h>
- include <stdlib.h>
- include <math.h>
- include <time.h>
- define MAX_N 20
- define TIMES 1000000
double factorial(int n) { double f = 1; int i; for (i = 1; i <= n; i++) f *= i; return f; }
double expected(int n) { double sum = 0; int i; for (i = 1; i <= n; i++) sum += factorial(n) / pow(n, i) / factorial(n - i); return sum; }
int randint(int n) { int r, rmax = RAND_MAX / n * n; while ((r = rand()) >= rmax); return r / (RAND_MAX / n); }
int test(int n, int times) { int i, count = 0; for (i = 0; i < times; i++) { int x = 1, bits = 0; while (!(bits & x)) { count++; bits |= x; x = 1 << randint(n); } } return count; }
int main(void) { srand(time(0)); puts(" n\tavg\texp.\tdiff\n-------------------------------");
int n; for (n = 1; n <= MAX_N; n++) { int cnt = test(n, TIMES); double avg = (double)cnt / TIMES; double theory = expected(n); double diff = (avg / theory - 1) * 100; printf("%2d %8.4f %8.4f %6.3f%%\n", n, avg, theory, diff); } return 0; }</lang>
- Output:
n avg exp. diff ------------------------------- 1 1.0000 1.0000 0.000% 2 1.4998 1.5000 -0.015% 3 1.8879 1.8889 -0.051% 4 2.2181 2.2188 -0.029% 5 2.5107 2.5104 0.012% 6 2.7741 2.7747 -0.021% 7 3.0168 3.0181 -0.044% 8 3.2455 3.2450 0.014% 9 3.4591 3.4583 0.023% 10 3.6596 3.6602 -0.017% 11 3.8519 3.8524 -0.013% 12 4.0384 4.0361 0.059% 13 4.2106 4.2123 -0.042% 14 4.3840 4.3820 0.044% 15 4.5449 4.5458 -0.020% 16 4.7058 4.7043 0.033% 17 4.8549 4.8579 -0.060% 18 5.0084 5.0071 0.026% 19 5.1479 5.1522 -0.084% 20 5.2957 5.2936 0.040%
D
<lang d>import std.stdio, std.random, std.math, std.algorithm, std.range;
real analytical(in int n) /*pure nothrow*/ {
real total = 0.0; foreach (immutable k; 1 .. n + 1) { immutable x = reduce!q{a * b}(1.0L, iota(n - k + 1, n + 1)) * k ^^ 2; total += x / ((cast(real)n) ^^ (k + 1)); } return total;
}
size_t loopLength(size_t maxN)(in int size, ref Xorshift rng) {
__gshared bool[maxN + 1] seen; seen[0 .. size + 1] = false; int current = 1; size_t steps = 0; while (!seen[current]) { seen[current] = true; current = uniform(1, size + 1, rng); steps++; } return steps;
}
void main() {
enum maxN = 40; enum nTrials = 300_000; auto rng = Xorshift(unpredictableSeed); writeln(" n average analytical (error)"); writeln("=== ========= ============ ==========");
foreach (immutable n; 1 .. maxN + 1) { long total = 0; foreach (immutable _; 0 .. nTrials) total += loopLength!maxN(n, rng); immutable average = total / cast(real)nTrials; immutable an = analytical(n); immutable percentError = abs(an - average) / an * 100; immutable errorS = format("%2.4f", percentError); writefln("%3d %9.5f %12.5f (%7s%%)", n, average, an, errorS); }
}</lang>
- Output:
n average analytical (error) === ========= ============ ========== 1 1.00000 1.00000 ( 0.0000%) 2 1.50017 1.50000 ( 0.0111%) 3 1.88932 1.88889 ( 0.0226%) 4 2.21795 2.21875 ( 0.0362%) 5 2.51159 2.51040 ( 0.0474%) 6 2.77373 2.77469 ( 0.0345%) 7 3.01894 3.01814 ( 0.0264%) 8 3.24734 3.24502 ( 0.0716%) 9 3.45876 3.45832 ( 0.0127%) 10 3.66595 3.66022 ( 0.1567%) 11 3.85000 3.85237 ( 0.0616%) 12 4.03532 4.03607 ( 0.0187%) 13 4.20879 4.21235 ( 0.0843%) 14 4.37664 4.38203 ( 0.1230%) 15 4.54986 4.54581 ( 0.0892%) 16 4.70431 4.70426 ( 0.0010%) 17 4.85640 4.85787 ( 0.0302%) 18 5.01359 5.00706 ( 0.1303%) 19 5.15487 5.15220 ( 0.0519%) 20 5.29486 5.29358 ( 0.0241%) 21 5.43276 5.43150 ( 0.0231%) 22 5.56570 5.56620 ( 0.0088%) 23 5.70611 5.69788 ( 0.1443%) 24 5.82618 5.82675 ( 0.0098%) 25 5.94846 5.95298 ( 0.0759%) 26 6.07440 6.07672 ( 0.0381%) 27 6.20717 6.19811 ( 0.1461%) 28 6.31546 6.31729 ( 0.0290%) 29 6.44201 6.43437 ( 0.1187%) 30 6.54592 6.54946 ( 0.0540%) 31 6.65818 6.66265 ( 0.0671%) 32 6.77215 6.77405 ( 0.0279%) 33 6.88381 6.88372 ( 0.0013%) 34 6.99790 6.99175 ( 0.0880%) 35 7.10990 7.09820 ( 0.1648%) 36 7.20391 7.20316 ( 0.0104%) 37 7.30085 7.30667 ( 0.0796%) 38 7.40366 7.40880 ( 0.0693%) 39 7.51864 7.50959 ( 0.1204%) 40 7.60255 7.60911 ( 0.0863%)
J
First, let's consider an exact, brute force approach.
Since J array indices start at 0, we'll work with 0..N-1 instead of 1..N, dealing with the difference at the boundaries.
We can implement f as {&LIST where LIST is an arbitrary list of N numbers, each picked independently from the range 0..(N-1). We can incrementally build the described sequence using (, f@{:) - here we extend the sequence by applying f to the last element of the sequence. Since we are only concerned with the sequence up to the point of the first repeat, we can select the unique values giving us (~.@, f@{:). This routine stops changing when we reach the desired length, so we can repeatedly apply it forever. For example:
<lang J> (~.@, {&0 0@{:)^:_] 0 0
(~.@, {&0 0@{:)^:_] 1
1 0</lang>
Once we have the sequence, we can count how many elements are in it.
<lang J> 0 0 ([: # (] ~.@, {:@] { [)^:_) 1 2</lang>
Meanwhile, we can also generate all possible values of 1..N by counting out N^N values and breaking out the result as a base N list of digits.
<lang J> (#.inv i.@^~)2 0 0 0 1 1 0 1 1</lang>
All that's left is to count the lengths of all possible sequences for all possible distinct instances of f and average the results:
<lang J> (+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)1 1
(+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)2
1.5
(+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)3
1.88889
(+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)4
2.21875
(+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)5
2.5104
(+/ % #)@,@((#.inv i.@^~) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)6
2.77469</lang>
Meanwhile the analytic solution (derived by reading the Ada implementation) looks like this:
<lang J> ana=: +/@(!@[ % !@- * ^) 1+i.
ana"0]1 2 3 4 5 6
1 1.5 1.88889 2.21875 2.5104 2.77469</lang>
To get our simulation, we can take the exact approach and replace the part that generates all possible values for f with a random mechanism. Since the task does not specify how long to run the simulation, and to make this change easy, we'll use N*1e4 tests.
<lang J> sim=: (+/ % #)@,@((]?@$~1e4,]) ([: # (] ~.@, {:@] { [)^:_)"1 0/ i.)
sim"0]1 2 3 4 5 6
1 1.5034 1.8825 2.22447 2.51298 2.76898</lang>
The simulation approach is noticeably slower than the analytic approach, while being less accurate.
Finally, we can generate our desired results:
<lang J> (;:'N average analytic error'),:,.each(;ana"0 ([;];-|@%[) sim"0)1+i.20 +--+-------+--------+-----------+ |N |average|analytic|error | +--+-------+--------+-----------+ | 1| 1| 1 | 0| | 2| 1.5|1.49955 | 0.0003| | 3|1.88889| 1.8928 | 0.00207059| | 4|2.21875|2.23082 | 0.00544225| | 5| 2.5104|2.52146 | 0.00440567| | 6|2.77469|2.78147 | 0.00244182| | 7|3.01814| 3.0101 | 0.00266346| | 8|3.24502|3.25931 | 0.00440506| | 9|3.45832|3.45314 | 0.00149532| |10|3.66022| 3.6708 | 0.00289172| |11|3.85237|3.84139 | 0.00285049| |12|4.03607|4.03252 |0.000881304| |13|4.21235|4.18358 | 0.00682833| |14|4.38203|4.38791 | 0.00134132| |15|4.54581|4.54443 |0.000302246| |16|4.70426|4.71351 | 0.00196721| |17|4.85787|4.85838 |0.000104089| |18|5.00706|5.00889 |0.000365752| |19| 5.1522|5.14785 |0.000843052| |20|5.29358|5.28587 | 0.00145829| +--+-------+--------+-----------+</lang>
Here, error is the difference between the two values divided by the analytic value.
Mathematica
<lang mathematica>Grid@Prepend[
Table[{n, #1, #2, Row[{Round[10000 Abs[#1 - #2]/#2]/100., "%"}]} &@ N[{Mean[Array[ Length@NestWhileList[#, 1, UnsameQ[##] &, All] - 1 &[# /. MapIndexed[#21 -> #1 &, RandomInteger[{1, n}, n]] &] &, 10000]], Sum[n! n^(n - k - 1)/(n - k)!, {k, n}]/n^(n - 1)}, 5], {n, 1, 20}], {"N", "average", "analytical", "error"}]</lang>
- Output:
N average analytical error 1 1.0000 1.0000 0.% 2 1.5017 1.5000 0.11% 3 1.8910 1.8889 0.11% 4 2.2334 2.2188 0.66% 5 2.5090 2.5104 0.06% 6 2.8092 2.7747 1.24% 7 3.0468 3.0181 0.95% 8 3.2253 3.2450 0.61% 9 3.4695 3.4583 0.32% 10 3.6661 3.6602 0.16% 11 3.8662 3.8524 0.36% 12 4.0393 4.0361 0.08% 13 4.2232 4.2123 0.26% 14 4.3496 4.3820 0.74% 15 4.5706 4.5458 0.55% 16 4.6963 4.7043 0.17% 17 4.8548 4.8579 0.06% 18 5.0671 5.0071 1.2% 19 5.1702 5.1522 0.35% 20 5.2264 5.2936 1.27%
Perl 6
Runs on Rakudo Warszawa (2012.12).
<lang perl6>constant MAX_N = 20; constant TRIALS = 100;
for 1 .. MAX_N -> $N {
my $empiric = TRIALS R/ [+] find-loop(random-mapping($N)).elems xx TRIALS; my $theoric = [+] map -> $k { $N ** ($k + 1) R/ [*] $k**2, $N - $k + 1 .. $N }, 1 .. $N; FIRST say " N empiric theoric (error)"; FIRST say "=== ========= ============ ========="; printf "%3d %9.4f %12.4f (%4.2f%%)\n", $N, $empiric, $theoric, 100 * abs($theoric - $empiric) / $theoric;
}
sub random-mapping { hash .list Z=> .roll given ^$^size } sub find-loop { 0, %^mapping{*} ...^ { (state %){$_}++ } }</lang>
Example:
N empiric theoric (error) === ========= ============ ========= 1 1.0000 1.0000 (0.00%) 2 1.5600 1.5000 (4.00%) 3 1.7800 1.8889 (5.76%) 4 2.1800 2.2188 (1.75%) 5 2.6200 2.5104 (4.37%) 6 2.8300 2.7747 (1.99%) 7 3.1200 3.0181 (3.37%) 8 3.1400 3.2450 (3.24%) 9 3.4500 3.4583 (0.24%) 10 3.6700 3.6602 (0.27%) 11 3.8300 3.8524 (0.58%) 12 4.3600 4.0361 (8.03%) 13 3.9000 4.2123 (7.42%) 14 4.4900 4.3820 (2.46%) 15 4.9500 4.5458 (8.89%) 16 4.9800 4.7043 (5.86%) 17 4.9100 4.8579 (1.07%) 18 4.9700 5.0071 (0.74%) 19 5.1000 5.1522 (1.01%) 20 5.2300 5.2936 (1.20%)
Python
<lang python>from __future__ import division # Only necessary for Python 2.X from math import factorial from random import randrange
MAX_N = 20 TIMES = 1000000
def analytical(n): return sum(factorial(n) / pow(n, i) / factorial(n -i) for i in range(1, n+1))
def test(n, times):
count = 0 for i in range(times): x, bits = 1, 0 while not (bits & x): count += 1 bits |= x x = 1 << randrange(n) return count / times
if __name__ == '__main__':
print(" n\tavg\texp.\tdiff\n-------------------------------") for n in range(1, MAX_N+1): avg = test(n, TIMES) theory = analytical(n) diff = (avg / theory - 1) * 100 print("%2d %8.4f %8.4f %6.3f%%" % (n, avg, theory, diff))</lang>
- Output:
n avg exp. diff ------------------------------- 1 1.0000 1.0000 0.000% 2 1.5006 1.5000 0.037% 3 1.8887 1.8889 -0.012% 4 2.2190 2.2188 0.011% 5 2.5101 2.5104 -0.012% 6 2.7750 2.7747 0.012% 7 3.0158 3.0181 -0.076% 8 3.2447 3.2450 -0.009% 9 3.4586 3.4583 0.009% 10 3.6598 3.6602 -0.010% 11 3.8510 3.8524 -0.036% 12 4.0368 4.0361 0.017% 13 4.2099 4.2123 -0.058% 14 4.3784 4.3820 -0.083% 15 4.5484 4.5458 0.058% 16 4.7045 4.7043 0.006% 17 4.8611 4.8579 0.067% 18 5.0074 5.0071 0.007% 19 5.1534 5.1522 0.024% 20 5.2927 5.2936 -0.017%
Unicon
<lang unicon> link printf, factors
$define MAX_N 20 $define TIMES 1000000 $define RAND_MAX 2147483647
procedure expected(n)
local sum := 0 every i := 1 to n do sum +:= factorial(n) / (n ^ i) / factorial(n - i) return sum
end
procedure test(n, times)
local i, count := 0, x, bits every i := 0 to times-1 do {
x := 1 bits := 0 while iand(bits, x)=0 do {
count +:= 1 bits := ior(bits, x) x := ishift(1 , ?n-1) } } return count
end
procedure main(void)
local n, cnt, avg, theory, diff write(" n\tavg\texp.\tdiff\n", repl("-",29)) every n := 1 to MAX_N do { cnt := test(n, TIMES) avg := real(cnt) / TIMES theory := expected(n) diff := (avg / theory - 1) * 100 printf("%2d %8.4r %8.4r %6.3r%%\n", n, avg, theory, diff) } return 0
end</lang>
- Output:
n avg exp. diff ----------------------------- 1 1.0000 1.0000 0.000% 2 1.5008 1.5000 0.056% 3 1.8879 1.8889 -0.051% 4 2.2208 2.2188 0.091% 5 2.5127 2.5104 0.093% 6 2.7759 2.7747 0.044% 7 3.0175 3.0181 -0.023% 8 3.2425 3.2450 -0.079% 9 3.4571 3.4583 -0.034% 10 3.6613 3.6602 0.029% 11 3.8493 3.8524 -0.081% 12 4.0384 4.0361 0.058% 13 4.2133 4.2123 0.023% 14 4.3804 4.3820 -0.037% 15 4.5475 4.5458 0.038% 16 4.7049 4.7043 0.014% 17 4.8575 4.8579 -0.008% 18 5.0088 5.0071 0.035% 19 5.1533 5.1522 0.021% 20 5.2893 5.2936 -0.081%