Percolation/Mean cluster density

Let $c$ be a 2D boolean square matrix of $n\times n$ values of either 1 or 0 where the probability of any value being 1 is $p$ , (and of 0 is therefore $1-p$ ). We define a cluster of 1's as being a group of 1's connected vertically or horizontally (i.e., using the Von Neumann neighborhood rule) and bounded by either $0$ or by the limits of the matrix. Let the number of such clusters in such a randomly constructed matrix be $C_{n}$ .

Percolation theory states that $K(p)$ (the mean cluster density) will satisfy $K(p)=C_{n}/n^{2}$ as $n$ tends to infinity. For $p=0.5$ , $K(p)$ is found numerically to approximate $0.065770$ ...

Task

Show the effect of varying $n$ on the accuracy of simulated $K(p)$ for $p=0.5$ and for values of $n$ up to at least $1000$ . Any calculation of $C_{n}$ for finite $n$ is subject to randomness, so an approximation should be computed as the average of $t$ runs, where $t$ ≥ $5$ .

For extra credit, graphically show clusters in a $15\times 15$ , $p=0.5$ grid.

Show your output here.

See also

s-Cluster on Wolfram mathworld.

C

<lang c>#include <stdio.h>

include <stdlib.h>

int *map, w, ww;

void make_map(double p) { int i, thresh = RAND_MAX * p; i = ww = w * w;

map = realloc(map, i * sizeof(int)); while (i--) map[i] = -(rand() < thresh); }

char alpha[] = "+.ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz";

define ALEN ((int)(sizeof(alpha) - 3))

void show_cluster(void) { int i, j, *s = map;

for (i = 0; i < w; i++) { for (j = 0; j < w; j++, s++) printf(" %c", *s < ALEN ? alpha[1 + *s] : '?'); putchar('\n'); } }

void recur(int x, int v) { if (x >= 0 && x < ww && map[x] == -1) { map[x] = v; recur(x - w, v); recur(x - 1, v); recur(x + 1, v); recur(x + w, v); } }

int count_clusters(void) { int i, cls;

for (cls = i = 0; i < ww; i++) { if (-1 != map[i]) continue; recur(i, ++cls); }

return cls; }

double tests(int n, double p) { int i; double k;

for (k = i = 0; i < n; i++) { make_map(p); k += (double)count_clusters() / ww; } return k / n; }

int main(void) { w = 15; make_map(.5); printf("width=15, p=0.5, %d clusters:\n", count_clusters()); show_cluster();

printf("\np=0.5, iter=5:\n"); for (w = 1<<2; w <= 1<<14; w<<=2) printf("%5d %9.6f\n", w, tests(5, .5));

free(map); return 0; }</lang>

Output:

width=15, p=0.5, 23 clusters:
 A . . . B . C C C C . D . E .
 A . . B B . . . . . . . . . .
 A . . . . . F . . G . H . . I
 . . J J J . . K K . L . M . I
 . J J . . . K K K K . M M . .
 . . . . K K . K . K . M . N .
 O O . K K K . K . . . . N N N
 . O O . K K K K K . P . N . .
 Q . . K K . . . K . P . . . .
 . R . K K . . K K . P . . S .
 . . K K . . . . K . P . . . K
 K K K K K . . K K . . T . . K
 K . K . . . U . K . . T . . .
 K . K K K K . K K K . T . . .
 . . K . K . V . K K . . . W .

p=0.5, iter=5:
    4  0.125000
   16  0.083594
   64  0.064453
  256  0.066864
 1024  0.065922
 4096  0.065836
16384  0.065774

D

Translation of: python

<lang d>import std.stdio, std.algorithm, std.random, std.math, std.array,

      std.range, std.ascii;

alias Cell = ubyte; alias Grid = Cell[][]; enum Cell notClustered = 1; // Filled cell, but not in a cluster.

Grid initialize(Grid grid, in double prob, ref Xorshift rng) nothrow {

   foreach (row; grid)
       foreach (ref cell; row)
           cell = Cell(rng.uniform01 < prob);
   return grid;

}

void show(in Grid grid) {

   immutable static cell2char = " #" ~ letters;
   writeln('+', "-".replicate(grid.length), '+');
   foreach (row; grid) {
       write('|');
       row.map!(c => c < cell2char.length ? cell2char[c] : '@').write;
       writeln('|');
   }
   writeln('+', "-".replicate(grid.length), '+');

}

size_t countClusters(bool justCount=false)(Grid grid) pure nothrow {

   immutable side = grid.length;
   static if (justCount)
       enum Cell clusterID = 2;
   else
       Cell clusterID = 1;

   void walk(in size_t r, in size_t c) nothrow {
       grid[r][c] = clusterID; // Fill grid.

       if (r < side - 1 && grid[r + 1][c] == notClustered) // Down.
           walk(r + 1, c);
       if (c < side - 1 && grid[r][c + 1] == notClustered) // Right.
           walk(r, c + 1);
       if (c > 0 && grid[r][c - 1] == notClustered) // Left.
           walk(r, c - 1);
       if (r > 0 && grid[r - 1][c] == notClustered) // Up.
           walk(r - 1, c);
   }

   size_t nClusters = 0;

   foreach (immutable r; 0 .. side)
       foreach (immutable c; 0 .. side)
           if (grid[r][c] == notClustered) {
               static if (!justCount)
                   clusterID++;
               nClusters++;
               walk(r, c);
           }
   return nClusters;

}

double clusterDensity(Grid grid, in double prob, ref Xorshift rng) {

   return grid.initialize(prob, rng).countClusters!true /
          double(grid.length ^^ 2);

}

void showDemo(in size_t side, in double prob, ref Xorshift rng) {

   auto grid = new Grid(side, side);
   grid.initialize(prob, rng);
   writefln("Found %d clusters in this %d by %d grid:\n",
            grid.countClusters, side, side);
   grid.show;

}

void main() {

   immutable prob = 0.5;
   immutable nIters = 5;
   auto rng = Xorshift(unpredictableSeed);

   showDemo(15, prob, rng);
   writeln;
   foreach (immutable i; iota(4, 14, 2)) {
       immutable side = 2 ^^ i;
       auto grid = new Grid(side, side);
       immutable density = nIters
                           .iota
                           .map!(_ => grid.clusterDensity(prob, rng))
                           .sum / nIters;
       writefln("n_iters=%3d, p=%4.2f, n=%5d, sim=%7.8f",
                nIters, prob, side, density);
   }

}</lang>

Output:

Found 26 clusters in this 15 by 15 grid:

+---------------+
| AA B    CCCC  |
|AA D E F CC  G |
|  DDD FF  CC  H|
| I D FF  J   K |
|  L  FF JJJJ   |
|L LLL      J  M|
|LLLLLL   JJJ MM|
|L LL L N  J   M|
|LL    O P J   M|
|LLL QQ R JJ  S |
|LL T  RR  J SSS|
| L   U  V JJ  S|
|  WW  XX JJ YY |
|    XXX   JJ YY|
|ZZ   XXX   JJ  |
+---------------+

n_iters=  5, p=0.50, n=   16, sim=0.09765625
n_iters=  5, p=0.50, n=   64, sim=0.07260742
n_iters=  5, p=0.50, n=  256, sim=0.06679993
n_iters=  5, p=0.50, n= 1024, sim=0.06609497
n_iters=  5, p=0.50, n= 4096, sim=0.06580237

Increasing the index i to 15:

n_iters=  5, p=0.50, n=32768, sim=0.06578374

J

The first thing this task seems to need is some mechanism of identifying "clusters", using "percolation". We can achieve this by assigning every "1" in a matrix a unique integer value and then defining an operation which combines two numbers - doing nothing unless both are non-zero. If they are both non-zero we pick the larger of the two values. (*@[ * >.)

Once we have this, we can identify clusters by propagating information in a single direction through the matrix using this operation, rotating the matrix 90 degrees, and then repeating this combination of operations four times. And, finally, by keeping at this until there's nothing more to be done.

<lang J>congeal=: |.@|:@((*@[*>.)/\.)^:4^:_</lang>

Example:

<lang J> M=:0.4>?6 6$0

1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 1 0 1 1 1 0 1 1 0

  M*p:i.$M
 2   0 0   0   0   0
 0   0 0  29   0   0
 0   0 0  53   0   0
67  71 0   0   0   0
 0   0 0 107   0 113

127 131 0 139 149 0

  congeal M*p:i.$M
 2   0 0   0   0   0
 0   0 0  53   0   0
 0   0 0  53   0   0
71  71 0   0   0   0
 0   0 0 149   0 113

131 131 0 149 149 0</lang>

We did not have to use primes there - any mechanism for assigning distinct positive integers to the 1s would work. And, in fact, it might be nice if - once we found our clusters - we assigned the smallest distinct positive integers to the clusters. This would allow us to use simple indexing to map the array to characters.

<lang J>idclust=: $ $ [: (~. i.])&.(0&,)@,@congeal ] * 1 + i.@$</lang>

Example use:

<lang J> idclust M 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 2 0 0 3 3 0 0 0 0 0 0 0 4 0 5 6 6 0 4 4 0

  (idclust M) {'.ABCDEFG'

A..... ...B.. ...B.. CC.... ...D.E FF.DD.</lang>

Now we just need a measure of cluster density. Formally cluster density seems to be defined as the number of clusters divided by the total number of elements of the matrix. Thus:

Example use:

<lang J> K idclust M 0.1666667</lang>

So we can create a word that performs a simulation experiment, given a probability getting a 1 and the number of rows (and columns) of our square matrix M.

<lang J>experiment=: K@ idclust@: > 0 ?@$~ ,~</lang>

Example use:

<lang J> 0.4 experiment 6 0.1666667

  0.4 experiment 6

0.1944444</lang>

The task wants us to perform at least five trials for sizes up to 1000 by 1000 with probability of 1 being 0.5:

<lang J>trials=: 0.5&experiment"0@#</lang>

Example use:

<lang J> 6 trials 3 0.1111111 0.1111111 0.2222222 0.1111111 0.1111111 0.3333333

  6 trials 10

0.16 0.12 0.09 0.1 0.1 0.03

  6 trials 30

0.05666667 0.1033333 0.08222222 0.07444444 0.08333333 0.07666667

  6 trials 100

0.069 0.0678 0.0666 0.0677 0.0653 0.0739

  6 trials 300

0.06563333 0.06663333 0.06713333 0.06727778 0.06658889 0.06664444

  6 trials 1000

0.066079 0.066492 0.065847 0.065943 0.066318 0.065998</lang>

Now for averages (these are different trials from the above):

<lang J>mean=: +/%#

  mean 8 trials 3

0.1805556

  mean 8 trials 10

0.0875

  mean 8 trials 30

0.07486111

  mean 8 trials 100

0.0690625

  mean 8 trials 300

0.06749861

  mean 8 trials 1000

0.06616738</lang>

Finally, for the extra credit (thru taken from the Loops/Downward for task):

<lang J> (idclust 0.5 > 0 ?@$~ 15 15) {'.', 'A' thru&.(a.&i.) 'Z' A.......B..C... AAAA...D..E.F.. A..A.G.D.D.FFF. AA..H..DDD.FF.I AAA...J...FFF.. ..AAAA.A.K...AA LL.A...A..A.AAA .L.A..AAA.AAAAA ..AA.AAA.AAA.A. AA.AAAAAA....A. A.AAAA.AAAA.AA. AAA...AAA.AAAAA ..AA..A.A...AAA .M.A.AA.AA..AA. .MM..A.N..O..A.</lang>

Collected definitions

<lang J>congeal=: |.@|:@((*@[*>.)/\.)^:4^:_ idclust=: $ $ [: (~. i.])&.(0&,)@,@congeal ] * 1 + i.@$

K=: (%&#~ }.@~.)&,

experiment=: K@ idclust@: > 0 ?@$~ ,~ trials=: 0.5&experiment"0@#

mean=:+/ % #

thru=: <./ + i.@(+*)@-~</lang>

Python

<lang python>from __future__ import division from random import random import string from math import fsum

n_range, p, t = (2**n2 for n2 in range(4, 14, 2)), 0.5, 5 N = M = 15

NOT_CLUSTERED = 1 # filled but not clustered cell cell2char = ' #' + string.ascii_letters

def newgrid(n, p):

   return [[int(random() < p) for x in range(n)] for y in range(n)]

def pgrid(cell):

   for n in range(N):
       print( '%i)  ' % (n % 10) 
              + ' '.join(cell2char[cell[n][m]] for m in range(M)))

def cluster_density(n, p):

   cc = clustercount(newgrid(n, p))
   return cc / n / n

def clustercount(cell):

   walk_index = 1
   for n in range(N):
       for m in range(M):
           if cell[n][m] == NOT_CLUSTERED:
               walk_index += 1
               walk_maze(m, n, cell, walk_index)
   return walk_index - 1

def walk_maze(m, n, cell, indx):

   # fill cell 
   cell[n][m] = indx
   # down
   if n < N - 1 and cell[n+1][m] == NOT_CLUSTERED:
       walk_maze(m, n+1, cell, indx)
   # right
   if m < M - 1 and cell[n][m + 1] == NOT_CLUSTERED:
       walk_maze(m+1, n, cell, indx)
   # left
   if m and cell[n][m - 1] == NOT_CLUSTERED:
       walk_maze(m-1, n, cell, indx)
   # up
   if n and cell[n-1][m] == NOT_CLUSTERED:
       walk_maze(m, n-1, cell, indx)

if __name__ == '__main__':

   cell = newgrid(n=N, p=0.5)
   print('Found %i clusters in this %i by %i grid\n' 
         % (clustercount(cell), N, N))
   pgrid(cell)
   print()
   
   for n in n_range:
       N = M = n
       sim = fsum(cluster_density(n, p) for i in range(t)) / t
       print('t=%3i p=%4.2f n=%5i sim=%7.5f'
             % (t, p, n, sim))</lang>

Output:

Found 20 clusters in this 15 by 15 grid

0)  a a     b     c       d d d d
1)  a a   e     f   g g   d      
2)        e   f f f       d      
3)  h h   e     f   i i   d d    
4)        e       j     d d d d  
5)    k k   k   k   l         d d
6)  k k k k k   k   l   m   n    
7)  k k k   k   k   l     o   p p
8)      k   k k k   l l l   q    
9)  k     k k k k     l     q q q
0)  k   k k k         l     q q q
1)  k k     k k k       r r      
2)  k     k k       r r r   s s  
3)  k k k k   r r r r r     s s  
4)  k   k   t   r   r r     s s  

t=  5 p=0.50 n=   16 sim=0.08984
t=  5 p=0.50 n=   64 sim=0.07310
t=  5 p=0.50 n=  256 sim=0.06706
t=  5 p=0.50 n= 1024 sim=0.06612
t=  5 p=0.50 n= 4096 sim=0.06587

As n increases, the sim result gets closer to 0.065770...

Racket

<lang racket>#lang racket (require srfi/14) ; character sets

much faster than safe fixnum functions

(require

 racket/require ; for fancy require clause below
 (filtered-in
         (lambda (name) (regexp-replace #rx"unsafe-" name ""))
         racket/unsafe/ops)
 ; these aren't in racket/unsafe/ops
 (only-in racket/fixnum for/fxvector in-fxvector fxvector-copy))

...(but less safe). if in doubt use this rather than the one above
(require racket/fixnum)

(define t (make-parameter 5))

(define (build-random-grid p M N)

 (define p-num (numerator p))
 (define p-den (denominator p))
 (for/fxvector #:length (fx* M N) ((_ (in-range (* M N))))
               (if (< (random p-den) p-num) 1 0)))

(define letters

 (sort (char-set->list (char-set-intersection
                        char-set:letter
                        ; char-set:ascii
                        )) char<?))

(define n-letters (length letters)) (define cell->char

 (match-lambda
   (0 #\space) (1 #\.)
   (c (list-ref letters (modulo (- c 2) n-letters)))))

(define (draw-percol-grid M N . gs)

 (for ((r N))
   (for ((g gs))
     (define row-str
       (list->string
        (for/list ((idx (in-range (* r M) (* (+ r 1) M))))
          (cell->char (fxvector-ref g idx)))))
     (printf "|~a| " row-str))
   (newline)))

(define (count-clusters! M N g)

 (define (gather-cluster! k c)
   (when (fx= 1 (fxvector-ref g k))
     (define k-r (fxquotient k M))
     (define k-c (fxremainder k M))
     (fxvector-set! g k c)       
     (define-syntax-rule (gather-surrounds range? k+)
       (let ((idx k+))
         (when (and range? (fx= 1 (fxvector-ref g idx)))
           (gather-cluster! idx c))))
     (gather-surrounds (fx> k-r 0) (fx- k M))
     (gather-surrounds (fx> k-c 0) (fx- k 1))
     (gather-surrounds (fx< k-c (fx- M 1)) (fx+ k 1))
     (gather-surrounds (fx< k-r (fx- N 1)) (fx+ k M))))
 
 (define-values (rv _c)
   (for/fold ((rv 0) (c 2))
     ((pos (in-range (fx* M N)))
      #:when (fx= 1 (fxvector-ref g pos)))
     (gather-cluster! pos c)
     (values (fx+ rv 1) (fx+ c 1))))
 rv)

(define (display-sample-clustering p)

 (printf "Percolation cluster sample: p=~a~%" p)
 (define g (build-random-grid p 15 15))
 (define g+ (fxvector-copy g))
 (define g-count (count-clusters! 15 15 g+))
 (draw-percol-grid 15 15 g g+)
 (printf "~a clusters~%" g-count))

(define (experiment p n t)

 (printf "Experiment: ~a ~a ~a\t" p n t) (flush-output)
 (define sum-Cn
   (for/sum ((run (in-range t)))
     (printf "[~a" run) (flush-output)
     (define g (build-random-grid p n n))
     (printf "*") (flush-output)
     (define Cn (count-clusters! n n g))
     (printf "]") (flush-output)
     Cn))
 (printf "\tmean K(p) = ~a~%" (real->decimal-string (/ sum-Cn t (sqr n)) 6)))

(module+ main

 (t 10)
 (for ((n (in-list '(4000 1000 750 500 400 300 200 100 15))))
   (experiment 1/2 n (t)))
 (display-sample-clustering 1/2))

(module+ test

 (define grd (build-random-grid 1/2 1000 1000))
 (/ (for/sum ((g (in-fxvector grd)) #:when (zero? g)) 1) (fxvector-length grd))
 (display-sample-clustering 1/2))</lang>

Output:

Run from DrRacket, which runs the test and main modules. From the command line, you'll want two commands: ``racket percolation_m_c_d.rkt`` and ``raco test percolation_m_c_d.rkt`` for the same result.

Experiment: 1/2 4000 10	[0*][1*][2*][3*][4*][5*][6*][7*][8*][9*]	mean K(p) = 0.065860
Experiment: 1/2 1000 10	[0*][1*][2*][3*][4*][5*][6*][7*][8*][9*]	mean K(p) = 0.066130
Experiment: 1/2 750 10	[0*][1*][2*][3*][4*][5*][6*][7*][8*][9*]	mean K(p) = 0.066195
Experiment: 1/2 500 10	[0*][1*][2*][3*][4*][5*][6*][7*][8*][9*]	mean K(p) = 0.066522
Experiment: 1/2 400 10	[0*][1*][2*][3*][4*][5*][6*][7*][8*][9*]	mean K(p) = 0.066778
Experiment: 1/2 300 10	[0*][1*][2*][3*][4*][5*][6*][7*][8*][9*]	mean K(p) = 0.066813
Experiment: 1/2 200 10	[0*][1*][2*][3*][4*][5*][6*][7*][8*][9*]	mean K(p) = 0.067908
Experiment: 1/2 100 10	[0*][1*][2*][3*][4*][5*][6*][7*][8*][9*]	mean K(p) = 0.069980
Experiment: 1/2 15 10	[0*][1*][2*][3*][4*][5*][6*][7*][8*][9*]	mean K(p) = 0.089778
Percolation cluster sample: p=1/2
|.  ...     . . | |A  BBB     A A | 
|...    .. .... | |AAA    AA AAAA | 
|. .   .... ... | |A A   AAAA AAA | 
|. . . .........| |A A C AAAAAAAAA| 
| ...   ..  ....| | AAA   AA  AAAA| 
|.. ......... ..| |AA AAAAAAAAA AA| 
|     . ...     | |     A AAA     | 
|. ..  ..       | |D AA  AA       | 
|   .. ... . .. | |   AA AAA E AA | 
|.  ..  ..  . . | |F  AA  AA  A A | 
|. ........ . ..| |F AAAAAAAA A AA| 
|.. .  .... ... | |FF A  AAAA AAA | 
| .  .  . ....  | | F  G  A AAAA  | 
|.... .. ..  . .| |FFFF HH AA  A A| 
|  .  ..   .....| |  F  HH   AAAAA| 
8 clusters

Tcl

Note that the queue (variables q and k) used to remember where to find cells when flood-filling the cluster is maintained as a list segment; the front of the list is not trimmed for performance reasons. (This would matter with very long queues, in which case the queue could be shortened occasionally; frequent trimming is still slower though, because Tcl backs its “list” datatype with arrays and not linked lists.)

Works with: Tcl version 8.6

<lang tcl>package require Tcl 8.6

proc determineClusters {w h p} {

   # Construct the grid
   set grid [lrepeat $h [lrepeat $w 0]]
   for {set i 0} {$i < $h} {incr i} {

for {set j 0} {$j < $w} {incr j} { lset grid $i $j [expr {rand() < $p ? -1 : 0}] }

   }
   # Find (and count) the clusters
   set cl 0
   for {set i 0} {$i < $h} {incr i} {

for {set j 0} {$j < $w} {incr j} { if {[lindex $grid $i $j] == -1} { incr cl for {set q [list $i $j];set k 0} {$k<[llength $q]} {incr k} { set y [lindex $q $k] set x [lindex $q [incr k]] if {[lindex $grid $y $x] != -1} continue lset grid $y $x $cl foreach dx {1 0 -1 0} dy {0 1 0 -1} { set nx [expr {$x+$dx}] set ny [expr {$y+$dy}] if { $nx >= 0 && $ny >= 0 && $nx < $w && $ny < $h && [lindex $grid $ny $nx] == -1 } then { lappend q $ny $nx } } } } }

   }
   return [list $cl $grid]

}

Print a sample 15x15 grid

lassign [determineClusters 15 15 0.5] n g puts "15x15 grid, p=0.5, with $n clusters" puts "+[string repeat - 15]+" foreach r $g {puts |[join [lmap x $r {format %c [expr {$x==0?32:64+$x}]}] ""]|} puts "+[string repeat - 15]+"

Determine the densities as the grid size increases

puts "p=0.5, iter=5" foreach n {5 30 180 1080 6480} {

   set tot 0
   for {set i 0} {$i < 5} {incr i} {

lassign [determineClusters $n $n 0.5] nC incr tot $nC

   }
   puts "n=$n, K(p)=[expr {$tot/5.0/$n**2}]"

}</lang>

Output:

15x15 grid, p=0.5, with 21 clusters
+---------------+
|    A   B CCCCC|
|  D A BBB  C   |
|E     B  F CCCC|
| B    B  F CC C|
|BBB B BB    CCC|
|B BBBBBB  CCCCC|
|  B   B G  C  C|
|H  II   G G  J |
|HH II   G GG  K|
|HH II GGG  GG K|
|   I  G GGGG   |
|LL  GGG  GG M N|
| L  G G O    P |
|LLLL Q     R   |
|L  L   S T  UUU|
+---------------+
p=0.5, iter=5
n=5, K(p)=0.184
n=30, K(p)=0.07155555555555557
n=180, K(p)=0.06880246913580246
n=1080, K(p)=0.0661267146776406
n=6480, K(p)=0.06582889898643499