Elementary cellular automaton/Random number generator: Difference between revisions

Content added Content deleted
(Realize in F#)
m (→‎{{header|Pascal}}: inline all together to get next byte doubles speed. ~4/5 cycles per Bit.Needs long wake up to get stable results)
Line 318: Line 318:
=={{header|Pascal}}==
=={{header|Pascal}}==
{{Works with|Free Pascal}}
{{Works with|Free Pascal}}
Using ROR and ROL is as fast as assembler and more portable.<BR>[https://tio.run/##7VZdb@pGEH33r5iHSEAvYJsQ0kBTifBxawmwC6a9bVVFjr3AKmZtrZdwaZS/Xjq7iwPckOThvvQhSHx45szMmbPD7qZBFgZxZZaG263HkzkPljBexeTcahmmuRAibZomYdU1vacpiWhQTfjclE/miHwVtxMRCHIrI26PQpaBWKwTHkfVdRLPMGs1TJamzlxdiGVsPJ45/W6vD32v82QAPJ4Nk4hAl8Tpgj49nrUnw6Hb7YEz8nsDDXA93xk6f7Z9xx2BOyq3B@gwTenqILQ9cD6PIOVJeG0/YfreYNLTgW3P8//wetBxRxN30FPOUdfpPxmrjGQImWyyqaBx1jLChGUCLcvg6zhZsSiDa6j9YFuWpT5a6OLajq9rsNFiPAQczbo3LQg0YUqZaNTRO1uxUNCEwWciOt701qdL0oSdV2xSgrF@J11hNk7ChEcGHLx@oegqH5kGiUQ3oYv6Rq29izB80lwQIBAh07aMOzKnDI1BtpQ0u/6kI6OG7m86BXiCw18I9asq9d/lXvvLKwBFBwFdCVAFAdZULHTFKFFsOMlWMda/1l0WMcibliBbxHBeg0@6gZahwg25XiRacQIOo@JQxBZlMWVk38ChE5PbL1OcGMk8iRaGr1gZR4Q8lBlC96uUl0A/SOJHPNDuYYiyu@NfpSRFmcguSZdMpF2Db11HIyExRQV2x7JOCb7gD8kl7@N5UmQbOvZmg62OAkEfiMOE/H816pUbx4cwYIDDi3PKNij4nGaC8OydLnGsNy5T0@loy807fe@X0tqHgxYfwDSnjCcxCjyHgEWgtJYPL9cB7jawQMxRzqL@@Ul/laQwUj1oj7q61HsCv7EseUevSf5B5IPIB5H/LZHj0/S9nXN/AkxSQiJBMpHv@L5d9i3c8ZzRbgukTTjYUPMNcM2pIDErFp4TwCzhkMn6Gf2HQDKDQrlRLxfgjoqsUNL73@8BX8IqlTguQCSwDu6JNOCJh4A@pqBqu9zdH9RHxcaTcs0QbeVH5qm7lCRvyeCDi4Os@uKc3BXSlZ4vLq9U2Z8rLaXOiQK5Fsfw0qGrEG7CmGSQEg7SiQIXykXfrgirZD5TaFrN2mHYy@Xyg@w@XymqbkVkTviJZfEXBASCQSv/tga2XIndNW3Xukr0TUfN@ilyeWk1CL6aJNjxzNMU4KceXkvHP0s2nATRLrqK5zNec1MakwjkQU2F8cY8Nepqlox63XgpJ16Try4MI@/bgFrNAvvqEuw6vi/rYNuXAOr5Cp9tOWJgX9hGzs04JHNe@y4ydu3H6kXju9hst/@GsziYZ9uKe76tTB7@Aw Try it online!] counting CPU-Cycles 32 vs 31 on Ryzen Zen1 per Byte -> 100Mb/s
Using ROR and ROL is nearly as fast as assembler and more portable.<BR>[https://tio.run/##7VVdb@JGFH33r7gPkYCuwXYSrVa4VGLB7FoC2wtOu21VRY49wGiNx5oZh02j/PXSOx7MR8L@gtYPYM8599yPuXOnTESa5N1lme52EWcrnmxgXuXkxnYNy1pLWfYtixS9Lf1GS5LRpMf4ylJfVkC@y/uFTCS5Vxb3ZyabRK63jOdZb8vyJar2UraxtHJvLTe58XzlT8beBCbR6MUAeL6asYzAmOTlmr48Xw0Xs1k49sAPYm@qCWEU@zP/j2HshwGEgTmcImBZChohdTj1PwVQcpYOPpjTMIwGzgt68aYLT9sPoyj@PfJgFAaLcOrVYDD2Jy9GJYhAyuJJ3EmaC9dIWSEkrmyS73NWFZmAAVz/5Ni2Xf@4CHG9js8AHFwxHhOOijqKm2vlUqerawR9uKOFfH@LzGVVpJKyAj4ROYru7mO6IX3Yo/KpJGgbj8oKlTlJGc8MOHk@U4TMs6UpU@w@jLHkmXuESIFfKi4AJEhkKFnXeCArWuBiIjYqzHG8GCmrWfirloBIcvgTqXGvlv7L9IZff0Cow0HCWBFqhwBbKtfaY8bqaDgRVY7@BzrLNhpFdx0Q6xxLBe90Aq5RmxtqC0lWcQJ@QeVpEV1a5LQgxwROQRR33kpc6NJGRBeGV4WJXUMezQKpx11qXCAOKvCzOHA9QpN6PZx/USVpKyGnoyAlpKHpa@isJRSnXZPDufLTga/4omJp8jh0ikpD2358wlSDRNJH4hfSNT42ceoSK03dn4eSt/XrO/3XUc7aZ3EMgzFgeJcO9P86/02d18doURKSSSJkc2xix4xtHCh@UJ8WANqHC1255VSSvGi3DgKwZByE8ibo3wTYElrm@1uzBQ9UilZ9SCzrt4RvoCoVj0uIK/7AFBOHBsITFKAqQcc@juSug7NmW0gGdjN0LuWFwJuhorKxld7JOHb3frSjwzXwAy/Hk1mLORfEmkKc0zunUCt9SnMioCQcFIjVbZnt2OlKu2MdQujb/etTs7d7FSfiW7NNtL5XyIrwC3sSrwlIJIMu@6XCHGvgAGa9v/T2qddCrzLq314KrnFdd0FctxHs42xkWvCzh3f9/BcVDSdJtrfu7Xb/pMs8WYldN7zZdReP/wI Try it online!] counting CPU-Cycles 64 vs 76 one Ryzen Zen1
<lang pascal>Program Rule30;
<lang pascal>Program Rule30;
//http://en.wikipedia.org/wiki/Next_State_Rule_30;
//http://en.wikipedia.org/wiki/Next_State_Rule_30;
//http://mathworld.wolfram.com/Rule30.html
//http://mathworld.wolfram.com/Rule30.html
{$IFDEF FPC}
{$IFDEF FPC}
{$Mode Delphi}
{$Mode Delphi}{$ASMMODE INTEL}
{$OPTIMIZATION ON,ALL}
{$OPTIMIZATION ON,ALL}
{$CODEALIGN proc=8,LOOP=1}
// {$CODEALIGN proc=1}
{$ELSE}
{$ELSE}
{$APPTYPE CONSOLE}
{$APPTYPE CONSOLE}
Line 334: Line 334:
maxRounds = 2*1000*1000;
maxRounds = 2*1000*1000;
rounds = 10;
rounds = 10;
CpuF = 3.2e9; // Ryzen 5 1600 no Turbo 3.7 Ghz on my Linux64


var
var
{$ALIGN 32}
Rule30_State : Uint64;
Rule30_State : Uint64;

// Returns the current value of the CPU time-stamp counter as one 64-bit
// number. x86/x86-64 only: the value is read with the RDTSC instruction.
// The difference between two readings is a count of time-stamp ticks.
function GetCPU_Time: int64;
type
// Holds the two 32-bit halves of the counter.
TCpu = record
HiCpu,
LoCpu : Dword;
end;
var
Cput : TCpu;
begin
// Intel-syntax inline assembler (the file selects it with {$ASMMODE INTEL}).
asm
// RDTSC delivers the counter in EDX:EAX - low 32 bits in EAX, high in EDX.
RDTSC;
MOV Dword Ptr [CpuT.LoCpu],EAX
MOV Dword Ptr [CpuT.HiCpu],EDX
end;
with Cput do
// shl binds tighter than +, so this evaluates as (HiCpu shl 32) + LoCpu.
// The cast to int64 keeps the shift from being done in 32 bits.
result := int64(HiCPU) shl 32 + LoCpu;
end;


procedure InitRule30_State;inline;
procedure InitRule30_State;inline;
Line 356: Line 372:


function NextRule30Byte:NativeInt;
function NextRule30Byte:NativeInt;
//64-BIT can use many registers
//32-Bit still fast
var
run, prev,next: Uint64;
myOne : UInt64;
Begin
Begin
result := 0;
run := Rule30_State;
result := 0;
result := (result+result) OR (Rule30_State AND 1);Next_State_Rule_30;
myOne := 1;
result := (result+result) OR (Rule30_State AND 1);Next_State_Rule_30;
//Unrolling and inlining Next_State_Rule_30 by hand
result := (result+result) OR (Rule30_State AND 1);Next_State_Rule_30;
result := (result+result) OR (Rule30_State AND 1);Next_State_Rule_30;
result := (result+result) OR (run AND myOne);
next := ROLQword(run,1);
result := (result+result) OR (Rule30_State AND 1);Next_State_Rule_30;
Prev := RORQword(run,1);
result := (result+result) OR (Rule30_State AND 1);Next_State_Rule_30;
run := (next OR run) XOR prev;
result := (result+result) OR (Rule30_State AND 1);Next_State_Rule_30;

result := (result+result) OR (Rule30_State AND 1);Next_State_Rule_30;
result := (result+result) OR (run AND myOne);
next := ROLQword(run,1);
Prev := RORQword(run,1);
run := (next OR run) XOR prev;

result := (result+result) OR (run AND myOne);
next := ROLQword(run,1);
Prev := RORQword(run,1);
run := (next OR run) XOR prev;

result := (result+result) OR (run AND myOne);
next := ROLQword(run,1);
Prev := RORQword(run,1);
run := (next OR run) XOR prev;

result := (result+result) OR (run AND myOne);
next := ROLQword(run,1);
Prev := RORQword(run,1);
run := (next OR run) XOR prev;

result := (result+result) OR (run AND myOne);
next := ROLQword(run,1);
Prev := RORQword(run,1);
run := (next OR run) XOR prev;

result := (result+result) OR (run AND myOne);
next := ROLQword(run,1);
Prev := RORQword(run,1);
run := (next OR run) XOR prev;

result := (result+result) OR (run AND myOne);
next := ROLQword(run,1);
Prev := RORQword(run,1);
Rule30_State := (next OR run) XOR prev;
end;
end;


procedure Speedtest;
procedure Speedtest;
var
var
T1,T0 : TDateTime;
T1,T0 : INt64;
i: NativeInt;
i: NativeInt;
Begin
Begin
writeln('Speedtest for statesize of ',64,' bits');
writeln('Speedtest for statesize of ',64,' bits');
//Warm up start Turbo of CPU
//Warm-up: the CPU takes some time to wake up, so run long enough to get stable results
For i := 10*1000*1000-1 downto 0 do
For i := 100*1000*1000-1 downto 0 do
Next_State_Rule_30;
Next_State_Rule_30;


T0 := GetCPU_Time;
InitRule30_State;
InitRule30_State;
T0 := time;
For i := maxRounds-1 downto 0 do
For i := maxRounds-1 downto 0 do
NextRule30Byte;
NextRule30Byte;
T1 := time;
T1 := GetCPU_Time;
writeln(NextRule30Byte);
writeln(NextRule30Byte);
writeln(maxRounds,' calls take ',FormatDateTime('HH:NN:SS.zzz',T1-T0));
writeln('cycles per Byte : ',(T1-t0)/maxRounds:0:2);
writeln('cycles per Byte : ',((T1-t0)*86400*CpuF)/maxRounds:0:2);
writeln;
writeln;
end;
end;
Line 395: Line 449:
writeln('The task ');
writeln('The task ');
InitRule30_State;
InitRule30_State;
For i := 1 to rounds
For i := 1 to rounds do
write(NextRule30Byte);
write(NextRule30Byte:4);
writeln;
writeln;
end;
end;
Line 406: Line 460:
end.</lang>
end.</lang>
{{out}}
{{out}}
<pre>
<pre>//compiled 64-Bit
//running compiled for 64-BIT
Speedtest for statesize of 64 bits
Speedtest for statesize of 64 bits
44
44
cycles per Byte : 30.95
2000000 calls take 00:00:00.049
cycles per Byte : 78.40


The task
The task
Line 417: Line 469:
<ENTER>
<ENTER>


//running compiled for 32-BIT
//compiled 32-Bit
Speedtest for statesize of 64 bits
Speedtest for statesize of 64 bits
44
44
cycles per Byte : 128.56
2000000 calls take 00:00:00.108
cycles per Byte : 172.80


The task
The task
220 197 147 174 117 97 149 171 100 151
220 197 147 174 117 97 149 171 100 151
<ENTER>
<ENTER></pre>
</pre>


=={{header|Perl}}==
=={{header|Perl}}==