Elementary cellular automaton/Random number generator: Difference between revisions
Content added Content deleted
(Realize in F#) |
m (→{{header|Pascal}}: inline all together to get next byte doubles speed. ~4/5 cycles per Bit. Needs long wake up to get stable results) |
||
Line 318: | Line 318: | ||
=={{header|Pascal}}== |
=={{header|Pascal}}== |
||
{{Works with|Free Pascal}} |
{{Works with|Free Pascal}} |
||
Using ROR and ROL is as fast as assembler and more portable.<BR>[https://tio.run/##7VZdb@pGEH33r5iHSEAvYJsQ0kBTifBxawmwC6a9bVVFjr3AKmZtrZdwaZS/Xjq7iwPckOThvvQhSHx45szMmbPD7qZBFgZxZZaG263HkzkPljBexeTcahmmuRAibZomYdU1vacpiWhQTfjclE/miHwVtxMRCHIrI26PQpaBWKwTHkfVdRLPMGs1TJamzlxdiGVsPJ45/W6vD32v82QAPJ4Nk4hAl8Tpgj49nrUnw6Hb7YEz8nsDDXA93xk6f7Z9xx2BOyq3B@gwTenqILQ9cD6PIOVJeG0/YfreYNLTgW3P8//wetBxRxN30FPOUdfpPxmrjGQImWyyqaBx1jLChGUCLcvg6zhZsSiDa6j9YFuWpT5a6OLajq9rsNFiPAQczbo3LQg0YUqZaNTRO1uxUNCEwWciOt701qdL0oSdV2xSgrF@J11hNk7ChEcGHLx@oegqH5kGiUQ3oYv6Rq29izB80lwQIBAh07aMOzKnDI1BtpQ0u/6kI6OG7m86BXiCw18I9asq9d/lXvvLKwBFBwFdCVAFAdZULHTFKFFsOMlWMda/1l0WMcibliBbxHBeg0@6gZahwg25XiRacQIOo@JQxBZlMWVk38ChE5PbL1OcGMk8iRaGr1gZR4Q8lBlC96uUl0A/SOJHPNDuYYiyu@NfpSRFmcguSZdMpF2Db11HIyExRQV2x7JOCb7gD8kl7@N5UmQbOvZmg62OAkEfiMOE/H816pUbx4cwYIDDi3PKNij4nGaC8OydLnGsNy5T0@loy807fe@X0tqHgxYfwDSnjCcxCjyHgEWgtJYPL9cB7jawQMxRzqL@@Ul/laQwUj1oj7q61HsCv7EseUevSf5B5IPIB5H/LZHj0/S9nXN/AkxSQiJBMpHv@L5d9i3c8ZzRbgukTTjYUPMNcM2pIDErFp4TwCzhkMn6Gf2HQDKDQrlRLxfgjoqsUNL73@8BX8IqlTguQCSwDu6JNOCJh4A@pqBqu9zdH9RHxcaTcs0QbeVH5qm7lCRvyeCDi4Os@uKc3BXSlZ4vLq9U2Z8rLaXOiQK5Fsfw0qGrEG7CmGSQEg7SiQIXykXfrgirZD5TaFrN2mHYy@Xyg@w@XymqbkVkTviJZfEXBASCQSv/tga2XIndNW3Xukr0TUfN@ilyeWk1CL6aJNjxzNMU4KceXkvHP0s2nATRLrqK5zNec1MakwjkQU2F8cY8Nepqlox63XgpJ16Try4MI@/bgFrNAvvqEuw6vi/rYNuXAOr5Cp9tOWJgX9hGzs04JHNe@y4ydu3H6kXju9hst/@GsziYZ9uKe76tTB7@Aw Try it online!] counting CPU-Cycles 32 vs 31 on Ryzen Zen1 per Byte -> 100Mb/s |
|||
Using ROR and ROL is nearly as fast as assembler and more portable.<BR>[https://tio.run/##7VVdb@JGFH33r7gPkYCuwXYSrVa4VGLB7FoC2wtOu21VRY49wGiNx5oZh02j/PXSOx7MR8L@gtYPYM8599yPuXOnTESa5N1lme52EWcrnmxgXuXkxnYNy1pLWfYtixS9Lf1GS5LRpMf4ylJfVkC@y/uFTCS5Vxb3ZyabRK63jOdZb8vyJar2UraxtHJvLTe58XzlT8beBCbR6MUAeL6asYzAmOTlmr48Xw0Xs1k49sAPYm@qCWEU@zP/j2HshwGEgTmcImBZChohdTj1PwVQcpYOPpjTMIwGzgt68aYLT9sPoyj@PfJgFAaLcOrVYDD2Jy9GJYhAyuJJ3EmaC9dIWSEkrmyS73NWFZmAAVz/5Ni2Xf@4CHG9js8AHFwxHhOOijqKm2vlUqerawR9uKOFfH@LzGVVpJKyAj4ROYru7mO6IX3Yo/KpJGgbj8oKlTlJGc8MOHk@U4TMs6UpU@w@jLHkmXuESIFfKi4AJEhkKFnXeCArWuBiIjYqzHG8GCmrWfirloBIcvgTqXGvlv7L9IZff0Cow0HCWBFqhwBbKtfaY8bqaDgRVY7@BzrLNhpFdx0Q6xxLBe90Aq5RmxtqC0lWcQJ@QeVpEV1a5LQgxwROQRR33kpc6NJGRBeGV4WJXUMezQKpx11qXCAOKvCzOHA9QpN6PZx/USVpKyGnoyAlpKHpa@isJRSnXZPDufLTga/4omJp8jh0ikpD2358wlSDRNJH4hfSNT42ceoSK03dn4eSt/XrO/3XUc7aZ3EMgzFgeJcO9P86/02d18doURKSSSJkc2xix4xtHCh@UJ8WANqHC1255VSSvGi3DgKwZByE8ibo3wTYElrm@1uzBQ9UilZ9SCzrt4RvoCoVj0uIK/7AFBOHBsITFKAqQcc@juSug7NmW0gGdjN0LuWFwJuhorKxld7JOHb3frSjwzXwAy/Hk1mLORfEmkKc0zunUCt9SnMioCQcFIjVbZnt2OlKu2MdQujb/etTs7d7FSfiW7NNtL5XyIrwC3sSrwlIJIMu@6XCHGvgAGa9v/T2qddCrzLq314KrnFdd0FctxHs42xkWvCzh3f9/BcVDSdJtrfu7Xb/pMs8WYldN7zZdReP/wI Try it online!] counting CPU-Cycles 64 vs 76 one Ryzen Zen1 |
|||
<lang pascal>Program Rule30; |
<lang pascal>Program Rule30; |
||
//http://en.wikipedia.org/wiki/Next_State_Rule_30; |
//http://en.wikipedia.org/wiki/Next_State_Rule_30; |
||
//http://mathworld.wolfram.com/Rule30.html |
//http://mathworld.wolfram.com/Rule30.html |
||
{$IFDEF FPC} |
{$IFDEF FPC} |
||
{$Mode Delphi} |
{$Mode Delphi}{$ASMMODE INTEL} |
||
{$OPTIMIZATION ON,ALL} |
{$OPTIMIZATION ON,ALL} |
||
{$CODEALIGN proc |
// {$CODEALIGN proc=1} |
||
{$ELSE} |
{$ELSE} |
||
{$APPTYPE CONSOLE} |
{$APPTYPE CONSOLE} |
||
Line 334: | Line 334: | ||
maxRounds = 2*1000*1000; |
maxRounds = 2*1000*1000; |
||
rounds = 10; |
rounds = 10; |
||
CpuF = 3.2e9; // Ryzen 5 1600 no Turbo 3.7 Ghz on my Linux64 |
|||
var |
var |
||
{$ALIGN 32} |
|||
Rule30_State : Uint64; |
Rule30_State : Uint64; |
||
function GetCPU_Time: int64; |
|||
type |
|||
TCpu = record |
|||
HiCpu, |
|||
LoCpu : Dword; |
|||
end; |
|||
var |
|||
Cput : TCpu; |
|||
begin |
|||
asm |
|||
RDTSC; |
|||
MOV Dword Ptr [CpuT.LoCpu],EAX |
|||
MOV Dword Ptr [CpuT.HiCpu],EDX |
|||
end; |
|||
with Cput do |
|||
result := int64(HiCPU) shl 32 + LoCpu; |
|||
end; |
|||
procedure InitRule30_State;inline; |
procedure InitRule30_State;inline; |
||
Line 356: | Line 372: | ||
function NextRule30Byte:NativeInt; |
function NextRule30Byte:NativeInt; |
||
//64-BIT can use many registers |
|||
//32-Bit still fast |
|||
var |
|||
run, prev,next: Uint64; |
|||
myOne : UInt64; |
|||
Begin |
Begin |
||
run := Rule30_State; |
|||
⚫ | |||
⚫ | |||
myOne := 1; |
|||
⚫ | |||
//Unrolling and inlining Next_State_Rule_30 by hand |
|||
⚫ | |||
result := (result+result) OR ( |
result := (result+result) OR (run AND myOne); |
||
next := ROLQword(run,1); |
|||
⚫ | |||
Prev := RORQword(run,1); |
|||
⚫ | |||
run := (next OR run) XOR prev; |
|||
⚫ | |||
⚫ | |||
⚫ | |||
next := ROLQword(run,1); |
|||
Prev := RORQword(run,1); |
|||
run := (next OR run) XOR prev; |
|||
⚫ | |||
next := ROLQword(run,1); |
|||
Prev := RORQword(run,1); |
|||
run := (next OR run) XOR prev; |
|||
⚫ | |||
next := ROLQword(run,1); |
|||
Prev := RORQword(run,1); |
|||
run := (next OR run) XOR prev; |
|||
⚫ | |||
next := ROLQword(run,1); |
|||
Prev := RORQword(run,1); |
|||
run := (next OR run) XOR prev; |
|||
⚫ | |||
next := ROLQword(run,1); |
|||
Prev := RORQword(run,1); |
|||
run := (next OR run) XOR prev; |
|||
⚫ | |||
next := ROLQword(run,1); |
|||
Prev := RORQword(run,1); |
|||
run := (next OR run) XOR prev; |
|||
⚫ | |||
next := ROLQword(run,1); |
|||
Prev := RORQword(run,1); |
|||
Rule30_State := (next OR run) XOR prev; |
|||
end; |
end; |
||
procedure Speedtest; |
procedure Speedtest; |
||
var |
var |
||
T1,T0 : |
T1,T0 : INt64; |
||
i: NativeInt; |
i: NativeInt; |
||
Begin |
Begin |
||
writeln('Speedtest for statesize of ',64,' bits'); |
writeln('Speedtest for statesize of ',64,' bits'); |
||
//Warm up start |
//Warm up start to wake up CPU takes some time |
||
For i := |
For i := 100*1000*1000-1 downto 0 do |
||
Next_State_Rule_30; |
Next_State_Rule_30; |
||
T0 := GetCPU_Time; |
|||
InitRule30_State; |
InitRule30_State; |
||
⚫ | |||
For i := maxRounds-1 downto 0 do |
For i := maxRounds-1 downto 0 do |
||
NextRule30Byte; |
NextRule30Byte; |
||
T1 := |
T1 := GetCPU_Time; |
||
writeln(NextRule30Byte); |
writeln(NextRule30Byte); |
||
writeln( |
writeln('cycles per Byte : ',(T1-t0)/maxRounds:0:2); |
||
writeln('cycles per Byte : ',((T1-t0)*86400*CpuF)/maxRounds:0:2); |
|||
writeln; |
writeln; |
||
end; |
end; |
||
Line 395: | Line 449: | ||
writeln('The task '); |
writeln('The task '); |
||
InitRule30_State; |
InitRule30_State; |
||
For i := 1 to rounds |
For i := 1 to rounds do |
||
write(NextRule30Byte); |
write(NextRule30Byte:4); |
||
writeln; |
writeln; |
||
end; |
end; |
||
Line 406: | Line 460: | ||
end.</lang> |
end.</lang> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre>//compiled 64-Bit |
||
//running compiled for 64-BIT |
|||
Speedtest for statesize of 64 bits |
Speedtest for statesize of 64 bits |
||
44 |
44 |
||
⚫ | |||
2000000 calls take 00:00:00.049 |
|||
⚫ | |||
The task |
The task |
||
Line 417: | Line 469: | ||
<ENTER> |
<ENTER> |
||
// |
//compiled 32-Bit |
||
Speedtest for statesize of 64 bits |
Speedtest for statesize of 64 bits |
||
44 |
44 |
||
⚫ | |||
2000000 calls take 00:00:00.108 |
|||
⚫ | |||
The task |
The task |
||
220 197 147 174 117 97 149 171 100 151 |
220 197 147 174 117 97 149 171 100 151 |
||
<ENTER> |
<ENTER></pre> |
||
</pre> |
|||
=={{header|Perl}}== |
=={{header|Perl}}== |