Legendre prime counting function: Difference between revisions

→‎{{header|Nim}}: Added Mojo Partial Sieving version...
(→‎Non-Memoized Version: Nim, adjustments to comments and clarifications...)
(→‎{{header|Nim}}: Added Mojo Partial Sieving version...)
 
(43 intermediate revisions by 9 users not shown)
Line 27:
 
As to "the Legendre method is generally much faster than sieving up to n.", while the number of operations for the Legendre algorithm is about a factor of `log n` squared less than the number of operations for odds-only Sieve of Eratosthenes (SoE) sieving, those operations are "divide" operations which are generally much slower than the simple array access and addition operations used in sieving and the SoE can be optimized further using wheel factorization so that the required time for a given range can be less for a fully optimized SoE than for the common implementation of the Legendre algorithm; the trade off is that a fully optimized SoE is at least 500 lines of code, whereas the basic version of the Legendre prime counting algorithm is only about 40 to 50 lines of code (depending somewhat on the language used).
 
Also note that the Legendre prime counting function was never used practically at the time it was invented other than to demonstrate that it would find the count of primes to a trivial range only knowing the primes up to the square root of that range and there were too many operations (especially long integer division operations) to actually use it for any reasonably range even with this optimization (about 250 thousand divisions to count primes to ten million), but the follow-on work by Meissel in the 1800's definitely would have used this optimization and others in order to hand calculate the number of primes to a billion (1e9) in about ten years. Even with this optimization, Meissel would have had to hand calculate over five million divisions, so certainly used other Look Up Tables (LUT's) although certainly not caching of Phi/φ values in order to reduce the work to something possible in this amount of time. A "TinyPhi" LUT table for the first six primes of thirteen and less would have reduced the amount of work Meissel did to about 600 thousand divisions, but even that would have been perhaps too much and it is very likely that he also used "partial sieving" techniques, although that would have meant that as well as a table of the primes up to a million, he would have also needed 161 other tables of that range to a million sieved by the primes up to 13, 17, 19, to 997; however, that extra work in building these tables (which might have been done mechanically) would pay off in reducing the number of divisions to about seven thousand so the divisions become a minor problem possible to do over months and the majority of the time would be spent producing the partial sieving tables up to a million.
 
The reason that Meissel refined the Legendre method would have been that, even applying all of the optimizations including "partial sieving", he would still have had to do about three and a half million divisions to count the primes to a billion even if the number of primes and "partial sieve tables" only needed to be known to about 32 thousand, where his "Meissel" algorithm reduced the number of divisions to only a few thousand as per the above. Without a computer, he could never have completed the calculation of the number of primes to a billion using an optimized Legendre algorithm where he could using his modification. However, modern computers make (reasonably) quick work of integer divisions so that optimized algorithms of the Legendre type become moderately useful although at the cost of memory use as compared to Meissel type algorithms.
<br>
<br>
 
=={{header|11l}}==
{{trans|Python}}
 
<syntaxhighlight lang="11l">
F primes_up_to_limit(Int limit)
[Int] r
I limit >= 2
r.append(2)
 
V isprime = [1B] * ((limit - 1) I/ 2)
V sieveend = Int(sqrt(limit))
L(i) 0 .< isprime.len
I isprime[i]
Int p = i * 2 + 3
r.append(p)
I i <= sieveend
L(j) ((p * p - 3) >> 1 .< isprime.len).step(p)
isprime[j] = 0B
R r
 
V p = primes_up_to_limit(Int(sqrt(1'000'000'000)))
 
F phi(x, =a)
F phi_cached(x, a)
[(Int, Int) = Int] :cache
I (x, a) C :cache
R :cache[(x, a)]
V r = phi(x, a)
:cache[(x, a)] = r
R r
 
V res = 0
L
I a == 0 | x == 0
R x + res
 
a--
res -= phi_cached(x I/ :p[a], a)
 
F legpi(n)
I n < 2
R 0
 
V a = legpi(Int(sqrt(n)))
R phi(n, a) + a - 1
 
L(e) 10
print(‘10^’e‘ ’legpi(10 ^ e))
</syntaxhighlight>
 
{{out}}
<pre>
10^0 0
10^1 4
10^2 25
10^3 168
10^4 1229
10^5 9592
10^6 78498
10^7 664579
10^8 5761455
10^9 50847534
</pre>
 
=={{header|C}}==
{{trans|Vlang}}
Using fixed width types so requires C99 or later.
 
Surprisingly only half as fast as Go on the same machine (GCC -O3 Ubuntu 22.04) for a billion numbers.
 
However, the position is reversed if we let it run through to a trillion numbers - about 100 ms for C compared to 160 ms for Go.
<syntaxhighlight lang="c">#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <stdint.h>
#include <time.h>
 
const uint8_t masks[8] = {1, 2, 4, 8, 16, 32, 64, 128};
 
#define half(n) ((int64_t)((n) - 1) >> 1)
 
#define divide(nm, d) ((uint64_t)((double)nm / (double)d))
 
int64_t countPrimes(uint64_t n) {
if (n < 9) return (n < 2) ? 0 : ((int64_t)n + 1) / 2;
uint64_t rtlmt = (uint64_t)sqrt((double)n);
int64_t mxndx = (int64_t)((rtlmt - 1) / 2);
int arrlen = (int)(mxndx + 1);
uint32_t *smalls = malloc(arrlen * 4);
uint32_t *roughs = malloc(arrlen * 4);
int64_t *larges = malloc(arrlen * 8);
for (int i = 0; i < arrlen; ++i) {
smalls[i] = (uint32_t)i;
roughs[i] = (uint32_t)(i + i + 1);
larges[i] = (int64_t)((n/(uint64_t)(i + i + 1) - 1) / 2);
}
int cullbuflen = (int)((mxndx + 8) / 8);
uint8_t *cullbuf = calloc(cullbuflen, 1);
int64_t nbps = 0;
int rilmt = arrlen;
for (int64_t i = 1; ; ++i) {
int64_t sqri = (i + i) * (i + 1);
if (sqri > mxndx) break;
if (cullbuf[i >> 3] & masks[i & 7]) continue;
cullbuf[i >> 3] |= masks[i & 7];
uint64_t bp = (uint64_t)(i + i + 1);
for (int64_t c = sqri; c < (int64_t)arrlen; c += (int64_t)bp) {
cullbuf[c >> 3] |= masks[c & 7];
}
int nri = 0;
for (int ori = 0; ori < rilmt; ++ori) {
uint32_t r = roughs[ori];
int64_t rci = (int64_t)(r >> 1);
if (cullbuf[rci >> 3] & masks[rci & 7]) continue;
uint64_t d = (uint64_t)r * bp;
int64_t t = (d <= rtlmt) ? larges[(int64_t)smalls[d >> 1] - nbps] :
(int64_t)smalls[half(divide(n, d))];
larges[nri] = larges[ori] - t + nbps;
roughs[nri] = r;
nri++;
}
int64_t si = mxndx;
for (uint64_t pm = (rtlmt/bp - 1) | 1; pm >= bp; pm -= 2) {
uint32_t c = smalls[pm >> 1];
uint64_t e = (pm * bp) >> 1;
for ( ; si >= (int64_t)e; --si) smalls[si] -= c - (uint32_t)nbps;
}
rilmt = nri;
nbps++;
}
int64_t ans = larges[0] + (int64_t)((rilmt + 2*(nbps - 1)) * (rilmt - 1) / 2);
int ri, sri;
for (ri = 1; ri < rilmt; ++ri) ans -= larges[ri];
for (ri = 1; ; ++ri) {
uint64_t p = (uint64_t)roughs[ri];
uint64_t m = n / p;
int ei = (int)smalls[half((uint64_t)m/p)] - nbps;
if (ei <= ri) break;
ans -= (int64_t)((ei - ri) * (nbps + ri - 1));
for (sri = ri + 1; sri < ei + 1; ++sri) {
ans += (int64_t)smalls[half(divide(m, (uint64_t)roughs[sri]))];
}
}
free(smalls);
free(roughs);
free(larges);
free(cullbuf);
return ans + 1;
}
 
int main() {
uint64_t n;
int i;
clock_t start = clock();
for (i = 0, n = 1; i < 10; ++i, n *= 10) {
printf("10^%d %ld\n", i, countPrimes(n));
}
clock_t end = clock();
printf("\nTook %f seconds\n", (double) (end - start) / CLOCKS_PER_SEC);
return 0;
}</syntaxhighlight>
 
{{out}}
<pre>
10^0 0
10^1 4
10^2 25
10^3 168
10^4 1229
10^5 9592
10^6 78498
10^7 664579
10^8 5761455
10^9 50847534
 
Took 0.003843 seconds
</pre>
 
=={{header|C++}}==
<syntaxhighlight lang="cpp">#include <cmath>
#include <iostream>
#include <unordered_map>
#include <vector>
 
Line 62 ⟶ 243:
int phi(int x, int a);
std::vector<int> primes;
std::unordered_map<int, std::unordered_map<int, int>> phi_cache;
};
 
Line 78 ⟶ 258:
if (a == 0)
return x;
auto&if map(a == phi_cache[x];1)
auto i = map.find return x - (ax >> 1);
ifint (ipa != map.end())primes[a - 1];
if (x <= return i->second;pa)
return 1;
int result = phi(x, a - 1) - phi(x / primes[a - 1], a - 1);
return phi(x, a - 1) - phi(x / pa, a - 1);
map[a] = result;
return result;
}
 
Line 106 ⟶ 285:
10^9 50847534
</pre>
 
=={{header|Chapel}}==
 
There doesn't seem to be much point to contributing memoized (Hash table) versions as they seem to be the result of the task author not realizing that there is a simple way of keeping the computation complexity from having exponential growth with counting range by limiting the "splitting" of the "phi" calculation when the result must be just one. Accordingly, the following Chapel versions are [translated and improved from the non-memoizing Nim versions](https://rosettacode.org/wiki/Legendre_prime_counting_function#Non-Memoized_Versions), which explanations can be referenced for an understanding of how they work:
 
'''The Basic Recursive Legendre Algorithm'''
{{trans|Nim}}
<syntaxhighlight lang="chapel">// compile with --fast for maximum speed...
 
use Time;
 
proc countPrimes(lmt: uint(64)): int(64) {
if lmt < 9 { // when there are no odd primes less than square root...
if lmt < 3 { if lmt < 2 { return 0; } else { return 1; } }
return (lmt - (lmt >> 1)): int(64);
}
 
// Chapel doesn't have closures, so emulate them with a class...
class LegendrePi {
var n: uint(64);
var dom: domain(1);
var oprms: [dom] uint(32);
proc init(n: uint(64)) {
// first, an array of odd primes to the square root of n is generated...
this.n = n;
const sqrtn = sqrt(n: real(64)): int(64);
const rtlmt = (sqrtn - 3) / 2; this.dom = {0 .. rtlmt};
this.oprms = 0;
for i in 0 .. rtlmt do this.oprms[i] = (i + i + 3): uint(32);
var i = 0;
for i in (0 ..) { // cull the array
var ci = (i + i) * (i + 3) + 3; if ci > rtlmt { break; }
const bp = i + i + 3;
while (ci <= rtlmt) { this.oprms[ci] = 0; ci += bp; }
}
var psz = 0;
for ti in 0 .. rtlmt { // compress the odd primes array...
const tv = this.oprms[ti];
if tv != 0 { this.oprms[psz] = tv; psz += 1; }
}
this.dom = { 0 ..< psz };
}
proc phi(x: uint(64), a: int): int(64) {
if a <= 0 { return (x - (x >> 1)): int(64); } // take care of prime of 2
const na = a - 1; const p = this.oprms[na]: uint(64);
if x <= p { return 1: int(64); }
return phi(x, na) - phi(x / p, na);
}
proc this(): int(64) {
return phi(n, this.oprms.size) + this.oprms.size: int(64);
}
}
return (new LegendrePi(lmt))();
}
 
proc main() {
var timer: Timer;
timer.start();
 
for i in 0 .. 9 {
writeln("π(10**", i, ") = ", countPrimesx(10: uint(64) ** i));
}
 
timer.stop();
 
writeln("This took ", timer.elapsed(TimeUnits.milliseconds), " milliseconds.");
}</syntaxhighlight>
{{out}}
<pre>π(10**0) = 0
π(10**1) = 4
π(10**2) = 25
π(10**3) = 168
π(10**4) = 1229
π(10**5) = 9592
π(10**6) = 78498
π(10**7) = 664579
π(10**8) = 5761455
π(10**9) = 50847534
This took 276.431 milliseconds.</pre>
The time is as run on an Intel i5-6500 (3.6 GHz single-threaded boost).
 
Although this is about ten times faster than if it were using memoization (and uses much less memory), it is only reasonably usable for trivial ranges such as this (trivial in terms of prime counting functions).
 
'''The Less Recursive "Bottom-Up" with a TinyPhi LUT Algorithm'''
 
Just substitute the following Chapel code for the `countPrimes` function above to enjoy the gain in speed:
{{trans|Nim}}
<syntaxhighlight lang="chapel">// tiny Phi Look Up for `a` of small degree...
const tinyPhiPrimes = [ 2, 3, 5, 7, 11, 13 ]; // degree six
const cC = tinyPhiPrimes.size - 1;
proc product(a: [] int): int {
var acc = 1; for v in a { acc *= v; }; return acc >> 1; }
const tinyPhiOddCirc = product(tinyPhiPrimes);
proc tot(a: [] int): int {
var acc = 1; for v in a { acc *= v - 1; }; return acc; }
const tinyPhiOddTot = tot(tinyPhiPrimes);
proc makeTinyLUT(ps: [] int, sz: int): [] uint(32) {
var arr: [0 .. sz - 1] uint(32) = 1;
for p in ps {
if p <= 2 { continue; }
arr[p >> 1] = 0;
for c in ((p * p) >> 1) ..< sz by p { arr[c] = 0; }
}
var acc = 0: uint(32);
for i in 0 ..< sz { acc += arr[i]; arr[i] = acc; }
return arr;
}
const tinyPhiLUT = makeTinyLUT(tinyPhiPrimes, tinyPhiOddCirc);
inline proc tinyPhi(x: uint(64)): int(64) {
const ndx = (x - 1) >> 1; const numtot = ndx / tinyPhiOddCirc: uint(64);
return (numtot * tinyPhiOddTot +
tinyPhiLUT[(ndx - numtot * tinyPhiOddCirc): int]): int(64);
}
 
proc countPrimes(lmt: uint(64)): int(64) {
if lmt < 169 { // below 169 whose sqrt is 13 is where TinyPhi doesn't work...
if lmt < 3 { if lmt < 2 { return 0; } else { return 1; } }
// adjust for the missing "degree" base primes
if lmt <= 13 {
return ((lmt - 1): int(64) >> 1) + (if (lmt < 9) then 1 else 0); }
return 5 + tinyPhiLUT[(lmt - 1): int >> 1]: int(64);
}
 
// Chapel doesn't have closures, so emulate them with a class...
class LegendrePi {
var n: uint(64);
var dom: domain(1);
var oprms: [dom] uint(32);
proc init(n: uint(64)) {
// first, an array of odd primes to the square root of n is generated...
this.n = n;
const sqrtn = sqrt(n: real(64)): int(64);
const rtlmt = (sqrtn - 3) / 2; this.dom = {0 .. rtlmt};
this.oprms = 0;
for i in 0 .. rtlmt do this.oprms[i] = (i + i + 3): uint(32);
var i = 0;
for i in (0 ..) { // cull the array
var ci = (i + i) * (i + 3) + 3; if ci > rtlmt { break; }
const bp = i + i + 3;
while (ci <= rtlmt) { this.oprms[ci] = 0; ci += bp; }
}
var psz = 0;
for ti in 0 .. rtlmt { // compress the odd primes array...
const tv = this.oprms[ti];
if tv != 0 { this.oprms[psz] = tv; psz += 1; }
}
this.dom = { 0 ..< psz };
}
proc lvl(pilmt: int, m: uint(64)): int(64) {
var acc = 0: int(64);
for pi in cC ..< pilmt {
const p = this.oprms[pi]: uint(64); const nm = m * p;
if this.n <= nm * p { return acc + (pilmt - pi); }
if pi > cC { acc -= this.lvl(pi, nm); }
const q = this.n / nm; acc += tinyPhi(q);
}
return acc;
}
proc this(): int(64) {
return tinyPhi(this.n) - this.lvl(this.oprms.size, 1)
+ this.oprms.size: int(64);
}
}
return (new LegendrePi(lmt))();
}</syntaxhighlight>
 
Use of the above function gets a gain in speed of about a further ten times over the above version due to less recursion and the use of the "Tiny_Phi" Look Up Table (LUT) for smaller "degrees" of primes which greatly reduces the number of integer divisions. This version of prime counting function might be considered to be reasonably useful for counting primes up to a trillion (1e12) in a few tens of seconds.
 
'''The Non-Recursive Partial Sieving Algorithm'''
 
Just substitute the following Chapel code for the `countPrimes` function above to enjoy the further gain in speed:
{{trans|Nim}}
<syntaxhighlight lang="chapel">const masks = for i in 0 .. 7 do (1 << i): uint(8); // faster bit twiddling
 
proc countPrimes(lmt: uint(64)): int(64) {
if lmt < 3 { if lmt < 2 { return 0; } else { return 1; } } // odds only!
inline proc half(x: int): int { return (x - 1) >> 1; } // convenience function
inline proc divide(nm: uint(64), d: uint(64)): int {
return (nm: real(64) / d: real(64)): int; } // floating point div faster
const sqrtn = sqrt(lmt: real(64)): uint(64);
const mxndx = (sqrtn - 1): int / 2;
const dom = {0 .. mxndx}; const csz = (mxndx + 8) / 8;
var smalls = for i in dom do i: uint(32);
var roughs = for i in dom do (i + i + 1): uint(32);
var larges = for i in dom do ((lmt / (i + i + 1)) - 1) >> 1;
var cullbuf: [0 ..< csz] uint(8);
 
// partial sieve loop, adjusting larges/smalls, compressing larges/roughs...
var nobps = 0; var rilmt = mxndx;
for bp in 3: uint(64) .. by 2 {
const i = (bp >> 1): int; const sqri = (i + i) * (i + 1);
if sqri > mxndx { break; } // up to quad root of counting range
if (cullbuf[i >> 3] & masks[i & 7]) != 0 { continue; } // loop not prime
cullbuf[i >> 3] |= masks[i & 7]; // cull bp itself as not a rough
for ci in sqri .. mxndx by bp { // do partial sieving pass for `bp`...
cullbuf[ci >> 3] |= masks[ci & 7]; } // cull all multiples of `bp`
 
// now adjust `larges` for latest partial sieve pass...
var ori = 0; // compress input rough index to output one
for iri in 0 .. rilmt {
const r = roughs[iri]: uint(64); const rci = (r >> 1): int;
if (cullbuf[rci >> 3] & masks[rci & 7]) != 0 {
continue; } // skip culled roughs in last partial sieving pass
const d = bp: uint(64) * r;
larges[ori] = larges[iri] -
(if d <= sqrtn then
larges[smalls[(d >> 1): int] - nobps]
else smalls[half(divide(lmt, d))]: uint(64)) + nobps;
roughs[ori] = r: uint(32); ori += 1;
}
 
var si = mxndx; // and adjust `smalls` for latest partial sieve pass...
for bpm in bp .. (sqrtn / bp - 1) | 1 by -2 {
const c = smalls[(bpm >> 1): int] - nobps: uint(32);
const e = ((bpm * bp) >> 1): int;
while si >= e { smalls[si] -= c; si -= 1; }
}
 
nobps += 1; rilmt = ori - 1;
}
 
var ans = larges[0]; // answer from larges, adjusting for over subtraction...
for i in 1 .. rilmt { ans -= larges[i]; } // combine!
ans += (rilmt + 1 + 2 * (nobps - 1)) * rilmt / 2; // adjust!
 
// add final adjustment for pairs of current roughs to cube root of range...
for ri in (1 ..) { // break when reaches cube root of counting range...
const p = roughs[ri]: uint(64); const q = lmt / p;
const ei = smalls[half(divide(q, p))]: int - nobps;
if ei <= ri { break; } // break here when no more pairs!
for ori in ri + 1 .. ei { // for all pairs never the same prime!
ans += smalls[half(divide(q, roughs[ori]))]: int(64); }
// adjust for over subtractions above...
ans -= (ei - ri): uint(64) * (nobps: uint(64) + ri: uint(64) - 1);
}
 
return ans: int(64) + 1; // add one for only even prime of two!
}</syntaxhighlight>
 
The above function enjoys quite a large gain in speed of about a further ten times over the immediately preceding version due to not using recursion at all and the greatly reduced computational complexity of O(n**(3/4)/((log n)**2)) instead of the almost-linear-for-large-counting-ranges O(n/((log n)**2)) for the previous two versions, although the previous two versions differ in performance by a constant factor due to the overheads of recursion and division; for instance, this version can calculate the number of primes to 1e11 in about 22 milliseconds. This version of prime counting function might be considered to be reasonably useful for counting primes up to a 1e16 in under a minute as long as the computer used has the required free memory of about 800 Megabytes. This Chapel version is about the same speed as the Nim version from which it was translated other than the LLVM back-end used here produces code that is a few percent slower than as that produced by the GCC compiler used as a back-end by Nim, at least for this algorithm.
 
=={{header|CoffeeScript}}==
Line 157 ⟶ 576:
10^9 50847534
</pre>
 
=={{header|Crystal}}==
 
There doesn't seem to be much point to contributing memoized (Hash table) versions as they seem to be the result of the task author not realizing that there is a simple way of keeping the computation complexity from having exponential growth with counting range by limiting the "splitting" of the "phi" calculation when the result must be just one. Accordingly, the following Crystal versions are [translated from the non-memoizing Nim versions](https://rosettacode.org/wiki/Legendre_prime_counting_function#Non-Memoized_Versions), which explanations can be referenced for an understanding of how they work:
 
'''The Basic Recursive Legendre Algorithm'''
 
<syntaxhighlight lang="crystal">require "bit_array"
 
def count_primes(n : Int64)
if n < 3_i64
return 0_i64 if n < 2_i64
return 1_i64
end
rtlmt = Math.sqrt(n.to_f64).to_i32
mxndx = (rtlmt - 3) // 2
cmpsts = BitArray.new(mxndx + 1)
i = 0
while true
c = (i + i) * (i + 3) + 3
break if c > mxndx
unless cmpsts[i]
bp = i + i + 3
until c > mxndx
cmpsts[c] = true
c += bp
end
end
i += 1
end
oprms = Array(Int32).new(cmpsts.count { |e| !e }, 0)
pi = 0
cmpsts.each_with_index do |e, i|
unless e
oprms[pi] = (i + i + 3).to_i32; pi += 1
end
end
phi = uninitialized Proc(Int64, Int32, Int64) # recursion target!
phi = ->(x : Int64, a : Int32) {
return x - (x >> 1) if a < 1
p = oprms.unsafe_fetch(a - 1)
return 1_i64 if x <= p
phi.call(x, a - 1) - phi.call((x.to_f64 / p.to_f64).to_i64, a - 1)
}
phi.call(n, oprms.size) + oprms.size
end
 
start_time = Time.monotonic
(0 .. 9).each { |i| puts "π(10**#{i}) = #{count_primes(10_i64**i)}" }
elpsd = (Time.monotonic - start_time).total_milliseconds
 
puts "This took #{elpsd} milliseconds."</syntaxhighlight>
 
{{out}}
<pre>π(10**0) = 0
π(10**1) = 4
π(10**2) = 25
π(10**3) = 168
π(10**4) = 1229
π(10**5) = 9592
π(10**6) = 78498
π(10**7) = 664579
π(10**8) = 5761455
π(10**9) = 50847534
This took 272.428954 milliseconds.</pre>
 
The time is as run on an Intel i5-6500 (3.6 GHz single-threaded boost).
 
Although this is about ten times faster than if it were using memoization (and uses much less memory), it is only reasonably usable for trivial ranges such as this (trivial in terms of prime counting functions).
 
'''The Less Recursive "Bottom-Up" with a TinyPhi LUT Algorithm'''
 
Just substitute the following Crystal code for the `count_primes` function above to enjoy the gain in speed:
<syntaxhighlight lang="crystal">Tiny_Phi_Primes = [ 2, 3, 5, 7, 11, 13 ]
Tiny_Phi_Odd_Circ = Tiny_Phi_Primes.product // 2
Tiny_Phi_Tot = Tiny_Phi_Primes.reduce(1) { |acc, p| acc * (p - 1) }
CC = Tiny_Phi_Primes.size - 1
def make_Tiny_Phi_LUT()
rslt = Array(UInt16).new(Tiny_Phi_Odd_Circ, 1_u16)
Tiny_Phi_Primes.skip(1).each { |bp|
i = (bp - 1) >> 1; rslt[i] = 0; c = (i + i) * (i + 1)
while c < Tiny_Phi_Odd_Circ
rslt[c] = 0; c += bp
end }
acc = 0_u16; i = 0
while i < Tiny_Phi_Odd_Circ
acc += rslt[i]; rslt[i] = acc; i += 1
end
rslt
end
Tiny_Phi_LUT = make_Tiny_Phi_LUT()
@[AlwaysInline]
def tiny_Phi(x : Int64) : Int64
ndx = (x - 1) >> 1; numtot = ndx // Tiny_Phi_Odd_Circ.to_i64
tpli = ndx - numtot * Tiny_Phi_Odd_Circ.to_i64
numtot * Tiny_Phi_Tot.to_i64 +
Tiny_Phi_LUT.unsafe_fetch(tpli).to_i64
end
 
def count_primes(n : Int64)
if n < 169_i64 # below 169 whose sqrt is 13 is where TinyPhi doesn't work...
return 0_i64 if n < 2_i64
return 1_i64 if n < 3_i64
# adjust for the missing "degree" base primes
return 1 + (n - 1) // 2 if n < 9_i64
return (n - 1) // 2 if n <= 13_i64
return 5 + Tiny_Phi_LUT[(n - 1).to_i32 // 2].to_i64
end
rtlmt = Math.sqrt(n.to_f64).to_i32
mxndx = (rtlmt - 3) // 2
cmpsts = BitArray.new(mxndx + 1)
i = 0
while true
c = (i + i) * (i + 3) + 3
break if c > mxndx
unless cmpsts[i]
bp = i + i + 3
until c > mxndx
cmpsts[c] = true
c += bp
end
end
i += 1
end
oprms = Array(Int32).new(cmpsts.count { |e| !e }, 0)
opi = 0
cmpsts.each_with_index do |e, i|
unless e
oprms[opi] = (i + i + 3).to_i32; opi += 1
end
end
lvl = uninitialized Proc(Int32, Int32, Int64, Int64) # recursion target!
lvl = ->(pilo : Int32, pilmt : Int32, m : Int64) : Int64 {
pi = pilo; answr = 0_i64
while pi < pilmt
p = oprms.unsafe_fetch(pi).to_i64; nm = p * m
return answr + (pilmt - pi) if n <= nm * p
q = (n.to_f64 / nm.to_f64).to_i64; answr += tiny_Phi(q)
answr -= lvl.call(CC, pi, nm) if pi > CC
pi += 1
end
answr
}
tiny_Phi(n) - lvl.call(CC, oprms.size, 1_i64) + oprms.size
end</syntaxhighlight>
 
Use of the above function gets a gain in speed of about a further ten times over the above version due to less recursion, the use of the "Tiny_Phi" Look Up Table (LUT) for smaller "degrees" of primes which greatly reduces the number of integer divisions, and by using floating point division for integer division because it is faster, although it is limited in precision to 53-bits, one wouldn't want to use these algorithms over a range where that would cause a problem. This version of prime counting function might be considered to be reasonably useful for counting primes up to a trillion (1e12) in a few tens of seconds.
 
'''The Non-Recursive Partial Sieving Algorithm'''
 
Just substitute the following Crystal code for the `count_primes` function above to enjoy the further gain in speed:
<syntaxhighlight lang="crystal">def count_primes(n : Int64) : Int64
if n < 3
if n < 2
return 0_i64
else
return 1_i64
end
end
half = ->(n : Int64) : Int64 { (n - 1) >> 1 }
divide = ->(n : Int64, d : Int64) : Int64 { (n.to_f64 / d.to_f64).to_i64 }
rtlmt = Math.sqrt(n.to_f64).to_i32; mxndx = (rtlmt - 1) // 2
cmpsts = BitArray.new(mxndx + 1)
smalls = Array(Int32).new(mxndx + 1) { |i| i }
roughs = Array(Int32).new(mxndx + 1) { |i| i + i + 1 }
larges =
Array(Int64).new(mxndx + 1) { |i| ((n // (i + i + 1)).to_i64 - 1) >> 1 }
i = 1; nbps = 0; mxri = mxndx
while true
c = (i + i) * (i + 1); break if c > mxndx
if !cmpsts.unsafe_fetch(i)
bp = i + i + 1; cmpsts.unsafe_put(i, true)
until c > mxndx
cmpsts.unsafe_put(c, true); c += bp
end # partial sieving for bp completed here!
 
j = 0; ri = 0 # adjust `larges` according to partial sieve...
while j <= mxri
q = roughs.unsafe_fetch(j); qi = q >> 1
if !cmpsts.unsafe_fetch(qi)
d = bp.to_i64 * q.to_i64
larges.unsafe_put(ri, larges.unsafe_fetch(j) -
if d <= rtlmt.to_i64
ndx = smalls.unsafe_fetch(d >> 1) - nbps
larges.unsafe_fetch(ndx)
else
ndx = half.call(divide.call(n, d))
smalls.unsafe_fetch(ndx)
end + nbps)
roughs.unsafe_put(ri, q); ri += 1
end; j += 1
end
 
si = mxndx; bpm = (rtlmt // bp - 1) | 1
while bpm >= bp # adjust smalls according to partial sieve...
c = smalls.unsafe_fetch(bpm >> 1) - nbps; e = (bpm * bp) >> 1
while si >= e
smalls.unsafe_put(si, smalls.unsafe_fetch(si) - c); si -= 1
end
bpm -= 2
end
 
mxri = ri - 1; nbps += 1
end; i += 1
end
 
ans = larges.unsafe_fetch(0); i = 1
while i <= mxri # combine results; adjust for over subtraction base primes...
ans -= larges.unsafe_fetch(i); i += 1
end
ans += (mxri.to_i64 + 1 + 2 * (nbps.to_i64 - 1)) * mxri.to_i64 // 2 # adjust!
 
ri = 1 # do final phi calculation for pairs of larger primes...
while true # break on condition when up to cube root of range!
p = roughs.unsafe_fetch(ri).to_i64; q = n // p
e = smalls.unsafe_fetch(half.call(divide.call(q, p))) - nbps
break if e <= ri; ori = ri + 1
while ori <= e
ndx = half.call(divide.call(q, roughs.unsafe_fetch(ori).to_i64))
ans += smalls.unsafe_fetch(ndx).to_i64; ori += 1
end
ans -= (e - ri).to_i64 * (nbps.to_i64 + ri.to_i64 - 1); ri += 1
end
 
ans + 1 # for only even prime of two!
end</syntaxhighlight>
 
The above function enjoys quite a large gain in speed of about a further ten times over the immediately preceding version due to not using recursion at all and the greatly reduced computational complexity of O(n**(3/4)/((log n)**2)) instead of the almost-linear-for-large-counting-ranges O(n/((log n)**2)) for the previous two versions, although the previous two versions differ in performance by a constant factor due to the overheads of recursion and division. This version of prime counting function might be considered to be reasonably useful for counting primes up to a 1e16 in almost two minutes as long as the computer used has the required free memory of about 800 Megabytes. This Crystal version is about twice as slow as the Nim version from which it was translated because Nim's default GCC back-end is better at optimization (at least for this algorithm) than the LLVM back-end used by Crystal and also there is more numeric calculation checking such as numeric overflow used in Crystal that can't be turned off. At that, this version is about the same speed as when translated to Rust, which also uses a LLVM back-end.
 
=={{header|Elm}}==
 
Currently, Elm does not have linear arrays and has no mutation whatsoever, even array content mutation protected by "monadic function chains" as for Haskell, so many algorithms can't be easily implemented such as the Legendre Partial Sieving algorithm which requires random access to linear arrays whose contents are mutated according to previous passes. However, given a prime number generator based on persistent data structures such as the List version [[https://rosettacode.org/wiki/Sieve_of_Eratosthenes#Elm|from the Sieve of Eratosthenes task page]], one can translate the algorithm of the "bottom-up" recursive version without the "TinyPhi" Look Up Table (LUT) optmization which would need a linear array to implement, as in the following code:
<syntaxhighlight lang="elm">module Main exposing (main)
 
import Browser exposing (element)
import Task exposing (Task, succeed, perform, andThen)
import Html exposing (div, text)
import Time exposing (now, posixToMillis)
 
type CIS a = CIS a (() -> CIS a) -- infinite Co-Inductive Stream...
 
uptoCIS2List : comparable -> CIS comparable -> List comparable
uptoCIS2List n cis =
let loop (CIS hd tl) lst =
if hd > n then List.reverse lst
else loop (tl()) (hd :: lst)
in loop cis []
 
-- require a range of primes by Sieve of Eratosthenes...
primesTreeFolding : () -> CIS Int
primesTreeFolding() =
let
merge (CIS x xtl as xs) (CIS y ytl as ys) =
case compare x y of
LT -> CIS x <| \ () -> merge (xtl()) ys
EQ -> CIS x <| \ () -> merge (xtl()) (ytl())
GT -> CIS y <| \ () -> merge xs (ytl())
pmult bp =
let adv = bp + bp
pmlt p = CIS p <| \ () -> pmlt (p + adv)
in pmlt (bp * bp)
allmlts (CIS bp bptl) =
CIS (pmult bp) <| \ () -> allmlts (bptl())
pairs (CIS frst tls) =
let (CIS scnd tlss) = tls()
in CIS (merge frst scnd) <| \ () -> pairs (tlss())
cmpsts (CIS (CIS hd tl) tls) =
CIS hd <| \ () -> merge (tl()) <| cmpsts <| pairs (tls())
testprm n (CIS hd tl as cs) =
if n < hd then CIS n <| \ () -> testprm (n + 2) cs
else testprm (n + 2) (tl())
oddprms() =
CIS 3 <| \ () -> testprm 5 <| cmpsts <| allmlts <| oddprms()
in CIS 2 <| \ () -> oddprms()
 
countPrimesTo : Float -> Float -- only use integral values!
countPrimesTo n =
if n < 3 then if n < 2 then 0 else 1 else
let nnf = toFloat (floor n) -- erase fractional part!
sqrtn = sqrt nnf |> truncate
oprms = primesTreeFolding() |> uptoCIS2List sqrtn |> List.drop 1
opsz = List.length oprms
lvl opi opilmt plst m acc =
if opi >= opilmt then acc else
case plst of
[] -> acc -- should never happen
(op :: optl) ->
let opl = toFloat op
nm = m * opl in
if nnf <= nm * opl then acc + toFloat (opilmt - opi) else
let q = nnf / nm |> floor |> toFloat
nacc = acc + q - toFloat (floor (q / 2))
sacc = if opi <= 0 then 0 else lvl 0 opi oprms nm 0
in lvl (opi + 1) opilmt optl m (nacc - sacc)
in nnf - toFloat (floor (nnf / 2)) - lvl 0 opsz oprms 1 0 + toFloat opsz
 
-- run the required task tests...
timemillis : () -> Task Never Int -- a side effect function
timemillis() = now |> andThen (\ t -> succeed (posixToMillis t))
 
test : () -> Cmd Msg -- side effect function chain (includes "perform")...
test() =
timemillis()
|> andThen (\ strt ->
let rsltstrs = List.range 0 9 |> List.map ( \ n ->
"π(10^" ++ String.fromInt n ++ ") = " ++
String.fromFloat (countPrimesTo (toFloat (10^n))))
in timemillis()
|> andThen (\ stop ->
succeed (List.append rsltstrs ["This took "
++ String.fromInt (stop - strt)
++ " milliseconds."])))
|> perform Done
 
-- following code has to do with outputting to a web page using MUV/TEA...
type alias Model = List String
 
type Msg = Done Model
 
main : Program () Model Msg
main = -- starts with empty list of strings; views model of filled list...
element { init = \ _ -> ( [], test() )
, update = \ (Done mdl) _ -> ( mdl , Cmd.none )
, subscriptions = \ _ -> Sub.none
, view = div [] << List.map (div [] << List.singleton << text) }</syntaxhighlight>
{{out}}
<pre>π(10^0) = 0
π(10^1) = 4
π(10^2) = 25
π(10^3) = 168
π(10^4) = 1229
π(10^5) = 9592
π(10^6) = 78498
π(10^7) = 664579
π(10^8) = 5761455
π(10^9) = 50847534
This took 341 milliseconds.</pre>
 
=={{header|Erlang}}==
<syntaxhighlight lang="erlang">
Line 226 ⟶ 983:
</pre>
 
=={{header|F#}}==
 
There doesn't seem to be much point to contributing memoized (Hash table) versions as they seem to be the result of the task author not realizing that there is a simple way of keeping the computation complexity from having exponential growth with counting range by limiting the "splitting" of the "phi" calculation when the result must be just one. Accordingly, the following F# versions are [translated from the non-memoizing Nim versions](https://rosettacode.org/wiki/Legendre_prime_counting_function#Non-Memoized_Versions), which explanations can be referenced for an understanding of how they work:
 
'''The Basic Recursive Legendre Algorithm'''
{{trans|Nim}}
<syntaxhighlight lang="fsharp">let countPrimes (lmt: uint64) =
if lmt < 3UL then (if lmt < 2UL then 0L else 1L) else
let sqrtlmt = lmt |> float |> sqrt |> uint64
let mxndx = (sqrtlmt - 3UL) / 2UL |> int
let oprms =
let cb = Array.init (mxndx + 1) <| fun i -> uint32 (i + i + 3)
let rec loopi i =
let sqri = (i + i) * (i + 3) + 3
if sqri > mxndx then () else
if cb.[i] = 0u then loopi (i + 1) else
let bp = i + i + 3
let rec cull c = if c > mxndx then () else cb.[c] <- 0u; cull (c + bp)
cull sqri; loopi (i + 1)
loopi 0; cb |> Array.filter ((<>) 0u)
let rec phi x a =
if a <= 0 then x - (x >>> 1) |> int64 else
let na = a - 1 in let p = uint64 oprms.[na]
if x <= p then 1L else phi x na - phi (x / p) na
phi lmt oprms.Length + int64 oprms.Length
 
let strt = System.DateTime.Now.Ticks
 
{ 0 .. 9 } |> Seq.iter (fun i ->
printfn "π(10**%d) = %d" i (countPrimes (uint64(10. ** i))))
 
let elpsd = (System.DateTime.Now.Ticks - strt) / 10000L
printfn "This took %d milliseconds." elpsd</syntaxhighlight>
{{out}}
<pre>π(10**0) = 0
π(10**1) = 4
π(10**2) = 25
π(10**3) = 168
π(10**4) = 1229
π(10**5) = 9592
π(10**6) = 78498
π(10**7) = 664579
π(10**8) = 5761455
π(10**9) = 50847534
This took 525 milliseconds.</pre>
 
The above time as run on an Intel i5-6500 (3.6 GHz single-threaded boost) isn't particularly fast but includes some DotNet initialization and JIT compilation overhead, and is about as fast as a highly optimized page-segmented wheel-factorized Sieve of Eratosthenes; Although this is about ten times faster than if it were using memoization (and uses much less memory), it is only reasonably usable for trivial ranges such as this (trivial in terms of prime counting functions).
 
'''The Less Recursive "Bottom-Up" with a TinyPhi LUT Algorithm'''
 
Just substitute the following F# code for the `countPrimes` function above to enjoy the gain in speed:
{{trans|Nim}}
<syntaxhighlight lang="fsharp">let TinyPhiPrimes = [| 2; 3; 5; 7; 11; 13 |]
let TinyPhiDeg = TinyPhiPrimes.Length - 1
let TinyPhiOddCirc = (TinyPhiPrimes |> Seq.reduce (*)) / 2
let TinyPhiTot = TinyPhiPrimes |> Seq.fold (fun s p -> s * (p - 1)) 1
let TinyPhiLUT =
let cb = Array.init TinyPhiOddCirc (fun i -> 1u)
TinyPhiPrimes |> Seq.skip 1
|> Seq.iter (fun bp ->
cb.[(bp - 1) >>> 1] <- 0u
{ (bp * bp - 1) >>> 1 .. bp .. TinyPhiOddCirc - 1}
|> Seq.iter (fun c -> cb.[c] <- 0u) )
let rec loopi i acc =
if i >= TinyPhiOddCirc then () else
let nacc = acc + cb[i] in cb.[i] <- nacc; loopi (i + 1) nacc
loopi 0 0u; cb
let tinyPhi (x: uint64): int64 =
let ndx = (x - 1UL) >>> 1 |> int64
let numtots = ndx / int64 TinyPhiOddCirc
let li = ndx - numtots * int64 TinyPhiOddCirc |> int
numtots * int64 TinyPhiTot + int64 TinyPhiLUT.[li]
 
let countPrimes (lmt: uint64) =
if lmt < 169UL then // below 169 whose sqrt is 13 is where TinyPhi doesn't work...
( if lmt < 2UL then 0L else
if lmt < 3UL then 1L else
// adjust for the missing "degree" base primes
if lmt < 9UL then 1L + int64 (lmt - 1UL) / 2L else
if lmt <= 13UL then int64 (lmt - 1UL) / 2L else
5L + int64 TinyPhiLUT.[int (lmt - 1UL) / 2]) else
let sqrtlmt = lmt |> float |> sqrt |> uint64
let mxndx = (sqrtlmt - 3UL) / 2UL |> int
let oprms =
let cb = Array.init (mxndx + 1) <| fun i -> uint32 (i + i + 3)
let rec loopi i =
let sqri = (i + i) * (i + 3) + 3
if sqri > mxndx then () else
if cb.[i] = 0u then loopi (i + 1) else
let bp = i + i + 3
let rec cull c = if c > mxndx then () else cb.[c] <- 0u; cull (c + bp)
cull sqri; loopi (i + 1)
loopi 0; cb |> Array.filter ((<>) 0u)
let rec lvl pilmt m =
let rec looppi pi acc =
if pi >= pilmt then acc else
let p = uint64 oprms.[pi] in let nm = p * m
if lmt <= nm * p then acc + int64 (pilmt - pi) else
let nacc = if pi <= TinyPhiDeg then acc else acc - lvl pi nm
looppi (pi + 1) (nacc + tinyPhi (lmt / nm))
looppi TinyPhiDeg 0L
tinyPhi lmt - lvl oprms.Length 1UL + int64 oprms.Length</syntaxhighlight>
 
Use of the above function gets a gain in speed of about a further ten times for far larger ranges than this over the above version due to less recursion, the use of the "Tiny_Phi" Look Up Table (LUT) for smaller "degrees" of primes which greatly reduces the number of integer divisions. This version of prime counting function might be considered to be reasonably useful for counting primes up to a trillion (1e12) in a few tens of seconds.
 
'''The Non-Recursive Partial Sieving Algorithm'''
 
Just substitute the following F# code for the `countPrimes` function above to enjoy the further gain in speed; note that rather than using a recursive function loop for the partial sieving culling pass as per the above two version, this version uses a Seq iteration which is slower but more concise in expression, but as sieving is a negligible part of the total execution time, it doesn't matter and conciseness was considered to be more important; this version may appear to be recursive to those unfamiliar with Functional Programming (FP) implementations but FP generally turns all function recursions in tail-call position (which all of these are) into the same compiled code as a simple loop in other languages:
{{trans|Nim}}
<syntaxhighlight lang="fsharp">let masks = Array.init 8 ((<<<) 1uy) // quick bit twiddling
 
let countPrimes (lmt: uint64): int64 =
if lmt < 3UL then (if lmt < 2UL then 0L else 1L) else
let inline half x = (x - 1) >>> 1
let inline divide nm d = (float nm) / (float d) |> int
let sqrtlmt = lmt |> float |> sqrt |> uint64
let mxndx = (sqrtlmt - 1UL) / 2UL |> int in let cbsz = (mxndx + 8) / 8
let cullbuf = Array.zeroCreate cbsz
let smalls = Array.init (mxndx + 1) uint32
let roughs = Array.init (mxndx + 1) <| fun i -> uint32 (i + i + 1)
let larges = Array.init (mxndx + 1) <| fun i ->
int64 (lmt / uint64 (i + i + 1) - 1UL) / 2L
 
let rec loopbp bp nobps rilmt =
let i = int (bp - 1UL) >>> 1 in let sqri = (i + i) * (i + 1)
if sqri > mxndx then nobps, rilmt else
if (cullbuf.[i >>> 3] &&& masks.[i &&& 7]) <> 0uy then
loopbp (bp + 2UL) nobps rilmt else
let w = i >>> 3 in cullbuf.[w] <- cullbuf.[w] ||| masks.[i &&& 7] // cull bp
{ sqri .. int bp .. mxndx } |> Seq.iter (fun c -> // cull multiples of bp...
let w = c >>> 3 in cullbuf.[w] <- cullbuf.[w] ||| masks.[c &&& 7] )
 
// adjust `larges for last partial loop pass;
// compress larges/roughs for current partial sieve pass...
let rec loopri iri ori =
if iri > rilmt then ori - 1 else
let r = uint64 roughs.[iri] in let sri = int (r >>> 1)
if (cullbuf.[sri >>> 3] &&& masks.[sri &&& 7]) <> 0uy then
loopri (iri + 1) ori else // skip for roughs culled this pass!
let d = bp * r
larges.[ori] <- larges.[iri] -
( if d <= sqrtlmt then
larges.[int smalls.[int (d >>> 1)] - nobps]
else let ndx = (half << divide lmt) d
int64 smalls.[ndx] ) + int64 nobps
roughs.[ori] <- uint32 r; loopri (iri + 1) (ori + 1)
 
// adjust `smalls` for last partial loop pass...
let rec loopbpm bpm mxsi =
if bpm < bp then () else
let c = smalls.[int (bpm >>> 1)] - uint32 nobps
let ei = (bpm * bp) >>> 1 |> int
let rec loopsi si =
if si < ei then si else smalls.[si] <- smalls.[si] - c; loopsi (si - 1)
loopbpm (bpm - 2UL) (loopsi mxsi)
 
let nrilmt = loopri 0 0
loopbpm ((sqrtlmt / bp - 1UL) ||| 1UL) mxndx
loopbp (bp + 2UL) (nobps + 1) nrilmt
 
// accumulate result so far; compensate for over subtraction...
let numobps, mxri = loopbp 3UL 0 mxndx
let rec smr i acc =
if i > mxri then // adjust accumulated answer!
acc + (int64 mxri + 1L + 2L * int64 (numobps - 1)) * int64 mxri / 2L
else smr (i + 1) (acc - larges.[i])
let ans0 = smr 1 larges.[0]
 
// finally, add result from pairs of rough primes up to cube root of range,
// where they are two different primes; compensating for over addition...
let rec loopri ri acc =
let p = uint64 roughs.[ri] in let q = lmt / p
let ei = int smalls.[(half << divide q) p] - numobps
if ei <= ri then acc else
let rec loopori ori oacc =
if ori > ei then oacc else
let ndx = (half << divide q) (uint64 roughs.[ori])
loopori (ori + 1) (oacc + int64 smalls.[ndx])
let nacc = loopori (ri + 1) acc // subtract over addition of base primes:
loopri (ri + 1) (nacc - int64 (ei - ri) * (int64 numobps + int64 ri - 1L))
 
loopri 1 ans0 + 1L // add one for only even prime of two!</syntaxhighlight>
 
The above function enjoys quite a large gain in speed of about a further ten times over the immediately preceding version (although one can't see that gain for these trivial ranges as it is buried in the constant overheads) due to not using recursion at all and the greatly reduced computational complexity of O(n**(3/4)/((log n)**2)) instead of the almost-linear-for-large-counting-ranges O(n/((log n)**2)) for the previous two versions, although the previous two versions differ in performance by a constant factor due to the overheads of recursion and division; for instance, this version can calculate the number of primes to 1e11 in about a hundred milliseconds. This version of prime counting function might be considered to be reasonably useful for counting primes up to a 1e16 in about a hundred seconds as long as the computer used has the required free memory of about 800 Megabytes. This F# version is about twice as slow as the Nim version from which it was translated due to the benefits of native code compilation and more optimizations for Nim rather than JIT compilation for F#. At that, this version is about the same speed as when translated to Rust and Crystal once the range is increased where constant overheads aren't a factor, which both use native code compilers.
 
=={{header|FreeBASIC}}==
 
{{incorrect|FreeBASIC|This is not a Legendre prime counting function but a very inefficient trial division (`Mod`) sieve.}}
 
<syntaxhighlight lang="vbnet">Function isPrime(n As Uinteger) As Boolean
Dim As Uinteger i
For i = 2 To Sqr(n)
If n Mod i = 0 Then Return False
Next i
Return True
End Function
 
Dim As Uinteger n, i, j, count
Dim Shared As Ulongint primes(1e8)
n = 1
For i = 0 To 9
count = 0
For j = 2 To n
If isPrime(j) Then
count += 1
primes(count) = j
End If
Next j
Print "10^"; i; " "; count
n *= 10
Next i
 
Sleep</syntaxhighlight>
 
=={{header|Go}}==
Line 351 ⟶ 1,321:
Same as first version.
</pre>
===Iterative, partial sieving===
{{trans|Vlang}}
This non-memoized, non-recursive version is much quicker than the first two versions, running in about 2 milliseconds which is the same as V.
<syntaxhighlight lang="ecmascript">package main
 
import (
=={{header|Java}}==
"fmt"
<syntaxhighlight lang="java">import java.util.*;
"math"
"time"
)
 
var masks = [8]uint8{1, 2, 4, 8, 16, 32, 64, 128}
public class LegendrePrimeCounter {
public static void main(String[] args) {
LegendrePrimeCounter counter = new LegendrePrimeCounter(1000000000);
for (int i = 0, n = 1; i < 10; ++i, n *= 10)
System.out.printf("10^%d\t%d\n", i, counter.primeCount((n)));
}
 
func half(n int) int { return (n - 1) >> 1 }
private List<Integer> primes;
private Map<Integer, Map<Integer, Integer>> phiCache = new HashMap<>();
 
func divide(nm, d uint64) int { return int(float64(nm) / float64(d)) }
public LegendrePrimeCounter(int limit) {
primes = generatePrimes((int)Math.sqrt((double)limit));
}
 
func countPrimes(n uint64) int64 {
public int primeCount(int n) {
if (n < 2)9 {
if n < 2 return 0;{
int a = primeCount((int)Math.sqrt((double)n)); return 0
return} phi(n,else a) + a - 1;{
return (int64(n) + 1) / 2
}
}
rtlmt := int(math.Sqrt(float64(n)))
 
privatemxndx int:= phi(intrtlmt x,- int a1) {/ 2
arrlen := mxndx + if (a == 0)1
smalls := make([]uint32, arrlen)
return x;
roughs := make([]uint32, arrlen)
Map<Integer, Integer> map = phiCache.computeIfAbsent(x, k -> new HashMap<>());
larges := make([]int64, arrlen)
Integer value = map.get(a);
for i := uint32(0); ifi < uint32(valuearrlen); !=i++ null){
smalls[i] = return value;i
int resultroughs[i] = phi(x,i a - 1) - phi(x / primes.get(a - 1),+ ai -+ 1);
larges[i] = int64((n/uint64(i+i+1) - 1) / 2)
map.put(a, result);
return result;
}
cullbuflen := (mxndx + 8) / 8
 
cullbuf := make([]uint8, cullbuflen)
private static List<Integer> generatePrimes(int limit) {
nbps := 0
boolean[] sieve = new boolean[limit >> 1];
rilmt := arrlen
Arrays.fill(sieve, true);
for (int pi := 3, s = 91; s < limit; p i++= 2) {
sqri := (i + ifi) * (sieve[pi >>+ 1]) {
if sqri > mxndx {
for (int q = s; q < limit; q += p << 1)
sieve[q >> 1] = false;break
}
if (cullbuf[i>>3] & masks[i&7]) != 0 {
continue
}
cullbuf[i>>3] |= masks[i&7]
bp := i + i + 1
for c := sqri; c < arrlen; c += bp {
cullbuf[c>>3] |= masks[c&7]
}
nri := 0
for ori := 0; ori < rilmt; ori++ {
r := int(roughs[ori])
rci := r >> 1
if (cullbuf[rci>>3] & masks[rci&7]) != 0 {
continue
}
sd +:= (pr +* 1) << 2;bp
t := int64(0)
if d <= rtlmt {
t = larges[int(smalls[d>>1])-nbps]
} else {
t = int64(smalls[half(divide(n, uint64(d)))])
}
larges[nri] = larges[ori] - t + int64(nbps)
roughs[nri] = uint32(r)
nri++
}
List<Integer> primessi := new ArrayList<>();mxndx
iffor pm := (limitrtlmt/bp - 1) | 1; pm >= bp; pm -= 2) {
primes.add(2);c := smalls[pm>>1]
for (int i = 1;e i:= <(pm sieve.length;* ++ibp) {>> 1
iffor (sieve[i]); si >= e; si-- {
primes.add((ismalls[si] <<-= 1)(c +- 1uint32(nbps));
} }
return primes;}
rilmt = nri
nbps++
}
ans := larges[0] + int64(((rilmt + 2*(nbps-1)) * (rilmt - 1) / 2))
for ri := 1; ri < rilmt; ri++ {
ans -= larges[ri]
}
for ri := 1; ; ri++ {
p := uint64(roughs[ri])
m := n / p
ei := int(smalls[half(int(m/p))]) - nbps
if ei <= ri {
break
}
ans -= int64((ei - ri) * (nbps + ri - 1))
for sri := ri + 1; sri < ei+1; sri++ {
ans += int64(smalls[half(divide(m, uint64(roughs[sri])))])
}
}
return ans + 1
}
 
func main() {
start := time.Now()
for i, n := uint64(0), uint64(1); i <= 9; i, n = i+1, n*10 {
fmt.Printf("10^%d %d\n", i, countPrimes(n))
}
elapsed := time.Since(start).Microseconds()
fmt.Printf("\nTook %d microseconds\n", elapsed)
}</syntaxhighlight>
 
{{out}}
<pre>
10^0 0
10^1 4
10^2 25
10^3 168
10^4 1229
10^5 9592
10^6 78498
10^7 664579
10^8 5761455
10^9 50847534
 
Took 1862 microseconds
</pre>
 
=={{header|Haskell}}==
 
With Memoization:
Memoization utilities:
<syntaxhighlight lang="haskell">data{-# MemoOPTIONS_GHC a-O2 =-fllvm Node-Wno-incomplete-patterns a (Memo a) (Memo a)#-}
{-# LANGUAGE DeriveFunctor #-}
 
import Data.Time.Clock.POSIX ( getPOSIXTime ) -- for timing
 
import Data.Int ( Int64 )
import Data.Bits ( Bits( shiftL, shiftR ) )
 
data Memo a = EmptyNode | Node a (Memo a) (Memo a)
deriving Functor
 
Line 447 ⟶ 1,479:
memoList = memo . mkList
where
mkList [] = EmptyNode -- never used; makes complete
mkList (x:xs) = Node x (mkList l) (mkList r)
where (l,r) = split xs
split [] = ([],[])
split [x] = ([x],[])
split (x:y:xs) = let (l,r) = split xs in (x:l, y:r)</syntaxhighlight>
 
isqrt :: Integer -> Integer
Computation of Legendre function:
<syntaxhighlight lang="haskell">isqrt :: Integer -> Integer
isqrt n = go n 0 (q `shiftR` 2)
where
Line 464 ⟶ 1,496:
else go z (r `shiftR` 1) (q `shiftR` 2)
 
primes :: [Integer]
primes = 2 : _Y ((3:) . gaps 5 . _U . map(\p-> [p*p, p*p+2*p..])) where
_Y g = g (_Y g) -- = g (g (g ( ... ))) non-sharing multistage fixpoint combinator
gaps k s@(c:cs) | k < c = k : gaps (k+2) s -- ~= ([k,k+2..] \\ s)
| otherwise = gaps (k+2) cs -- when null(s\\[k,k+2..])
_U ((x:xs):t) = x : (merge xs . _U . pairs) t -- tree-shaped folding big union
pairs (xs:ys:t) = merge xs ys : pairs t
merge xs@(x:xt) ys@(y:yt) | x < y = x : merge xt ys
| y < x = y : merge xs yt
| otherwise = x : merge xt yt
 
phi :: Integer -> Integer -> Integer
phi = memoize2 phiM
where
phiM x 0 = x
phiM x a = phi x (a-1) - phi (x `div` p a) (a - 1)
 
p = memoList (undefined : primes)
 
Line 477 ⟶ 1,521:
where a = legendrePi (floor (sqrt (fromInteger n)))
 
main :: IO ()
main = mapM_ (\n -> putStrLn $ show n ++ "\t" ++ show (legendrePi (10^n))) [1..7]</syntaxhighlight>
main = do
strt <- getPOSIXTime
mapM_ (\n -> putStrLn $ show n ++ "\t" ++ show (legendrePi (10^n))) [0..9]
stop <- getPOSIXTime
let elpsd = round $ 1e3 * (stop - strt) :: Int64
putStrLn $ "This last took " ++ show elpsd ++ " milliseconds."</syntaxhighlight>
 
<pre>λ>0 main 0
1 4
2 25
3 168
4 1229
5 9592
6 78498
7 664579
8 5761455
9 50847534</pre>
This last took 20383 milliseconds.</pre>
 
The above code reveals all of the problems with the usual Haskell developer with overuse of the multi-precision `Integer` type when there is no way that the range of a 64-bit integer will ever be exceeded, overuse of lists, developed and tested using the interpreter, etc., such that, even compiled using the LLVM back-end, the above code is about ten times slower that any other compiled language for a memoized version.
 
===Non Memoized Haskell Versions===
 
There doesn't seem to be much point to improving the above memoized version even though one could gain a factor of about ten in doing so as the need for memoization seems to be the result of the task author not realizing that there is a simple way of keeping the computation complexity from having exponential growth with counting range by limiting the "splitting" of the "phi" calculation when the result must be just one. Accordingly, the following Haskell versions are [translated from the non-memoizing Nim versions](https://rosettacode.org/wiki/Legendre_prime_counting_function#Non-Memoized_Versions), which explanations can be referenced for an understanding of how they work:
 
'''The Basic Recursive Legendre Algorithm'''
 
<syntaxhighlight lang="haskell">{-# OPTIONS_GHC -O2 -fllvm #-}
{-# LANGUAGE FlexibleContexts, BangPatterns #-}
 
import Data.Time.Clock.POSIX ( getPOSIXTime ) -- for timing
 
import Data.Int ( Int64 )
import Data.Word ( Word32, Word64 )
import Data.Bits ( Bits( (.&.), (.|.), shiftL, shiftR ) )
import Control.Monad ( unless, when, forM_ )
import Control.Monad.ST ( ST, runST )
import Data.Array.ST ( runSTUArray )
import Data.Array.Base ( UArray(..), IArray(unsafeAt), listArray, elems, assocs,
MArray( unsafeNewArray_, newArray, unsafeRead, unsafeWrite ),
STUArray, unsafeFreezeSTUArray, castSTUArray )
 
countPrimes :: Word64 -> Int64
countPrimes n =
if n < 3 then (if n < 2 then 0 else 1) else
let sqrtn = truncate $ sqrt $ fromIntegral n
qdrtn = truncate $ sqrt $ fromIntegral sqrtn
rtlmt = (sqrtn - 3) `div` 2
qrtlmt = (qdrtn - 3) `div` 2
oddPrimes@(UArray _ _ psz _) = runST $ do -- UArray of odd primes...
cmpsts <- newArray (0, rtlmt) False :: ST s (STUArray s Int Bool)
forM_ [ 0 .. qrtlmt ] $ \ i -> do
t <- unsafeRead cmpsts i
unless t $ do
let sqri = (i + i) * (i + 3) + 3
bp = i + i + 3
forM_ [ sqri, sqri + bp .. rtlmt ] $ \ c ->
unsafeWrite cmpsts c True
fcmpsts <- unsafeFreezeSTUArray cmpsts
let !numoprms = sum $ [ 1 | False <- elems fcmpsts ]
prms = [ fromIntegral $ i + i + 3 | (i, False) <- assocs fcmpsts ]
return $ listArray (0, numoprms - 1) prms :: ST s (UArray Int Word32)
phi x a =
if a < 1 then x - (x `shiftR` 1) else
let na = a - 1
p = fromIntegral $ unsafeAt oddPrimes na in
if x < p then 1 else
phi x na - phi (x `div` p) na
 
in fromIntegral (phi n psz) + fromIntegral psz
 
main :: IO ()
main = do
strt <- getPOSIXTime
mapM_ (\n -> putStrLn $ show n ++ "\t" ++ show (countPrimesx (10^n))) [ 0 .. 9 ]
stop <- getPOSIXTime
let elpsd = round $ 1e3 * (stop - strt) :: Int64
putStrLn $ "This took " ++ show elpsd ++ " milliseconds."</syntaxhighlight>
{{out}}
<pre>0 0
1 4
2 25
3 168
4 1229
5 9592
6 78498
7 664579
8 5761455
9 50847534
This took 273 milliseconds.</pre>
 
The time is as run on an Intel i5-6500 (3.6 GHz single-threaded boost).
 
Although this is much faster than if it were using memoization (and uses much less memory), it is only reasonably usable for trivial ranges such as this (trivial in terms of prime counting functions).
 
'''The Less Recursive "Bottom-Up" with a TinyPhi LUT Algorithm'''
 
Just substitute the following Haskell code for the `countPrimes` function above to enjoy the gain in speed:<syntaxhighlight lang="haskell">cTinyPhiPrimes :: [Int]
cTinyPhiPrimes = [ 2, 3, 5, 7, 11, 13 ]
cC :: Int
cC = length cTinyPhiPrimes - 1
cTinyPhiOddCirc :: Int
cTinyPhiOddCirc = product cTinyPhiPrimes `div` 2
cTinyPhiTot :: Int
cTinyPhiTot = product [ p - 1 | p <- cTinyPhiPrimes ]
cTinyPhiLUT :: UArray Int Word32
cTinyPhiLUT = runSTUArray $ do
ma <- newArray (0, cTinyPhiOddCirc - 1) 1
forM_ (drop 1 cTinyPhiPrimes) $ \ bp -> do
let i = (bp - 1) `shiftR` 1
let sqri = (i + i) * (i + 1)
unsafeWrite ma i 0
forM_ [ sqri, sqri + bp .. cTinyPhiOddCirc - 1 ] $ \ c -> unsafeWrite ma c 0
let tot i acc =
if i >= cTinyPhiOddCirc then return ma else do
v <- unsafeRead ma i
if v == 0 then do unsafeWrite ma i acc; tot (i + 1) acc
else do let nacc = acc + 1
unsafeWrite ma i nacc; tot (i + 1) nacc
tot 0 0
tinyPhi :: Word64 -> Int64
tinyPhi n =
let on = (n - 1) `shiftR` 1
numcyc = on `div` fromIntegral cTinyPhiOddCirc
rem = fromIntegral $ on - numcyc * fromIntegral cTinyPhiOddCirc
in fromIntegral numcyc * fromIntegral cTinyPhiTot +
fromIntegral (unsafeAt cTinyPhiLUT rem)
 
countPrimes :: Word64 -> Int64
countPrimes n =
if n < 3 then (if n < 2 then 0 else 1) else
let sqrtn = truncate $ sqrt $ fromIntegral n
qdrtn = truncate $ sqrt $ fromIntegral sqrtn
rtlmt = (sqrtn - 3) `div` 2
qrtlmt = (qdrtn - 3) `div` 2
oddPrimes@(UArray _ _ psz _) = runST $ do -- UArray of odd primes...
cmpsts <- newArray (0, rtlmt) False :: ST s (STUArray s Int Bool)
forM_ [ 0 .. qrtlmt ] $ \ i -> do
t <- unsafeRead cmpsts i
unless t $ do
let sqri = (i + i) * (i + 3) + 3
bp = i + i + 3
forM_ [ sqri, sqri + bp .. rtlmt ] $ \ c ->
unsafeWrite cmpsts c True
fcmpsts <- unsafeFreezeSTUArray cmpsts
let !numoprms = sum $ [ 1 | False <- elems fcmpsts ]
prms = [ fromIntegral $ i + i + 3 | (i, False) <- assocs fcmpsts ]
return $ listArray (0, numoprms - 1) prms :: ST s (UArray Int Word32)
lvl pi pilmt !m !acc =
if pi >= pilmt then acc else
let p = fromIntegral $ unsafeAt oddPrimes pi
nm = m * p in
if n <= nm * p then acc + fromIntegral (pilmt - pi) else
let !q = fromIntegral $ n `div` nm
!nacc = acc + tinyPhi q
!sacc = if pi <= cC then 0 else lvl cC pi nm 0
in lvl (pi + 1) pilmt m $ nacc - sacc
 
in tinyPhi n - lvl cC psz 1 0 + fromIntegral psz</syntaxhighlight>
 
Use of the above function gets a gain in speed of about a further fifteen times over the above version due to less recursion and the use of the "Tiny_Phi" Look Up Table (LUT) for smaller "degrees" of primes which greatly reduces the number of integer divisions. This version of prime counting function might be considered to be reasonably useful for counting primes up to a trillion (1e12) in a little over ten seconds.
 
'''The Non-Recursive Partial Sieving Algorithm'''
 
Just substitute the following Haskell code for the `countPrimes` function above to enjoy the further gain in speed; this version may appear to be recursive to those unfamiliar with Functional Programming (FP) implementations but FP generally turns all function recursions in tail-call position (which all of these are) into the same compiled code as a simple loop in other languages:
<syntaxhighlight lang="haskell">countPrimes :: Word64 -> Int64
countPrimes n =
if n < 3 then (if n < 2 then 0 else 1) else
let
{-# INLINE divide #-}
divide :: Word64 -> Word64 -> Int
divide nm d = truncate $ (fromIntegral nm :: Double) / fromIntegral d
{-# INLINE half #-}
half :: Int -> Int
half x = (x - 1) `shiftR` 1
rtlmt = floor $ sqrt (fromIntegral n :: Double)
mxndx = (rtlmt - 1) `div` 2
(!nbps, !nrs, !smalls, !roughs, !larges) = runST $ do
mss <- unsafeNewArray_ (0, mxndx) :: ST s (STUArray s Int Word32)
let msscst =
castSTUArray :: STUArray s Int Word32 -> ST s (STUArray s Int Int64)
mdss <- msscst mss -- for use in adjing counts LUT
forM_ [ 0 .. mxndx ] $ \ i -> unsafeWrite mss i (fromIntegral i)
mrs <- unsafeNewArray_ (0, mxndx) :: ST s (STUArray s Int Word32)
forM_ [ 0 .. mxndx ] $ \ i -> unsafeWrite mrs i (fromIntegral i * 2 + 1)
mls <- unsafeNewArray_ (0, mxndx) :: ST s (STUArray s Int Int64)
forM_ [ 0 .. mxndx ] $ \ i ->
let d = fromIntegral (i + i + 1)
in unsafeWrite mls i (fromIntegral (divide n d - 1) `div` 2)
cmpsts <- unsafeNewArray_ (0, mxndx) :: ST s (STUArray s Int Bool)
 
let loop i !cbpi !rlmti =
let sqri = (i + i) * (i + 1) in
if sqri > mxndx then do
fss <- unsafeFreezeSTUArray mss
frs <- unsafeFreezeSTUArray mrs
fls <- unsafeFreezeSTUArray mls
return (cbpi, rlmti + 1, fss, frs, fls)
else do
v <- unsafeRead cmpsts i
if v then loop (i + 1) cbpi rlmti else do
unsafeWrite cmpsts i True -- cull current bp so not a "k-rough"!
let bp = i + i + 1
-- partial cull by current base prime, bp...
cull c = if c > mxndx then return () else do
unsafeWrite cmpsts c True; cull (c + bp)
 
-- adjust `larges` according to partial sieve...
part ri nri = -- old "rough" index to new one...
if ri > rlmti then return (nri - 1) else do
r <- unsafeRead mrs ri -- "rough" always odd!
t <- unsafeRead cmpsts (fromIntegral r `shiftR` 1)
if t then part (ri + 1) nri else do -- skip newly culled
olv <- unsafeRead mls ri
let m = fromIntegral r * fromIntegral bp
adjv <- if m <= fromIntegral rtlmt then do
let ndx = fromIntegral m `shiftR` 1
sv <- unsafeRead mss ndx
unsafeRead mls (fromIntegral sv - cbpi)
else do
sv <- unsafeRead mss (half (divide n m))
return (fromIntegral sv)
unsafeWrite mls nri (olv - (adjv - fromIntegral cbpi))
unsafeWrite mrs nri r; part (ri + 1) (nri + 1)
!pm0 = ((rtlmt `div` bp) - 1) .|. 1 -- max base prime mult
 
adjc lmti pm = -- adjust smalls according to partial sieve:
if pm < bp then return () else do
c <- unsafeRead mss (pm `shiftR` 1)
let ac = c - fromIntegral cbpi -- correction
bi = (pm * bp) `shiftR` 1 -- start array index
adj si = if si > lmti then adjc (bi - 1) (pm - 2)
else do ov <- unsafeRead mss si
unsafeWrite mss si (ov - ac)
adj (si + 1)
adj bi
dadjc lmti pm =
if pm < bp then return () else do
c <- unsafeRead mss (pm `shiftR` 1)
let ac = c - fromIntegral cbpi -- correction
bi = (pm * bp) `shiftR` 1 -- start array index
ac64 = fromIntegral ac :: Int64
dac = (ac64 `shiftL` 32) .|. ac64
dbi = (bi + 1) `shiftR` 1
dlmti = (lmti - 1) `shiftR` 1
dadj dsi = if dsi > dlmti then return ()
else do dov <- unsafeRead mdss dsi
unsafeWrite mdss dsi (dov - dac)
dadj (dsi + 1)
when (bi .&. 1 /= 0) $ do
ov <- unsafeRead mss bi
unsafeWrite mss bi (ov - ac)
dadj dbi
when (lmti .&. 1 == 0) $ do
ov <- unsafeRead mss lmti
unsafeWrite mss lmti (ov - ac)
adjc (bi - 1) (pm - 2)
cull sqri; nrlmti <- part 0 0
dadjc mxndx pm0
loop (i + 1) (cbpi + 1) nrlmti
loop 1 0 mxndx
 
!ans0 = unsafeAt larges 0 - -- combine all counts; each includes nbps...
sum [ unsafeAt larges i | i <- [ 1 .. nrs - 1 ] ]
-- adjust for all the base prime counts subracted above...
!adj = (nrs + 2 * (nbps - 1)) * (nrs - 1) `div` 2
!adjans0 = ans0 + fromIntegral adj
 
loopr ri !acc = -- o final phi calculation for pairs of larger primes...
let r = fromIntegral (unsafeAt roughs ri)
q = n `div` r
lmtsi = half (fromIntegral (q `div` r))
lmti = fromIntegral (unsafeAt smalls lmtsi) - nbps
addcnt pi !ac =
if pi > lmti then ac else
let p = fromIntegral (unsafeAt roughs pi)
ci = half (fromIntegral (divide q p))
in addcnt (pi + 1) (ac + fromIntegral (unsafeAt smalls ci))
in if lmti <= ri then acc else -- break when up to cube root of range!
-- adjust for the `nbps`'s over added in the `smalls` counts...
let !adj = fromIntegral ((lmti - ri) * (nbps + ri - 1))
in loopr (ri + 1) (addcnt (ri + 1) acc - adj)
in loopr 1 adjans0 + 1 -- add one for only even prime of two!</syntaxhighlight>
 
The above function enjoys quite a large gain in speed of about a further ten times over the immediately preceding version due to not using recursion at all and the greatly reduced computational complexity of O(n**(3/4)/((log n)**2)) instead of the almost-linear-for-large-counting-ranges O(n/((log n)**2)) for the previous two versions, although the previous two versions differ in performance by a constant factor due to the overheads of recursion and division. This version of prime counting function might be considered to be reasonably useful for counting primes up to 1e16 in just over a minute as long as the computer used has the required free memory of about 800 Megabytes, counting the primes to 1e11 in under 30 milliseconds. This Haskell version is about twenty percent slower than the Nim version from which it was translated because Nim's default GCC back-end is better at optimization (at least for this algorithm) than the LLVM back-end used by Haskell here.
 
=={{header|J}}==
Line 514 ⟶ 1,832:
 
But we can simplify that to p:inv when we know that primes are not being tested.
 
=={{header|Java}}==
<syntaxhighlight lang="java">import java.util.*;
 
public class LegendrePrimeCounter {
public static void main(String[] args) {
LegendrePrimeCounter counter = new LegendrePrimeCounter(1000000000);
for (int i = 0, n = 1; i < 10; ++i, n *= 10)
System.out.printf("10^%d\t%d\n", i, counter.primeCount((n)));
}
 
private List<Integer> primes;
 
public LegendrePrimeCounter(int limit) {
primes = generatePrimes((int)Math.sqrt((double)limit));
}
 
public int primeCount(int n) {
if (n < 2)
return 0;
int a = primeCount((int)Math.sqrt((double)n));
return phi(n, a) + a - 1;
}
 
private int phi(int x, int a) {
if (a == 0)
return x;
if (a == 1)
return x - (x >> 1);
int pa = primes.get(a - 1);
if (x <= pa)
return 1;
return phi(x, a - 1) - phi(x / pa, a - 1);
}
 
private static List<Integer> generatePrimes(int limit) {
boolean[] sieve = new boolean[limit >> 1];
Arrays.fill(sieve, true);
for (int p = 3, s = 9; s < limit; p += 2) {
if (sieve[p >> 1]) {
for (int q = s; q < limit; q += p << 1)
sieve[q >> 1] = false;
}
s += (p + 1) << 2;
}
List<Integer> primes = new ArrayList<>();
if (limit > 2)
primes.add(2);
for (int i = 1; i < sieve.length; ++i) {
if (sieve[i])
primes.add((i << 1) + 1);
}
return primes;
}
}</syntaxhighlight>
 
{{out}}
<pre>
10^0 0
10^1 4
10^2 25
10^3 168
10^4 1229
10^5 9592
10^6 78498
10^7 664579
10^8 5761455
10^9 50847534
</pre>
 
=={{header|JavaScript}}==
 
There doesn't seem to be much point to contributing memoized (Hash table) versions as they seem to be the result of the task author not realizing that there is a simple way of keeping the computation complexity from having exponential growth with counting range by limiting the "splitting" of the "phi" calculation when the result must be just one. Accordingly, the following JavaScript versions are [translated and improved from the non-memoizing Nim versions](https://rosettacode.org/wiki/Legendre_prime_counting_function#Non-Memoized_Versions), which explanations can be referenced for an understanding of how they work:
 
'''The Basic Recursive Legendre Algorithm'''
 
{{trans|Nim}}
<syntaxhighlight lang="javascript">"use strict";
 
function countPrimesTo(lmt) {
if (lmt < 3) { if (lmt < 2) return 0; else return 1; }
const sqrtlmt = Math.sqrt(lmt) >>> 0;
const oprms = function() {
const mxndx = (sqrtlmt - 3) >>> 1;
const arr = new Float64Array(mxndx + 1);
for (let i = 0 >>> 0; i <= mxndx; ++i) arr[i] = (i + i + 3) >>> 0;
let bp = 3 >>> 0;
while (true) {
let i = (bp - 3) >>> 1; let sqri = ((i + i) * (i + 3) + 3) >>> 0;
if (sqri > mxndx) break;
if (arr[i] != 0) for (; sqri <= mxndx; sqri += bp) arr[sqri] = 0;
bp += 2;
}
return arr.filter(v => v != 0);
}();
function phi(x, a) {
if (a <= 0) return x - Math.trunc(x / 2);
const na = (a - 1) >>> 0; const p = oprms[na];
if (x <= p) return 1;
return phi(x, na) - phi(Math.trunc(x / p), na);
}
return phi(lmt, oprms.length) + oprms.length;
}
 
const start = Date.now();
for (let i = 0; i <= 9; ++i) console.log(`π(10**${i}) =`, countPrimesTo(10**i));
const elpsd = Date.now() - start;
console.log("This took", elpsd, "milliseconds.")</syntaxhighlight>
{{out}}
<pre>π(10**0) = 0
π(10**1) = 4
π(10**2) = 25
π(10**3) = 168
π(10**4) = 1229
π(10**5) = 9592
π(10**6) = 78498
π(10**7) = 664579
π(10**8) = 5761455
π(10**9) = 50847534
This took 712 milliseconds.</pre>
The time is as run on an Intel i5-6500 (3.6 GHz single-threaded boost) using node.js version 17.3.1.
 
Although this is about ten times faster than if it were using memoization (and uses much less memory), it is only reasonably usable for trivial ranges such as this (trivial in terms of prime counting functions).
 
'''The Less Recursive "Bottom-Up" with a TinyPhi LUT Algorithm'''
 
Just substitute the following JavaScript code for the `countPrimesTo` function above to enjoy the gain in speed:
{{trans|Nim}}
<syntaxhighlight lang="javascript">const TinyPhiPrimes = [ 2, 3, 5, 7, 11, 13 ];
const TinyPhiOddDegree = TinyPhiPrimes.length - 1;
const TinyPhiOddCirc = TinyPhiPrimes.reduce((acc, p) => acc * p) / 2;
const TinyPhiTot = TinyPhiPrimes.reduce((acc, p) => acc * (p - 1), 1)
const TinyPhiLUT = function() {
const arr = new Uint16Array(TinyPhiOddCirc); arr.fill(1);
for (const p of TinyPhiPrimes) {
if (p <= 2) continue; arr[p >> 1] = 0;
for (let c = (p * p) >> 1; c < TinyPhiOddCirc; c += p) arr[c] = 0 >>> 0; }
for (let i = 0 | 0, acc = 0 | 0; i < TinyPhiOddCirc; ++i) {
acc += arr[i]; arr[i] = acc; }
return arr; }();
function tinyPhi(x) {
const ndx = Math.trunc(( x - 1) / 2);
const numtots = Math.trunc(ndx / TinyPhiOddCirc);
const rem = (ndx - numtots * TinyPhiOddCirc) >>> 0;
return numtots * TinyPhiTot + TinyPhiLUT[rem];
}
 
function countPrimesTo(lmt) {
if (lmt < 169) {
if (lmt < 3) { if (lmt < 2) return 0; else return 1; }
// adjust for the missing "degree" base primes
if (lmt <= 13) return ((lmt - 1) >>> 1) + (lmt < 9 ? 1 : 0);
return 5 + TinyPhiLUT[(lmt - 1) >>> 1];
}
const sqrtlmt = Math.sqrt(lmt) >>> 0;
const oprms = function() {
const mxndx = (sqrtlmt - 3) >>> 1;
const arr = new Float64Array(mxndx + 1);
for (let i = 0 >>> 0; i <= mxndx; ++i) arr[i] = (i + i + 3) >>> 0;
let bp = 3 >>> 0;
while (true) {
let i = (bp - 3) >>> 1; let sqri = ((i + i) * (i + 3) + 3) >>> 0;
if (sqri > mxndx) break;
if (arr[i] != 0) for (; sqri <= mxndx; sqri += bp) arr[sqri] = 0;
bp += 2;
}
return arr.filter(v => v != 0); }();
function lvl(pilmt, m) {
let ans = 0;
for (let pi = TinyPhiOddDegree; pi < pilmt; ++pi) {
const p = oprms[pi]; const nm = m * p;
if (lmt <= nm * p) return ans + pilmt - pi;
ans += tinyPhi(Math.trunc(lmt / nm));
if (pi > TinyPhiOddDegree) ans -= lvl(pi, nm);
}
return ans;
}
return tinyPhi(lmt) - lvl(oprms.length, 1) + oprms.length;
}</syntaxhighlight>
Use of the above function gets a gain in speed of about a further fifteen times over the above version due to less recursion and the use of the "Tiny_Phi" Look Up Table (LUT) for smaller "degrees" of primes which greatly reduces the number of integer divisions. This version of prime counting function might be considered to be reasonably useful for counting primes up to a trillion (1e12) in a few tens of seconds.
 
'''The Non-Recursive Partial Sieving Algorithm'''
 
Just substitute the following JavaScript code for the `countPrimesTo` function above to enjoy the gain in speed:
{{trans|Nim}}
<syntaxhighlight lang="javascript">const masks = new Uint8Array(8); // faster than bit twiddling!
for (let i = 0; i < 8; ++i) masks[i] = (1 << i) >>> 0;
 
function countPrimesTo(lmt) {
if (lmt < 3) { if (lmt < 2) return 0; else return 1; }
function half(x) { return (x - 1) >>> 1; }
function divide(nm, d) { return (nm / d) >>> 0; }
const sqrtlmt = Math.trunc(Math.sqrt(lmt));
const mxndx = (sqrtlmt - 1) >>> 1; const cbsz = (mxndx + 8) >>> 3;
const cullbuf = new Uint8Array(cbsz);
const smalls = new Uint32Array(mxndx + 1);
for (let i = 0; i <= mxndx; ++i) smalls[i] = i >>> 0;
const roughs = new Uint32Array(mxndx + 1);
for (let i = 0; i <= mxndx; ++i) roughs[i] = (i + i + 1) >>> 0;
const larges = new Float64Array(mxndx + 1);
for (let i = 0; i <= mxndx; ++i) larges[i] =
Math.trunc((Math.trunc(lmt / (i + i + 1)) - 1) / 2);
 
// partial sieve loop, adjusting larges/smalls, compressing larges/roughs...
let nobps = 0 >>> 0; let rilmt = mxndx; let bp = 3 >>> 0;
while (true) { // break when square root is reached
const i = bp >>> 1; let sqri = ((i + i) * (i + 1)) >>> 0;
if (sqri > mxndx) break; // partial sieving pass if bp is prime:
if ((cullbuf[i >> 3] & masks[i & 7]) == 0) {
cullbuf[i >> 3] |= masks[i & 7]; // cull bp!
for (; sqri <= mxndx; sqri += bp)
cullbuf[sqri >> 3] |= masks[sqri & 7]; // cull bp mults!
 
// now adjust `larges` for latest partial sieve pass...
var ori = 0 // compress input rough index to output one
for (let iri = 0; iri <= rilmt; ++iri) {
const r = roughs[iri]; const rci = r >>> 1; // skip roughs just culled!
if ((cullbuf[rci >> 3] & masks[rci & 7]) != 0) continue;
const d = bp * r;
larges[ori] = larges[iri] -
( (d <= sqrtlmt) ? larges[smalls[d >>> 1] - nobps]
: smalls[half(divide(lmt, d))] ) +
nobps; // base primes count over subtracted!
roughs[ori++] = r;
}
 
let si = mxndx // and adjust `smalls` for latest partial sieve pass...
for (let bpm = (sqrtlmt / bp - 1) | 1; bpm >= bp; bpm -= 2) {
const c = smalls[bpm >>> 1] - nobps;
const ei = ((bpm * bp) >>> 1);
while (si >= ei) smalls[si--] -= c;
}
 
nobps++; rilmt = ori - 1;
}
bp += 2;
}
 
// combine results to here; correcting for over subtraction in combining...
let ans = larges[0]; for (let i = 1; i <= rilmt; ++i) ans -= larges[i];
ans += Math.trunc((rilmt + 1 + 2 * (nobps - 1)) * rilmt / 2);
 
// add final adjustment for pairs of current roughs to cube root of range...
let ri = 0
while (true) { // break when reaches cube root of counting range...
const p = roughs[++ri]; const q = Math.trunc(lmt / p);
const ei = smalls[half(divide(q, p))] - nobps;
if (ei <= ri) break; // break here when no more pairs!
for (let ori = ri + 1; ori <= ei; ++ori)
ans += smalls[half(divide(q, roughs[ori]))];
ans -= (ei - ri) * (nobps + ri - 1);
}
 
return ans + 1; // add one for only even prime of two!
}</syntaxhighlight>
The above code enjoys quite a large gain in speed of about a further ten times (and increasing with range) over the immediately preceding version for larger "non-trivial" ranges (since there is an appreciable environment overhead in initialization and JIT compilation) due to not using recursion at all and the greatly reduced computational complexity of O(n**(3/4)/((log n)**2)) instead of the almost-linear-for-large-counting-ranges O(n/((log n)**2)) for the previous two versions, although the previous two versions differ in performance by a constant factor due to the overheads of recursion and division; for instance, this version can calculate the number of primes to 1e11 in about 125 milliseconds. This version of prime counting function might be considered to be reasonably useful for counting primes up to a 1e16 in under three minutes as long as the computer used has the required free memory of about 800 Megabytes. This JavaScript version is about three times as slow as the Nim version from which it was translated for large ranges where the initialization overhead is not significant due to being run by the JavaScript engine and JIT compiled.
 
=={{header|jq}}==
Line 685 ⟶ 2,259:
0.003234 seconds (421 allocations: 14.547 KiB)
</pre>
=== nonrecursive method ===
{{trans|Nim}}
<syntaxhighlight lang="julia">
function countprimes(n)
n < 3 && return typeof(n)(n > 1)
rtlmt = isqrt(n)
mxndx = (rtlmt - 1) ÷ 2
smalls::Array{UInt32} = [i for i in 0:mxndx+1]
roughs::Array{UInt32} = [i + i + 1 for i in 0:mxndx+1]
larges::Array{Int64} = [(n ÷ (i + i + 1) - 1) ÷ 2 for i in 0:mxndx+1]
cmpsts = falses(mxndx + 1)
bp, npc, mxri = 3, 0, mxndx
@inbounds while true
i = bp >> 1
sqri = (i + i) * (i + 1)
sqri > mxndx && break
if !cmpsts[i + 1]
cmpsts[i + 1] = true
for c in sqri:bp:mxndx
cmpsts[c + 1] = true
end
ri = 0
for k in 0:mxri
q = roughs[k + 1]
qi = q >> 1
cmpsts[qi + 1] && continue
d = UInt64(bp) * UInt64(q)
larges[ri + 1] = larges[k + 1] + npc -
(d <= rtlmt ? larges[smalls[d >> 1 + 1] - npc + 1]
: smalls[(Int(floor(n / d)) - 1) >> 1 + 1])
roughs[ri + 1] = q
ri += 1
end
m = mxndx
@simd for k in (rtlmt ÷ bp - 1) | 1 : -2 : bp
c = smalls[k >> 1 + 1] - npc
ee = (k * bp) >> 1
while m >= ee
smalls[m + 1] -= c
m -= 1
end
end
mxri = ri - 1
npc += 1
end
bp += 2
end
 
result = larges[1]
@simd for i in 2:mxri+1
result -= larges[i]
end
result += (mxri + 1 + 2 * (npc - 1)) * mxri ÷ 2
 
for j in 1:mxri
p = UInt64(roughs[j + 1])
m = n ÷ p
ee = smalls[(Int(floor(m / p)) - 1) >> 1 + 1] - npc
ee <= j && break
for k in j+1:ee
result += smalls[(Int(floor(m / roughs[k + 1])) - 1) >> 1 + 1]
end
result -= (ee - j) * (npc + j - 1)
end
return result + 1
end
 
for i in 0:14
println("π(10^$i) = ", countprimes(10^i))
end
 
@time countprimes(10^13)
@time countprimes(10^14)
</syntaxhighlight>{{out}}
<pre>
π(10^0) = 0
π(10^1) = 4
π(10^2) = 25
π(10^3) = 168
π(10^4) = 1229
π(10^5) = 9592
π(10^6) = 78498
π(10^7) = 664579
π(10^8) = 5761455
π(10^9) = 50847534
π(10^10) = 455052511
π(10^11) = 4118054813
π(10^12) = 37607912018
π(10^13) = 346065536839
π(10^14) = 3204941750802
0.894891 seconds (13 allocations: 48.442 MiB, 1.06% gc time)
4.479385 seconds (13 allocations: 153.185 MiB, 0.12% gc time)
</pre>
 
=={{header|Kotlin}}==
 
There doesn't seem to be much point to contributing memoized (Hash table) versions as they seem to be the result of the task author not realizing that there is a simple way of keeping the computation complexity from having exponential growth with counting range by limiting the "splitting" of the "phi" calculation when the result must be just one. Accordingly, the following Kotlin versions are [translated and improved from the non-memoizing Nim versions](https://rosettacode.org/wiki/Legendre_prime_counting_function#Non-Memoized_Versions), which explanations can be referenced for an understanding of how they work:
 
'''The Basic Recursive Legendre Algorithm'''
 
{{trans|Nim}}
<syntaxhighlight lang="kotlin">fun countPrimes(lmt: Long): Long {
if (lmt < 3) return if (lmt < 2) 0 else 1
val sqrtlmt = Math.sqrt(lmt.toDouble()).toInt()
fun makePrimes(): IntArray {
val mxndx = (sqrtlmt - 3) / 2
val arr = IntArray(mxndx + 1, { it + it + 3})
var i = 0
while (true) {
val sqri = (i + i) * (i + 3) + 3
if (sqri > mxndx) break
if (arr[i] != 0) {
val bp = i + i + 3
for (c in sqri .. mxndx step bp) arr[c] = 0
}
i++
}
return arr.filter { it != 0 }.toIntArray()
}
val oprms = makePrimes()
fun phi(x: Long, a: Int): Long {
if (a <= 0) return x - (x shr 1)
val na = a - 1; val p = oprms[na].toLong()
if (x <= p) return 1
return phi(x, na) - phi(x / p, na)
}
return phi(lmt, oprms.size) + oprms.size.toLong()
}
 
fun main() {
val strt = System.currentTimeMillis()
 
for (i in 0 .. 9) {
val arg = Math.pow(10.toDouble(), i.toDouble()).toLong()
println("π(10**$i) = ${countPrimes(arg)}")
}
 
val stop = System.currentTimeMillis()
 
println("This took ${stop - strt} milliseconds.")
}</syntaxhighlight>
{{out}}
<pre>π(10**0) = 0
π(10**1) = 4
π(10**2) = 25
π(10**3) = 168
π(10**4) = 1229
π(10**5) = 9592
π(10**6) = 78498
π(10**7) = 664579
π(10**8) = 5761455
π(10**9) = 50847534
This took 480 milliseconds.</pre>
The time is as run on an Intel i5-6500 (3.6 GHz single-threaded boost).
 
Although this is about ten times faster than if it were using memoization (and uses much less memory), it is only reasonably usable for trivial ranges such as this (trivial in terms of prime counting functions).
 
'''The Less Recursive "Bottom-Up" with a TinyPhi LUT Algorithm'''
 
Just substitute the following Kotlin code for the `countPrimes` function above to enjoy the gain in speed:
{{trans|Nim}}
<syntaxhighlight lang="kotlin">val cTinyPhiPrimes = intArrayOf(2, 3, 5, 7, 11, 13)
val cTinyPhiDeg = cTinyPhiPrimes.size - 1
val cTinyPhiOddCirc = cTinyPhiPrimes.reduce(Int::times) / 2
val cTinyPhiTot = cTinyPhiPrimes.fold(1) { s, p -> s * (p - 1) }
fun makeTPLUT(): IntArray {
val arr = IntArray(cTinyPhiOddCirc) { _ -> 1 }
for (bp in cTinyPhiPrimes.drop(1)) {
arr[bp shr 1] = 0
for (c in ((bp * bp) shr 1) until cTinyPhiOddCirc step bp) arr[c] = 0 }
var acc = 0
for (i in 0 until cTinyPhiOddCirc) { acc += arr[i]; arr[i] = acc }
return arr
}
val cTinyPhiLUT = makeTPLUT()
fun tinyPhi(x: Long): Long {
val ndx = (x - 1) shr 1; val numtots = ndx / cTinyPhiOddCirc.toLong()
val rem = (ndx - numtots * cTinyPhiOddCirc.toLong()).toInt()
return numtots * cTinyPhiTot.toLong() + cTinyPhiLUT[rem].toLong()
}
 
fun countPrimes(lmt: Long): Long {
if (lmt < 169) {
if (lmt < 3) return if (lmt < 2) 0 else 1
// adjust for the missing "degree" base primes
if (lmt <= 13)
return ((lmt - 1).toLong() shr 1) + (if (lmt < 9) 1 else 0);
return 5.toLong() + cTinyPhiLUT[(lmt - 1).toInt() shr 1].toLong();
}
val sqrtlmt = Math.sqrt(lmt.toDouble()).toInt()
fun makePrimes(): IntArray {
val mxndx = (sqrtlmt - 3) / 2
val arr = IntArray(mxndx + 1, { it + it + 3})
var i = 0
while (true) {
val sqri = (i + i) * (i + 3) + 3
if (sqri > mxndx) break
if (arr[i] != 0) {
val bp = i + i + 3
for (c in sqri .. mxndx step bp) arr[c] = 0
}
i++
}
return arr.filter { it != 0 }.toIntArray()
}
val oprms = makePrimes()
fun lvl(pilmt: Int, m: Long): Long {
var acc = 0.toLong()
for (pi in cTinyPhiDeg until pilmt) {
val p = oprms[pi].toLong(); val nm = m * p
if (lmt <= nm * p) return acc + (pilmt - pi).toLong()
val q = (lmt.toDouble() / nm.toDouble()).toLong(); acc += tinyPhi(q)
if (pi > cTinyPhiDeg) acc -= lvl(pi, nm)
}
return acc
}
return tinyPhi(lmt) - lvl(oprms.size, 1) + oprms.size.toLong()
}</syntaxhighlight>
Use of the above function gets a gain in speed of about a further twenty times over the above version due to less recursion and the use of the "Tiny_Phi" Look Up Table (LUT) for smaller "degrees" of primes which greatly reduces the number of integer divisions, which have also been converted to floating point divisions because they are faster although with a lost of precision for counting ranges that one would likely not use this implementation for. This version of prime counting function might be considered to be reasonably useful for counting primes up to a trillion (1e12) in a few tens of seconds.
 
'''The Non-Recursive Partial Sieving Algorithm'''
 
The following Kotlin code enjoys the further gain in speed:
{{trans|Nim}}
<syntaxhighlight lang="kotlin">import java.util.BitSet
 
fun countPrimes(lmt: Long): Long {
if (lmt < 3) return if (lmt < 2) 0 else 1 // odds only!
fun half(x: Int): Int = (x - 1) shr 1
fun divide(nm: Long, d: Long): Int = (nm.toDouble() / d.toDouble()).toInt()
val sqrtlmt = Math.sqrt(lmt.toDouble()).toLong()
val mxndx = ((sqrtlmt - 1) shr 1).toInt()
val cmpsts = BitSet(mxndx + 1)
val smalls = IntArray(mxndx + 1) { it }
val roughs = IntArray(mxndx + 1) { it + it + 1 }
val larges = LongArray(mxndx + 1) { (lmt / (it + it + 1).toLong() - 1) shr 1 }
 
// partial sieve loop, adjusting larges/smalls, compressing larges/roughs...
var nobps = 0; var rilmt = mxndx; var bp = 3.toLong()
while (true) {
val i = (bp shr 1).toInt(); val sqri = (i + i) * (i + 1)
if (sqri > mxndx) break
if (!cmpsts.get(i)) { // condition for partial sieving pass for bp is prime
cmpsts.set(i) // cull bp!
for (c in sqri .. mxndx step bp.toInt()) cmpsts.set(c) // cull bp mults!
 
// now adjust `larges` for latest partial sieve pass...
var ori = 0 // compress input rough index to output one
for (iri in 0 .. rilmt) {
val r = roughs[iri]; val rci = (r shr 1)
if (cmpsts.get(rci)) continue // skip roughs culled in this sieve pass!
val d = bp * r.toLong()
larges[ori] = larges[iri] -
( if (d <= sqrtlmt)
larges[smalls[(d shr 1).toInt()] - nobps]
else smalls[half(divide(lmt, d))].toLong() ) +
nobps.toLong() // base primes count over subtracted!
roughs[ori++] = r
}
 
var si = mxndx // and adjust `smalls` for latest partial sieve pass...
for (bpm in (sqrtlmt / bp - 1) or 1 downTo bp step 2) {
val c = smalls[(bpm shr 1).toInt()] - nobps
val ei = ((bpm * bp) shr 1).toInt()
while (si >= ei) smalls[si--] -= c
}
 
nobps++; rilmt = ori - 1
}
bp += 2
}
 
// combine results to here; correcting for over subtraction in combining...
var ans = larges[0]; for (i in 1 .. rilmt) ans -= larges[i]
ans += (rilmt.toLong() + 1 + 2 * (nobps.toLong() - 1)) * rilmt.toLong() / 2
 
// add final adjustment for pairs of current roughs to cube root of range...
var ri = 0
while (true) { // break when reaches cube root of counting range...
val p = roughs[++ri].toLong(); val q = lmt / p
val ei = smalls[half(divide(q, p))] - nobps
if (ei <= ri) break // break here when no more pairs!
for (ori in ri + 1 .. ei)
ans += smalls[half(divide(q, roughs[ori].toLong()))].toLong()
ans -= (ei - ri).toLong() * (nobps.toLong() + ri.toLong() - 1)
}
 
return ans + 1 // add one for only even prime of two!
}
 
fun main() {
val strt = System.currentTimeMillis()
 
for (i in 0 .. 9) {
val arg = Math.pow(10.toDouble(), i.toDouble()).toLong()
println("π(10**$i) = ${countPrimes(arg)}")
}
 
val stop = System.currentTimeMillis()
 
println("This took ${stop - strt} milliseconds.")
}</syntaxhighlight>
The above code enjoys quite a large gain in speed of about a further ten times (and increasing with range) over the immediately preceding version for larger "non-trivial" ranges (since there is an appreciable environment overhead in initialization and JIT compilation) due to not using recursion at all and the greatly reduced computational complexity of O(n**(3/4)/((log n)**2)) instead of the almost-linear-for-large-counting-ranges O(n/((log n)**2)) for the previous two versions, although the previous two versions differ in performance by a constant factor due to the overheads of recursion and division; for instance, this version can calculate the number of primes to 1e11 in about 82 milliseconds. This version of prime counting function might be considered to be reasonably useful for counting primes up to a 1e16 in about two minutes as long as the computer used has the required free memory of about 800 Megabytes. This Kotlin version is about twice as slow as the Nim version from which it was translated for large ranges where the initialization overhead is not significant due to being run under the Java Virtual Machine (JVM) environment and JIT compiled.
 
=={{header|Mathematica}}/{{header|Wolfram Language}}==
Line 704 ⟶ 2,581:
5761455
50847534</pre>
 
=={{header|Mojo}}==
 
Currently, Mojo does not allow many forms of recursion so the simple recursive solutions are not possible; however, those are much slower than the solution by partial sieving anyway, as per the following code:
 
{{trans|Nim}}
 
<syntaxhighlight lang="mojo">from time import (now)
 
alias cLIMIT: UInt64 = 100_000_000_000
 
@always_inline
fn mkMasks() -> DTypePointer[DType.uint8]:
let rslt = DTypePointer[DType.uint8].alloc(8)
for i in range(8): rslt.offset(i).store(1 << i)
return rslt
 
let masksp = mkMasks()
 
fn intsqrt(n: UInt64) -> UInt64:
if n < 4:
if n < 1: return 0 else: return 1
var x: UInt64 = n; var qn: UInt64 = 0; var r: UInt64 = 0
while qn < 64 and (1 << qn) <= n:
qn += 2
var q: UInt64 = 1 << qn
while q > 1:
if qn >= 64:
q = 1 << (qn - 2); qn = 0
else:
q >>= 2
let t: UInt64 = r + q
r >>= 1
if x >= t:
x -= t; r += q
return r
 
fn countPrimes(n: UInt64) -> Int64:
if n < 3:
if n < 2: return 0
else: return 1
let rtlmt: Int = intsqrt(n).to_int() # precision limits range to maybe 1e16!
let mxndx = (rtlmt - 1) >> 1
@always_inline
fn half(n: Int64) -> Int64 : return ((n - 1) // 2)
@always_inline
fn divide(nm: UInt64, d: UInt64) -> Int64: return ((nm * 1.0) / (d * 1.0)).to_int()
let smalls = # current accumulated counts of odd primes 1 to sqrt range
DTypePointer[DType.uint32].alloc(mxndx + 1)
# initialized for no sieving whatsoever other than odds-only - partial sieved by 2:
# 0 odd primes to 1; 1 odd prime to 3, etc....
for i in range(mxndx + 1): smalls.offset(i).store(i)
let roughs = # current odd k-rough numbers up to sqrt of range; k = 2
DTypePointer[DType.uint32].alloc(mxndx + 1)
# initialized to all odd positive numbers 1, 3, 5, ... sqrt range...
for i in range(mxndx + 1): roughs.offset(i).store(i + i + 1)
# array of current phi counts for above roughs...
# these are not strictly `phi`'s since they also include the
# count of base primes in order to match the above `smalls` definition!
let larges = # starts as size of counts just as `roughs` so they align!
DTypePointer[DType.uint64].alloc(mxndx + 1)
# initialized for current roughs after accounting for even prime of two...
for i in range(mxndx + 1): larges.offset(i).store((n // (i + i + 1) - 1) // 2)
# cmpsts is a bit-packed boolean array representing
# odd composite numbers from 1 up to rtlmt used for sieving...
# initialized as "zeros" meaning all odd positives are potentially prime
# note that this array starts at (and keeps) 1 to match the algorithm even
# though 1 is not a prime, as 1 is important in computation of phi...
let cmpsts = DTypePointer[DType.uint8].alloc((mxndx + 8) // 8)
memset_zero(cmpsts, (mxndx + 8) // 8)
 
# number of found base primes and current highest used rough index...
var npc: Int = 0; var mxri: Int = mxndx
for i in range(1, mxndx + 1): # start at index for 3; i will never reach mxndx...
let sqri = (i + i) * (i + 1) # computation of square index!
if sqri > mxndx: break # stop partial sieving due to square index limit!
if (cmpsts.offset(i >> 3).load() & masksp.offset(i & 7).load()) != 0: continue # if not prime
# culling the base prime from cmpsts means it will never be found again
let cp = cmpsts.offset(i >> 3)
cp.store(cp.load() | masksp.offset(i & 7).load()) # cull base prime
let bp = i + i + 1 # base prime from index!
for c in range(sqri, mxndx + 1, bp): # SoE culling of all bp multiples...
let cp = cmpsts.offset(c >> 3); cp.store(cp.load() | masksp.offset(c & 7).load())
# partial sieving to current base prime is now completed!
 
var ri: Int = 0 # to keep track of current used roughs index!
for k in range(mxri + 1): # processing over current roughs size...
# q is not necessarily a prime but may be a
# product of primes not yet culled by partial sieving;
# this is what saves operations compared to recursive Legendre:
let q: UInt64 = roughs.offset(k).load().to_int(); let qi = q >> 1 # index of always odd q!
# skip over values of `q` already culled in the last partial sieve:
if (cmpsts.offset(qi >> 3).load() & masksp.offset(qi & 7).load()) != 0: continue
# since `q` cannot be equal to bp due to cull of bp and above skip;
let d: UInt64 = bp * q # `d` is odd product of some combination of odd primes!
# the following computation is essential to the algorithm's speed:
# see above description in the text for how this works:
larges.offset(ri).store(larges.offset(k).load() -
(larges.offset(smalls.offset(d >> 1).load().to_int() - npc).load() if d <= rtlmt
else smalls.offset(half(divide(n, d))).load().to_int()) + npc)
# eliminate rough values that have been culled in partial sieve:
# note that `larges` and `roughs` indices relate to each other!
roughs.offset(ri).store(q.to_int()); ri += 1 # update rough value; advance rough index
 
var m = mxndx # adjust `smalls` counts for the newly culled odds...
# this is faster than recounting over the `cmpsts` array for each loop...
for k in range(((rtlmt // bp) - 1) | 1, bp - 1, -2): # k always odd!
# `c` is correction from current count to desired count...
# `e` is end limit index no correction is necessary for current cull...
let c = smalls.offset(k >> 1).load() - npc; let e = (k * bp) >> 1
while m >= e:
let cp = smalls.offset(m)
cp.store(cp.load() - c); m -= 1 # correct over range down to `e`
mxri = ri - 1; npc += 1 # set next loop max roughs index; count base prime
# now `smalls` is a LUT of odd prime accumulated counts for all odd primes;
# `roughs` is exactly the "k-roughs" up to the sqrt of range with `k` the
# index of the next prime above the quad root of the range;
# `larges` is the partial prime counts for each of the `roughs` values...
# note that `larges` values include the count of the odd base primes!!!
# `cmpsts` are never used again!
# the following does the top most "phi tree" calculation:
var result: Int64 = larges.load().to_int() # the answer to here is all valid `phis`
for i in range(1, mxri + 1): result -= larges.offset(i).load().to_int() # combined here by subtraction
# compensate for the included odd base prime counts over subracted above:
result += ((mxri + 1 + 2 * (npc - 1)) * mxri // 2)
 
# This loop adds the counts due to the products of the `roughs` primes,
# of which we only use two different ones at a time, as all the
# combinations with lower primes than the cube root of the range have
# already been computed and included with the previous major loop...
# see text description above for how this works...
for j in range(1, mxri + 1): # for all `roughs` (now prime) not including one:
let p: UInt64 = roughs.offset(j).load().to_int()
let m: UInt64 = (n // p) # `m` is the `p` quotient
# so that the end limit `e` can be calculated based on `n`/(`p`^2)
let e: Int = smalls.offset(half((m // p).to_int())).load().to_int() - npc
# following break test equivalent to non-memoization/non-splitting optmization:
if e <= j: break # stop at about `p` of cube root of range!
for k in range(j + 1, e + 1): # for all `roughs` greater than `p` to end limit:
result += smalls.offset(half(divide(m, roughs.offset(k).load().to_int()))).load().to_int()
# compensate for all the extra base prime counts just added!
result -= ((e - j) * (npc + j - 1))
 
result += 1 # include the count for the only even prime of two
smalls.free(); roughs.free(); larges.free(); cmpsts.free()
 
return result
 
fn main():
var pow: Int = 1
for i in range(10):
print('10^', i, '=', countPrimes(pow))
pow *= 10
let start = now()
let answr = countPrimes(cLIMIT)
let elpsd = (now() - start) / 1000000
print("Found", answr, "primes up to", cLIMIT, "in", elpsd, "milliseconds.")</syntaxhighlight>
 
{{out}}
 
<pre>10^ 0 = 0
10^ 1 = 4
10^ 2 = 25
10^ 3 = 168
10^ 4 = 1229
10^ 5 = 9592
10^ 6 = 78498
10^ 7 = 664579
10^ 8 = 5761455
10^ 9 = 50847534
Found 4118054813 primes up to 100000000000 in 16.696860000000001 milliseconds.</pre>
 
This is as run on an AMD 7840HS single-thread boosted to 5.1 GHz. It is over twice as slow as when run from the Nim code it is translated from, likely due to LLVM not handling this algorithm as well as the GCC compiler that did the final "back-end" compilation for the Nim code.
 
=={{header|Nim}}==
Line 740 ⟶ 2,791:
for i in 0..9:
echo "π(10^$1) = $2".format(i, π(n))
n *= 10</syntaxhighlight></syntaxhighlight>
 
This is not quite as fast as the Nim version or other languages whose back end is C as the LLVM-based back ends don't compile to quite the same optimization level for this algorithm than does GCC.
 
{{out}}
Line 935 ⟶ 2,988:
The reason that this algorithm can reduce the number of operations as compared to the "standard" recursive Legendre algorithm in that the product of some combinations of multiple primes is compensated in one operation rather than many "branching"/recursive operations as in the recursive or semi-recursive algorithms.
 
One might question the validity of this "partial sieving" optimization considering the task explanation, but the Legendre prime counting function was likely never used practically at the time it was invented other than for trivial examples showing that it worked as mentioned above since there were no electronic computers; the follow-on work by Meissel was used by him to hand calculate the number of primes to a billion (1e9) over the course of some number of years in about 1870, and he almostmost certainlylikely would have used partial sieving as otherwise he would have done over ninefive million long divisions or over two600 millionthousand long divisions even with use of a "TinyPhi" Look Up Table of degree six, which would not be possible in his lifetime at something like a hundred long division calculations a day. D. H. Lehmer did not use partial sieving in his computer work on prime counting in 1959, but that was because he was focused on fitting a version of the Meissel algorithm into the memory limitations of the computers of that day (a few thousand words of RAM memory) such that he had to pre-calculate and store the primes values required on a digital magnetic tape that had to be played several times as the calculation proceeded and thus didn't fully investigate (or fully understand) all of the optimizations as used by Meissel, using the brute mathematical computational strength of the computer to overcome the lack of optimizations. So partial sieving as an optimization technique would likely have been known (by Meissel) but not applied until Legarias, Miller, and Odlyzko (LMO) investigated a way to apply it to a computer algorithm. It could have been applied to both the Legendre and Meissel algorithms much sooner other than, at least for the Legendre algorithm, the need for memory proportional to the square root of the counting range would have limited the useful range to counting ranges of 1e12 or so when computers of the day had at most several Megabytes of RAM memory. One of the biggest advantages of LMO compared to even this optimization of Legendre is it greatly reduces memory requirements such that in 1985 it was possible to determine the count of primes to 4e16 on a mainframe computer with only a few megabytes of RAM memory, '''which took 1730 minutes''' even multi-threaded (two threads).
 
In short, the defining characteristic of prime counting functions of the Legendre type is only requiring a sieve to the square root of the counting range, of functions of the Meissel type that they require a sieve to the power of two thirds of the counting range, of functions of the Meissel-Lehmer type that they require a sieve to the power of three quarters of the counting range, with later Meissel variants such as that of Deleglise and Gourdon being tuned variants that are between the Legendre requirement and the Meissel requirement in order to balance the time spent counting and the time spent sieving, with all of them decreasing the number of "Phi" operations as the sieving range increases. All of these algorithms can have the improvement of the LMO "partial sieving" technique applied to them (although LMO is as applied to Meissel) in "splitting" the calculation to above some limit and below that limit, with LMO splitting between values above and below the counting range to the power of two thirds and this algorithm splitting at the square root of the counting range. The combination of "partial sieving" and this "splitting" reduces the computational complexity by combining many of the divisors of products of more than one prime into one operation. This algorithm has approximately the computational complexity of Meissel-Lehmer without the increased sieving range because these operations have been combined into the calculation of "Phi"/φ so that that the calculations of "P2" and "P3" are not necessary and therefore the sieving of the counting range to the three quarters power is not necessary.
 
A Nim implementation of the above described algorithm is as follows:
Line 1,066 ⟶ 3,121:
 
As counting range is increased above these trivial ranges, a better algorithm such as that of LMO will likely perform much better than this and use much less memory (to proportional to the cube root of the range), although at the cost of greater code complexity and more lines of code (about 500 LOC). The trade-off between algorithms of the Legendre type and those of the Meissel type is that the latter decreases the time complexity but at the cost of more time spent sieving; modern follow-on work to LMO produces algorithms which are able to tune the balance between Legendre-type and Meissel-type algorithm in trading off the execution time costs between the different parts of the given algorithm for the best in performance, with the latest in Kim Walisch's tuning of Xavier Gourdon's work being about five to ten times faster than LMO (plus includes multi-threading abilities).
 
=={{header|Pascal}}==
<syntaxhighlight lang="pascal">
// Rosetta Code task "Legendre prime counting function".
// Solution for Free Pascal (Lazarus) or Delphi.
program LegendrePrimeCount;
 
{$IFDEF FPC} // Free Pascal
{$MODE Delphi}
{$ELSE} // Delphi
{$APPTYPE CONSOLE}
{$ENDIF}
 
// Optimization needs to be on if the program is to finish in a reasonable
// length of time. See "Comments on Task" on the Rosetta Code website.
{$DEFINE SIMPLE_OPTIMIZATION}
 
uses SysUtils, Types;
 
{-------------------------------------------------------------------
Function to return an array of primes up to the passed-in limit.
Uses a straightforward Eratosthenes sieve.
TIntegerDynArray must be 0-based. To be compatible with Rosetta Code,
the first prime 2 is at result[1], and result[0] is not used.
}
function FindPrimes( limit : integer) : Types.TIntegerDynArray;
var
deleted : array of boolean;
j, k, p, resultSize : integer;
begin
if (limit < 2) then begin
SetLength( result, 1);
exit;
end;
SetLength( deleted, limit + 1); // 0..limit
deleted[0] := true;
for j := 1 to limit do deleted[j] := false;
p := 2;
while (p*p <= limit) do begin
j := 2*p;
while (j <= limit) do begin
deleted[j] := true;
inc( j, p);
end;
repeat inc(p)
until (p > limit) or (not deleted[p]);
end;
resultSize := 0;
for j := 0 to limit do
if not deleted[j] then inc( resultSize);
SetLength( result, resultSize);
k := 0;
for j := 0 to limit do begin
if not deleted[j] then begin
result[k] := j;
inc(k);
end;
end;
end;
 
{-----------------------------------------------------------------------------
Function to count primes up to the passed-in limit, by Legendre's method.
Iterative, using a stack. Each item in the stack is a term phi(x,a) along
with a sign. If the top item on the stack can be evaluated easily, it is
popped off and its value is added to the result. Else the top item is
replaced by two items according to the formual in the task description.
}
function CountPrimes( n : integer) : integer;
type
TPhiTerm = record
IsNeg : boolean;
x : integer;
a : integer;
end;
const
STACK_SIZE = 100; // 10 is enough for n = 10^9
var
primes : Types.TIntegerDynArray;
nrPrimes : integer;
stack : array [0..STACK_SIZE - 1] of TPhiTerm;
sp : integer; // stack pointer, points to first free entry
tos : TPhiTerm; // top of stack
begin
primes := FindPrimes( Trunc( Sqrt( n + 0.5)));
nrPrimes := Length( primes) - 1; // primes[0] is not used
result := nrPrimes - 1; // initialize total
// Push initial entry onto stack
with stack[0] do begin
IsNeg := false;
x := n;
a := nrPrimes;
end;
sp := 1;
while sp > 0 do begin
tos := stack[sp - 1];
{$IFDEF SIMPLE_OPTIMIZATION}
// Using optimization described in "Comments on Task"
if tos.x = 0 then begin // top of stack = 0
dec(sp); // pop top of stack, no change to result
end
else if (tos.a > 0) and (tos.x < primes[tos.a]) then begin // top of stack = 1
dec( sp); // pop top of stack, update result
if tos.IsNeg then dec( result)
else inc( result);
end
else if tos.a = 0 then begin
{$ELSE}
// Using only the task description, i.e. only phi(x,0) = x
if tos.a = 0 then begin
{$ENDIF}
dec( sp); // pop top of stack, update result
if tos.IsNeg then dec( result, tos.x)
else inc( result, tos.x);
end
else begin
// Replace top of stack by two items as in the task description,
// namely phi(x, a - 1) and -phi(x div primes[a], a - 1)
if (sp >= STACK_SIZE) then
raise SysUtils.Exception.Create( 'Legendre phi stack overflow');
with stack[sp - 1] do begin
IsNeg := tos.isNeg;
x := tos.x;
a := tos.a - 1;
end;
with stack[sp] do begin
IsNeg := not tos.IsNeg;
x := tos.x div primes[tos.a];
a := tos.a - 1;
end;
inc(sp);
end;
end;
end;
 
{-----------------------------------------------------------
Main routine
}
var
power, limit, count : integer;
begin
WriteLn( 'Limit Count');
limit := 1;
for power := 0 to 9 do begin
if power > 0 then limit := 10*limit;
count := CountPrimes( limit);
WriteLn( SysUtils.Format( '10^%d %10d', [power, count]))
end;
end.
</syntaxhighlight>
{{out}}
<pre>
Limit Count
10^0 0
10^1 4
10^2 25
10^3 168
10^4 1229
10^5 9592
10^6 78498
10^7 664579
10^8 5761455
10^9 50847534
</pre>
 
=={{header|Perl}}==
Line 1,179 ⟶ 3,397:
</pre>
<small>(It is about 4 times slower under pwa/p2js so output is limited to 10^8, unless you like staring at a blank screen for 52s)</small>
=== Non-recursive partial sieve ===
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #7060A8;">requires</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"1.0.2"</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (for in, tagstart)</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">half</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">return</span> <span style="color: #7060A8;">floor</span><span style="color: #0000FF;">((</span><span style="color: #000000;">n</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)/</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">function</span> <span style="color: #000080;font-style:italic;">// convenience convert to idx</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">count_primes</span><span style="color: #0000FF;">(</span><span style="color: #004080;">atom</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">// non-recursive Legendre prime counting function for a range `n`...
// has O(n^(3/4)/((log n)^2)) time complexity; O(n^(1/2)) space complexity.</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">n</span><span style="color: #0000FF;"><</span><span style="color: #000000;">3</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #7060A8;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;"><</span><span style="color: #000000;">2</span><span style="color: #0000FF;">?</span><span style="color: #000000;">0</span><span style="color: #0000FF;">:</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> <span style="color: #000080;font-style:italic;">// can't odd sieve for n less than 3!</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">sqrtn</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trunc</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sqrt</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;">)),</span> <span style="color: #000080;font-style:italic;">// (actual limit)</span>
<span style="color: #000000;">mxndx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">floor</span><span style="color: #0000FF;">((</span><span style="color: #000000;">sqrtn</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)/</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">// odds-only limit
--
-- smalls is the current accumulated counts of odd primes 1 to sqrt(n), initialized
-- to odds-only sieving, ie {0,1,2,3,4...} meaning 0 odd primes to 1, 1 o.p to 3,...
--
-- roughs is the current odd k-rough numbers up to sqrt of range; k = 2
-- initialized to all odd positive numbers 1, 3, 5, 7, 9, 11, ... sqrt(n)
--
-- larges is an array of current phi counts for the above roughs... except they are
-- not strictly `phi`'s since they also include primes, to match `smalls` above!
-- initialized for current roughs after accounting for the even prime of two...
--
-- composite is a flag array representing odd numbers 1..sqrtn, for sieving.
-- initialized false, meaning all positive odd numbers are potentially prime
-- note that this array starts at (and keeps) 1 to match the algorithm even
-- though 1 is not actually a prime, as 1 is important in computation of phi...
--</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">smalls</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #000000;">mxndx</span><span style="color: #0000FF;">,</span><span style="color: #000000;">0</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">roughs</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">tagstart</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">mxndx</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">2</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">larges</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sq_floor_div</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sq_sub</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sq_div</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;">,</span><span style="color: #000000;">roughs</span><span style="color: #0000FF;">),</span><span style="color: #000000;">1</span><span style="color: #0000FF;">),</span><span style="color: #000000;">2</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">composite</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">false</span><span style="color: #0000FF;">,</span><span style="color: #000000;">mxndx</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">bp</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">3</span><span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">// 'current' base prime</span>
<span style="color: #000000;">nbp</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">// number of base primes found </span>
<span style="color: #000000;">mxri</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">mxndx</span><span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">// current highest used rough index</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">2</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">sqri</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">4</span> <span style="color: #000080;font-style:italic;">// index and square (index-1) limit
// partial sieve loop, adjusting larges/smalls, compressing larges/roughs...</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">sqri</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">mxndx</span> <span style="color: #008080;">do</span> <span style="color: #000080;font-style:italic;">// partial sieve to square index limit</span>
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">composite</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
<span style="color: #000080;font-style:italic;">// cull from composite so they will never be found again</span>
<span style="color: #000000;">composite</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span> <span style="color: #000080;font-style:italic;">// cull bp and multiples</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">c</span><span style="color: #0000FF;">=</span><span style="color: #000000;">sqri</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">mxndx</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> <span style="color: #008080;">by</span> <span style="color: #000000;">bp</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">composite</span><span style="color: #0000FF;">[</span><span style="color: #000000;">c</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #000080;font-style:italic;">// partial sieving to current base prime is now completed!
// now adjust `larges` for latest partial sieve pass...</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">ori</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span> <span style="color: #000080;font-style:italic;">// compress input rough index(k) to output one</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">,</span><span style="color: #000000;">q</span> <span style="color: #008080;">in</span> <span style="color: #000000;">roughs</span> <span style="color: #008080;">to</span> <span style="color: #000000;">mxri</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
<span style="color: #000080;font-style:italic;">// q is not necessarily prime but may be a product of primes not yet
// culled by partial sieving (saves ops cmprd to recursive Legendre)
// skip over values of `q` already culled in the last partial sieve:</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">qi</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">floor</span><span style="color: #0000FF;">(</span><span style="color: #000000;">q</span><span style="color: #0000FF;">/</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">;</span> <span style="color: #000080;font-style:italic;">// index of always odd q!</span>
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">composite</span><span style="color: #0000FF;">[</span><span style="color: #000000;">qi</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
<span style="color: #000080;font-style:italic;">// since `q` cannot be equal to bp due to cull of bp and above skip;</span>
<span style="color: #004080;">atom</span> <span style="color: #000000;">d</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">bp</span><span style="color: #0000FF;">*</span><span style="color: #000000;">q</span><span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">// `d` is odd product of some combination of odd primes!
// the following computation is essential to the algorithm's speed,
// see the Nim entry for the full details of how this works</span>
<span style="color: #000000;">dadj</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">d</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">sqrtn</span> <span style="color: #0000FF;">?</span> <span style="color: #000000;">larges</span><span style="color: #0000FF;">[</span><span style="color: #000000;">smalls</span><span style="color: #0000FF;">[</span><span style="color: #000000;">half</span><span style="color: #0000FF;">(</span><span style="color: #000000;">d</span><span style="color: #0000FF;">)]-</span><span style="color: #000000;">nbp</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
<span style="color: #0000FF;">:</span> <span style="color: #000000;">smalls</span><span style="color: #0000FF;">[</span><span style="color: #000000;">half</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">floor</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;">/</span><span style="color: #000000;">d</span><span style="color: #0000FF;">))])</span>
<span style="color: #000000;">ori</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">larges</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ori</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">larges</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]-</span><span style="color: #000000;">dadj</span><span style="color: #0000FF;">+</span><span style="color: #000000;">nbp</span> <span style="color: #000080;font-style:italic;">// base primes count over subtracted!
// eliminate rough values that have been culled in partial sieve:
// note that `larges` and `roughs` indices relate to each other!</span>
<span style="color: #000000;">roughs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ori</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">q</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">mxndx</span> <span style="color: #000080;font-style:italic;">// and adjust `smalls` for latest partial sieve pass...
// this is faster than recounting over the `composite` array for each loop...</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=(</span><span style="color: #000000;">sqrtn</span><span style="color: #0000FF;">/</span><span style="color: #000000;">bp</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)||</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">bp</span> <span style="color: #008080;">by</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">2</span> <span style="color: #008080;">do</span> <span style="color: #000080;font-style:italic;">// k always odd!
// `c` is correction from current count to desired count...
// `e` is end limit index no correction is necessary for current cull...</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">c</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">smalls</span><span style="color: #0000FF;">[</span><span style="color: #000000;">half</span><span style="color: #0000FF;">(</span><span style="color: #000000;">k</span><span style="color: #0000FF;">)]-</span><span style="color: #000000;">nbp</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">e</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">floor</span><span style="color: #0000FF;">((</span><span style="color: #000000;">k</span><span style="color: #0000FF;">*</span><span style="color: #000000;">bp</span><span style="color: #0000FF;">)/</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">>=</span><span style="color: #000000;">e</span> <span style="color: #008080;">do</span>
<span style="color: #000080;font-style:italic;">-- smalls[m+1] -= c -- (grr, js! [I have a plan, working on it])</span>
<span style="color: #000000;">m</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">smalls</span><span style="color: #0000FF;">[</span><span style="color: #000000;">m</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">c</span>
<span style="color: #000080;font-style:italic;">-- m -= 1</span>
<span style="color: #000000;">m</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">2</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #000000;">nbp</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> <span style="color: #000080;font-style:italic;">// increase number of found base primes</span>
<span style="color: #000000;">mxri</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ori</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #000080;font-style:italic;">// advance rough index for later</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">bp</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">2</span>
<span style="color: #000000;">sqri</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)*(</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #000080;font-style:italic;">// now `smalls` is a LUT of odd prime accumulated counts for all odd primes;
// `roughs` is exactly the "k-roughs" up to the sqrt of range with `k` (erm,
// mxri?) the index of the next prime above the quad root of the range;
// `larges` is the partial prime counts for each of the `roughs` values...
// note that `larges` values include the count of the odd base primes!!!
// - and `composite` is never used again!
// the following does the top-most "phi tree" calculation:
// the answer to here is all valid `phis`, combined here by subtraction,
// + compensate for included odd base prime counts over subracted above:</span>
<span style="color: #004080;">atom</span> <span style="color: #000000;">result</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">larges</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">-</span> <span style="color: #7060A8;">sum</span><span style="color: #0000FF;">(</span><span style="color: #000000;">larges</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">..</span><span style="color: #000000;">mxri</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span>
<span style="color: #0000FF;">+</span> <span style="color: #7060A8;">trunc</span><span style="color: #0000FF;">((</span><span style="color: #000000;">mxri</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">2</span><span style="color: #0000FF;">*(</span><span style="color: #000000;">nbp</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">))*</span><span style="color: #000000;">mxri</span><span style="color: #0000FF;">/</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)</span>
<span style="color: #0000FF;">+</span> <span style="color: #000000;">1</span> <span style="color: #000080;font-style:italic;">// include the only even prime, ie 2
// This loop adds the counts due to the products of the `roughs` primes,
// of which we only use two different ones at a time, as all the
// combinations with lower primes than the cube root of the range have
// already been computed and included with the previous major loop...
// see text description in the Nim entry for how this works...</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">ri</span><span style="color: #0000FF;">,</span><span style="color: #000000;">p</span> <span style="color: #008080;">in</span> <span style="color: #000000;">roughs</span> <span style="color: #008080;">from</span> <span style="color: #000000;">2</span> <span style="color: #008080;">do</span> <span style="color: #000080;font-style:italic;">// for all `roughs` (now prime) bar '1':</span>
<span style="color: #004080;">atom</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trunc</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;">/</span><span style="color: #000000;">p</span><span style="color: #0000FF;">),</span> <span style="color: #000080;font-style:italic;">// `m` is the `p` quotient
// so that the end limit `e` can be calculated based on `n`/(`p`^2)</span>
<span style="color: #000000;">e</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">smalls</span><span style="color: #0000FF;">[</span><span style="color: #000000;">half</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">floor</span><span style="color: #0000FF;">(</span><span style="color: #000000;">m</span><span style="color: #0000FF;">/</span><span style="color: #000000;">p</span><span style="color: #0000FF;">))+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]-</span><span style="color: #000000;">nbp</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span>
<span style="color: #000080;font-style:italic;">// the following test is equivalent to non-splitting optmization:</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">e</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">ri</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> <span style="color: #000080;font-style:italic;">// quit when no more pairs! - aka stop
// at about `p` of cube root of range!</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #000000;">ri</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">e</span> <span style="color: #008080;">do</span> <span style="color: #000080;font-style:italic;">// for all `roughs` greater than `p` to limit:</span>
<span style="color: #000000;">result</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">smalls</span><span style="color: #0000FF;">[</span><span style="color: #000000;">half</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">floor</span><span style="color: #0000FF;">(</span><span style="color: #000000;">m</span><span style="color: #0000FF;">/</span><span style="color: #000000;">roughs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]))];</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #000080;font-style:italic;">// compensate for all the extra base prime counts just added!</span>
<span style="color: #000000;">result</span> <span style="color: #0000FF;">-=</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">e</span><span style="color: #0000FF;">-</span><span style="color: #000000;">ri</span><span style="color: #0000FF;">)*(</span><span style="color: #000000;">nbp</span><span style="color: #0000FF;">+</span><span style="color: #000000;">ri</span><span style="color: #0000FF;">-</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">result</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #004080;">atom</span> <span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">time</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">expected</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">4</span><span style="color: #0000FF;">,</span><span style="color: #000000;">25</span><span style="color: #0000FF;">,</span><span style="color: #000000;">168</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1229</span><span style="color: #0000FF;">,</span><span style="color: #000000;">9592</span><span style="color: #0000FF;">,</span><span style="color: #000000;">78498</span><span style="color: #0000FF;">,</span><span style="color: #000000;">664579</span><span style="color: #0000FF;">,</span><span style="color: #000000;">5761455</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">50847534</span><span style="color: #0000FF;">,</span><span style="color: #000000;">455052511</span><span style="color: #0000FF;">,</span><span style="color: #000000;">4118054813</span><span style="color: #0000FF;">,</span><span style="color: #000000;">37607912018</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">346065536839</span><span style="color: #0000FF;">,</span><span style="color: #000000;">3204941750802</span><span style="color: #0000FF;">}</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">iff</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">platform</span><span style="color: #0000FF;">()=</span><span style="color: #004600;">JS</span><span style="color: #0000FF;">?</span><span style="color: #000000;">11</span><span style="color: #0000FF;">:</span><span style="color: #000000;">14</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> <span style="color: #000080;font-style:italic;">-- (sp: keep js under 2s)</span>
<span style="color: #004080;">atom</span> <span style="color: #000000;">c</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">count_primes</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">power</span><span style="color: #0000FF;">(</span><span style="color: #000000;">10</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">))</span>
<span style="color: #7060A8;">assert</span><span style="color: #0000FF;">(</span><span style="color: #000000;">c</span><span style="color: #0000FF;">==</span><span style="color: #000000;">expected</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">e</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">elapsed</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">time</span><span style="color: #0000FF;">()-</span><span style="color: #000000;">t</span><span style="color: #0000FF;">,</span><span style="color: #000000;">0.1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" (%s)"</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"10^%d = %d%s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">i</span><span style="color: #0000FF;">,</span><span style="color: #000000;">c</span><span style="color: #0000FF;">,</span><span style="color: #000000;">e</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\nTook %s\n"</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">elapsed</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">time</span><span style="color: #0000FF;">()-</span><span style="color: #000000;">t</span><span style="color: #0000FF;">))</span>
<!--</syntaxhighlight>-->
{{out}}
<pre>
10^0 = 0
10^1 = 4
10^2 = 25
10^3 = 168
10^4 = 1229
10^5 = 9592
10^6 = 78498
10^7 = 664579
10^8 = 5761455
10^9 = 50847534
10^10 = 455052511 (0.3s)
10^11 = 4118054813 (1.7s)
10^12 = 37607912018 (7.9s)
10^13 = 346065536839 (40.8s)
10^14 = 3204941750802 (3 minutes and 44s)
 
Took 3 minutes and 44s
</pre>
About 15 times slower than Julia on the same box (gulp, earmarked for further investigation in 2.0.0).<br>
[At the last minute, I nicked the idea of using a simple bool array instead of those bit-fields from Julia.]<br>
A copy of this code [up to 1e9] has also found it's way into tests\t68forin.exw
 
=={{header|Picat}}==
Line 1,317 ⟶ 3,698:
</pre>
 
=={{header|Ruby}}==
{{trans|Wren}}
<syntaxhighlight lang="ruby">require 'prime'
 
def pi(n)
@pr = Prime.each(Integer.sqrt(n)).to_a
a = @pr.size
case n
when 0,1 then 0
when 2 then 1
else phi(n,a) + a - 1
end
end
 
def phi(x,a)
case a
when 0 then x
when 1 then x-(x>>1)
else
pa = @pr[a-1]
return 1 if x <= pa
phi(x, a-1)- phi(x/pa, a-1)
end
end
 
(0..9).each {|n| puts "10E#{n} #{pi(10**n)}" }
</syntaxhighlight>
{{out}}
<pre>10E0 0
10E1 4
10E2 25
10E3 168
10E4 1229
10E5 9592
10E6 78498
10E7 664579
10E8 5761455
10E9 50847534
</pre>
=={{header|Sidef}}==
<syntaxhighlight lang="ruby">func legendre_phi(x, a) is cached {
Line 1,502 ⟶ 3,922:
π(10^8) = 5761455
π(10^9) = 50847534</pre>
 
=={{header|Vlang}}==
 
There doesn't seem to be much point to contributing memoized (Hash table) versions as they seem to be the result of the task author not realizing that there is a simple way of keeping the computation complexity from having exponential growth with counting range by limiting the "splitting" of the "phi" calculation when the result must be just one. Currently, anonymous functions and closure recursion doesn't work very well (which is why the V language is still in beta) so those slower algorithms don't work properly. However, the "partial sieving" Nim version has been [translated from the non-memoizing Nim version](https://rosettacode.org/wiki/Legendre_prime_counting_function#Non-Memoized_Versions), which explanations can be referenced for an understanding of how it works:
 
'''The Non-Memoizing Non-Recursive "Partial-Sieving" Algorithm'''
 
<syntaxhighlight lang="vlang">// compile with: v -cflags -march=native -cflags -O3 -prod MagicCount.v
 
import time
import math { sqrt }
 
const masks = [ u8(1), 2, 4, 8, 16, 32, 64, 128 ] // faster than bit twiddling!
 
[inline]
fn half(n u64) i64 { return i64((n - 1) >>> 1) } // convenience function!
[inline] // floating point divide faster than integer divide!
fn divide(nm u64, d u64) u64 { return u64(f64(nm) / f64(d)) }
 
[direct_array_access]
fn count_primes_to(n u64) i64 {
if n < u64(9) { return if n < i64(2) { i64(0) } else { i64((n + 1) / 2) } }
rtlmt := u64(sqrt(f64(n)))
mxndx := i64((rtlmt - 1) / 2)
arrlen := int(mxndx + 1)
mut smalls := []u32{ len: arrlen, cap: arrlen, init: u32(it) }
mut roughs := []u32{ len: arrlen, cap: arrlen, init: u32(it + it + 1) }
mut larges := []i64{ len: arrlen, cap: arrlen,
init: i64(((n / u64(it + it + 1) - 1) / 2)) }
cullbuflen := int((mxndx + 8) / 8)
mut cullbuf := []u8{len: cullbuflen, cap: cullbuflen, init: u8(0)}
 
mut nbps := i64(0) // do partial sieving for each odd base prime to quad root...
mut rilmt := arrlen // number of roughs start with full size!
for i := i64(1); ; i++ { // start with 3; breaks when partial sieving done...
sqri := (i + i) * (i + 1)
if sqri > mxndx { break } // breaks here!
if (cullbuf[i >>> 3] & masks[i & 7]) != u8(0) { continue } // not prime!
cullbuf[i >>> 3] |= masks[i & 7] // cull bp rep!
bp := u64(i + i + 1) // partial sieve by bp...
for c := sqri; c < arrlen; c += i64(bp) { cullbuf[c >>> 3] |= masks[c & 7] }
 
mut nri := 0 // transfer from `ori` to `nri` indexes:
for ori in 0 .. rilmt { // update `roughs` and `larges` for partial sieve...
r := roughs[ori]
rci := i64(r >>> 1) // skip recently culled in last partial sieve...
if (cullbuf[rci >>> 3] & masks[rci & 7]) != u8(0) { continue }
d := u64(r) * u64(bp)
larges[nri] = larges[ori] -
(if d <= rtlmt { larges[i64(smalls[d >>> 1]) - nbps] }
else { i64(smalls[half(divide(n, d))]) })
+ nbps // adjust for over subtraction of base primes!
roughs[nri] = r // compress for culled `larges` and `roughs`!
nri++ // advance output `roughs` index!
}
 
mut si := mxndx // update `smalls` for partial sieve...
for pm := ((rtlmt / bp) - 1) | 1; pm >= bp; pm -= 2 {
c := smalls[pm >>> 1]
e := (pm * bp) >>> 1
for ; si >= e; si-- { smalls[si] -= (c - u32(nbps)) }
}
 
rilmt = nri // new rough size limit!
nbps++ // for one partial sieve base prime pass!
}
 
// combine results so far; adjust for over subtraction of base prime count...
mut ans := larges[0] + i64(((rilmt + 2 * (nbps - 1)) * (rilmt - 1) / 2))
for ri in 1 .. rilmt { ans -= larges[ri] } // combine results so far!
 
// add quotient for product of pairs of primes, quad root to cube root...
for ri := 1; ; ri++ { // break controlled below
p := u64(roughs[ri])
m := n / p
ei := int(smalls[half(u64(m / p))]) - nbps
if ei <= ri { break } // break out when only multiple equal to `p`
ans -= i64((ei - ri) * (nbps + ri - 1)) // adjust for over addition below!
for sri in ri + 1 .. ei + 1 { // add for products of 1st and 2nd primes...
ans += i64(smalls[half(divide(m, u64(roughs[sri])))]) }
}
 
return ans + 1 // add one for only even prime of two!
}
 
start_time := time.now()
mut pow := u64(1)
for i in 0 .. 10 {
println("π(10**$i) = ${count_primes_to(pow)}")
pow *= 10 }
duration := (time.now() - start_time).milliseconds()
println("This took $duration milliseconds.")</syntaxhighlight>
{{out}}
<pre>π(10**0) = 0
π(10**1) = 4
π(10**2) = 25
π(10**3) = 168
π(10**4) = 1229
π(10**5) = 9592
π(10**6) = 78498
π(10**7) = 664579
π(10**8) = 5761455
π(10**9) = 50847534
This took 2 milliseconds.</pre>
 
The above code is about as fast as the same algorithm in the fastest of languages such as C/C++/Nim due to the greatly reduced computational complexity of O(n**(3/4)/((log n)**2)); it can compute the number of primes to 1e11 in about 22 milliseconds on the same machine as shown where it is run on an Intel i5-6500 (3.6 GHz single-thread boosted). The above code can compute the number of primes to 1e16 in about a minute, although there is a precision/accuracy problem for counting ranges above about 9e15 due to the speed-up optimization of using floating point division operations (precise only to 53-bits, which is about this limit).
 
=={{header|Wren}}==
Line 1,509 ⟶ 4,035:
 
To sieve a billion numbers and then count the primes up to 10^k would take about 53 seconds in Wren so, as expected, the Legendre method represents a considerable speed up.
<syntaxhighlight lang="ecmascriptwren">import "./math" for Int
 
var pi = Fn.new { |n|
Line 1,553 ⟶ 4,079:
 
The memoized version requires a cache of around 6.5 million map entries, which at 8 bytes each (all numbers are 64 bit floats in Wren) for both the key and the value, equates in round figures to memory usage of 104 MB on top of that needed for the prime sieve. The following version strips out memoization and, whilst somewhat slower at 5.4 seconds, may be preferred in a constrained memory environment.
<syntaxhighlight lang="ecmascriptwren">import "./math" for Int
 
var pi = Fn.new { |n|
Line 1,574 ⟶ 4,100:
for (i in 0..9) {
System.print("10^%(i) %(pi.call(n))")
n = n * 10
}</syntaxhighlight>
 
{{out}}
<pre>
Same as first version.
</pre>
===Iterative, partial sieving===
{{trans|Vlang}}
This non-memoized, non-recursive version is far quicker than the first two versions, running in about 114 milliseconds. Nowhere near as quick as V, of course, but acceptable for Wren which is interpreted and only has one built-in numeric type, a 64 bit float.
<syntaxhighlight lang="wren">import "./math" for Int
 
var masks = [1, 2, 4, 8, 16, 32, 64, 128]
 
var half = Fn.new { |n| (n - 1) >> 1 }
 
var countPrimes = Fn.new { |n|
if (n < 9) return (n < 2) ? 0 : ((n + 1)/2).floor
var rtlmt = n.sqrt.floor
var mxndx = Int.quo(rtlmt - 1, 2)
var arrlen = mxndx + 1
var smalls = List.filled(arrlen, 0)
var roughs = List.filled(arrlen, 0)
var larges = List.filled(arrlen, 0)
for (i in 0...arrlen) {
smalls[i] = i
roughs[i] = i + i + 1
larges[i] = Int.quo(Int.quo(n, i + i + 1) - 1 , 2)
}
var cullbuflen = Int.quo(mxndx + 8, 8)
var cullbuf = List.filled(cullbuflen, 0)
var nbps = 0
var rilmt = arrlen
var i = 1
while (true) {
var sqri = (i + i) * (i + 1)
if (sqri > mxndx) break
if ((cullbuf[i >> 3] & masks[i & 7]) != 0) {
i = i + 1
continue
}
cullbuf[i >> 3] = cullbuf[i >> 3] | masks[i & 7]
var bp = i + i + 1
var c = sqri
while (c < arrlen) {
cullbuf[c >> 3] = cullbuf[c >> 3] | masks[c & 7]
c = c + bp
}
var nri = 0
for (ori in 0...rilmt) {
var r = roughs[ori]
var rci = r >> 1
if ((cullbuf[rci >> 3] & masks[rci & 7]) != 0) continue
var d = r * bp
var t = (d <= rtlmt) ? larges[smalls[d >> 1] - nbps] :
smalls[half.call(Int.quo(n, d))]
larges[nri] = larges[ori] - t + nbps
roughs[nri] = r
nri = nri + 1
}
var si = mxndx
var pm = ((rtlmt/bp).floor - 1) | 1
while (pm >= bp) {
var c = smalls[pm >> 1]
var e = (pm * bp) >> 1
while (si >= e) {
smalls[si] = smalls[si] - c + nbps
si = si - 1
}
pm = pm - 2
}
rilmt = nri
nbps = nbps + 1
i = i + 1
}
var ans = larges[0] + ((rilmt + 2 * (nbps - 1)) * (rilmt - 1) / 2).floor
for (ri in 1...rilmt) ans = ans - larges[ri]
var ri = 1
while (true) {
var p = roughs[ri]
var m = Int.quo(n, p)
var ei = smalls[half.call(Int.quo(m, p))] - nbps
if (ei <= ri) break
ans = ans - (ei - ri) * (nbps + ri - 1)
for (sri in (ri + 1..ei)) {
ans = ans + smalls[half.call(Int.quo(m, roughs[sri]))]
}
ri = ri + 1
}
return ans + 1
}
 
var n = 1
for (i in 0..9) {
System.print("10^%(i) %(countPrimes.call(n))")
n = n * 10
}</syntaxhighlight>
474

edits