Posit numbers/decoding: Difference between revisions
Content added Content deleted
(→{{header|Wren}}: Rewritten to deal with posit numbers of any size.) |
m (→{{header|Julia}}: add 2's complement handling for negatives) |
||
Line 14: | Line 14: | ||
=={{header|Julia}}== |
=={{header|Julia}}== |
||
<syntaxhighlight lang="julia"> |
<syntaxhighlight lang="julia">""" Posit number, a quotient of integers, variable size and exponent length """ |
||
struct PositType3{T<:Integer} |
|||
numbits::UInt16 |
numbits::UInt16 |
||
es::UInt16 |
es::UInt16 |
||
bits::T |
bits::T |
||
PositType3(nb, ne, i) = new{typeof(i)}(UInt16(nb), UInt16(ne), i) |
PositType3(nb, ne, i) = new{typeof(i)}(UInt16(nb), UInt16(ne), i) |
||
end |
|||
""" |
""" Convert PositType3 to Rational. See also posithub.org/docs/Posits4.pdf """ |
||
function Base.Rational(p::PositType3) |
function Base.Rational(p::PositType3) |
||
s = signbit(signed(p.bits)) # s for S signbit, is 1 if negative |
s = signbit(signed(p.bits)) # s for S signbit, is 1 if negative |
||
pabs = p.bits << 1 # |
pabs = p.bits << 1 # Shift off signbit (adds a 0 to F at LSB) |
||
pabs == 0 && return s ? 1 // 0 : 0 // 1 # |
pabs == 0 && return s ? 1 // 0 : 0 // 1 # If p is 0, return 0 or if s 1 error |
||
s && (pabs = (-p.bits) << 1) # If p is negative, flip to 2's complement |
|||
expsign = signbit(signed(pabs)) # Exponent sign from 2nd bit now MSB |
|||
r = expsign == 1 ? leading_ones(pabs) : leading_zeros(pabs) # r regime R size |
r = expsign == 1 ? leading_ones(pabs) : leading_zeros(pabs) # r regime R size |
||
k = expsign ? r - 1 : -r # k for the exponent calculation |
k = expsign ? r - 1 : -r # k for the exponent calculation |
||
pabs <<= (r + 1) # |
pabs <<= (r + 1) # Shift off unwanted R bits |
||
pabs >>= (r + 2) # |
pabs >>= (r + 2) # Shift back for E, F |
||
fsize = p.numbits - 1 - r - 1 - p.es # |
fsize = p.numbits - 1 - r - 1 - p.es # Check how many F bits explicit |
||
e = fsize < 1 ? pabs : pabs >> fsize # Get E value |
|||
1 + (pabs & (2^fsize-1)) // 2^fsize |
f = fsize < 1 ? 1 : 1 + (pabs & (2^fsize - 1)) // 2^fsize # Get F value (1 by default) |
||
e = fsize < 1 ? pabs : pabs >> fsize # Get E value. |
|||
pw = 2^p.es * k + e |
pw = 2^p.es * k + e |
||
return pw >= 0 ? (-1)^s * f * big"2"^pw // 1 : (-1)^s * f // big"2"^(-pw) |
return pw >= 0 ? (-1)^s * f * big"2"^pw // 1 : (-1)^s * f // big"2"^(-pw) |
||
end |
end |
||
@show Rational(PositType3(16, 3, 0b0000110111011101)) == 477 // 134217728 |
@show Rational(PositType3(16, 3, 0b0000110111011101)) == 477 // 134217728 |
||
const tests = [ |
const tests = [ |
||
(16,3,0b0000110111011101), |
(16, 3, 0b0000110111011101), |
||
(16,3,0b1000000000000000), |
(16, 3, 0b1000000000000000), |
||
(16,3,0b0000000000000000), |
(16, 3, 0b0000000000000000), |
||
(16,1,0b0110110010101000), |
(16, 1, 0b0110110010101000), |
||
(16,1,0b1001001101011000), |
(16, 1, 0b1001001101011000), |
||
(16,2,0b0000000000000001), |
(16, 2, 0b0000000000000001), |
||
(16,0,0b0111111111111111), |
(16, 0, 0b0111111111111111), |
||
(16,6,0b0111111111111110), |
(16, 6, 0b0111111111111110), |
||
(8,1,0b01000000), |
(8, 1, 0b01000000), |
||
(8,1,0b11000000), |
(8, 1, 0b11000000), |
||
(8,1,0b00110000), |
(8, 1, 0b00110000), |
||
(8,1,0b00100000), |
(8, 1, 0b00100000), |
||
(8,2,0b00000001), |
(8, 2, 0b00000001), |
||
(8,2,0b01111111), |
(8, 2, 0b01111111), |
||
(8,7,0b01111110), |
(8, 7, 0b01111110), |
||
(32,2,0b00000000000000000000000000000001), |
(32, 2, 0b00000000000000000000000000000001), |
||
(32,2,0b01111111111111111111111111111111), |
(32, 2, 0b01111111111111111111111111111111), |
||
(32,5,0b01111111111111111111111111111110), |
(32, 5, 0b01111111111111111111111111111110), |
||
] |
] |
||
for t in tests |
for t in tests |
||
r = Rational(PositType3(t...)) |
r = Rational(PositType3(t...)) |
||
println(string(t[3], base=2, pad = t[1]), " => $r = ", float(r)) |
println(string(t[3], base = 2, pad = t[1]), " => $r = ", float(r)) |
||
end |
end |
||
</syntaxhighlight>{{out}} |
</syntaxhighlight>{{out}} |
||
Line 72: | Line 73: | ||
0000000000000000 => 0//1 = 0.0 |
0000000000000000 => 0//1 = 0.0 |
||
0110110010101000 => 405//32 = 12.65625 |
0110110010101000 => 405//32 = 12.65625 |
||
1001001101011000 => - |
1001001101011000 => -405//32 = -12.65625 |
||
0000000000000001 => 1//72057594037927936 = 1.387778780781445675529539585113525390625e-17 |
0000000000000001 => 1//72057594037927936 = 1.387778780781445675529539585113525390625e-17 |
||
0111111111111111 => 16384//1 = 16384.0 |
0111111111111111 => 16384//1 = 16384.0 |
||
Line 83: | Line 84: | ||
01111111 => 16777216//1 = 1.6777216e+07 |
01111111 => 16777216//1 = 1.6777216e+07 |
||
01111110 => 4562440617622195218641171605700291324893228507248559930579192517899275167208677386505912811317371399778642309573594407310688704721375437998252661319722214188251994674360264950082874192246603776//1 = 4.562440617622195218641171605700291324893228507248559930579192517899275167208677e+192 |
01111110 => 4562440617622195218641171605700291324893228507248559930579192517899275167208677386505912811317371399778642309573594407310688704721375437998252661319722214188251994674360264950082874192246603776//1 = 4.562440617622195218641171605700291324893228507248559930579192517899275167208677e+192 |
||
00000000000000000000000000000001 => 1//1329227995784915872903807060280344576 = 7.523163845262640050999913838222372338039459563341360137656010920181870460510254e-37 |
00000000000000000000000000000001 => 1//1329227995784915872903807060280344576 = 7.523163845262640050999913838222372338039459563341360137656010920181870460510254e-37 |
||
01111111111111111111111111111111 => 1329227995784915872903807060280344576//1 = 1.329227995784915872903807060280344576e+36 |
01111111111111111111111111111111 => 1329227995784915872903807060280344576//1 = 1.329227995784915872903807060280344576e+36 |
||
01111111111111111111111111111110 => 2269007733883335972287082669296112915239349672942191252221331572442536403137824056312817862695551072066953619064625508194663368599769448406663254670871573830845597595897613333042429214224697474472410882236254024057110212260250671521235807709272244389361641091086035023229622419456//1 = 2.269007733883335972287082669296112915239349672942191252221331572442536403137824e+279 |
01111111111111111111111111111110 => 2269007733883335972287082669296112915239349672942191252221331572442536403137824056312817862695551072066953619064625508194663368599769448406663254670871573830845597595897613333042429214224697474472410882236254024057110212260250671521235807709272244389361641091086035023229622419456//1 = 2.269007733883335972287082669296112915239349672942191252221331572442536403137824e+279 |
||
</pre> |
</pre> |
||
Revision as of 04:21, 21 September 2023
Posit numbers/decoding is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Posit is a quantization of the real projective line proposed by John Gustafson in 2015. It is claimed to be an improvement over IEEE 754.
The purpose of this task is to write a program capable of decoding a posit number. You will use the example provided by Gustafson in his paper : 0b0000110111011101, representing a 16-bit long real number with three bits for the exponent. Once decoded, you should obtain either the fraction 477/134217728 or the floating point value 3.55393E−6.
Jeff Johnson from Facebook research, described posit numbers as such:
- A more efficient representation for tapered floating points is the recent posit format by Gustafson. It has no explicit size field; the exponent is encoded using a Golomb-Rice prefix-free code, with the exponent encoded as a Golomb-Rice quotient and remainder with in unary and in binary (in posit terminology, is the regime). Remainder encoding size is defined by the exponent scale , where is the Golomb-Rice divisor. Any space not used by the exponent encoding is used by the significand, which unlike IEEE 754 always has a leading 1; gradual underflow (and overflow) is handled by tapering. A posit number system is characterized by , where is the word length in bits and is the exponent scale. The minimum and maximum positive finite numbers in are Failed to parse (syntax error): {\displaystyle f_\mathrm{min} = 2^{−(N−2)2^s}} and Failed to parse (syntax error): {\displaystyle f_\mathrm{max} = 2^{(N−2)2^s}} . The number line is represented much as the projective reals, with a single point at bounding Failed to parse (syntax error): {\displaystyle −f_\mathrm{max}} and . and 0 have special encodings; there is no NaN. The number system allows any choice of and Failed to parse (SVG (MathML can be enabled via browser plugin): Invalid response ("Math extension cannot connect to Restbase.") from server "https://wikimedia.org/api/rest_v1/":): {\displaystyle 0\le s\le N − 3} .
- controls the dynamic range achievable; e.g., 8-bit (8, 5)-posit is larger than in float32. (8, 0) and (8, 1) are more reasonable values to choose for 8-bit floating point representations, with of 64 and 4096 accordingly. Precision is maximized in the range Failed to parse (syntax error): {\displaystyle \pm\left[2^{−(s+1)}, 2^{s+1}\right)} with Failed to parse (SVG (MathML can be enabled via browser plugin): Invalid response ("Math extension cannot connect to Restbase.") from server "https://wikimedia.org/api/rest_v1/":): {\displaystyle N − 3 − s} significand fraction bits, tapering to no fraction bits at .
- — Jeff Johnson, Rethinking floating point for deep learning, Facebook research.
Julia
""" Posit number, a quotient of integers, variable size and exponent length """
struct PositType3{T<:Integer}
numbits::UInt16
es::UInt16
bits::T
PositType3(nb, ne, i) = new{typeof(i)}(UInt16(nb), UInt16(ne), i)
end
""" Convert PositType3 to Rational. See also posithub.org/docs/Posits4.pdf """
function Base.Rational(p::PositType3)
s = signbit(signed(p.bits)) # s for S signbit, is 1 if negative
pabs = p.bits << 1 # Shift off signbit (adds a 0 to F at LSB)
pabs == 0 && return s ? 1 // 0 : 0 // 1 # If p is 0, return 0 or if s 1 error
s && (pabs = (-p.bits) << 1) # If p is negative, flip to 2's complement
expsign = signbit(signed(pabs)) # Exponent sign from 2nd bit now MSB
r = expsign == 1 ? leading_ones(pabs) : leading_zeros(pabs) # r regime R size
k = expsign ? r - 1 : -r # k for the exponent calculation
pabs <<= (r + 1) # Shift off unwanted R bits
pabs >>= (r + 2) # Shift back for E, F
fsize = p.numbits - 1 - r - 1 - p.es # Check how many F bits explicit
e = fsize < 1 ? pabs : pabs >> fsize # Get E value
f = fsize < 1 ? 1 : 1 + (pabs & (2^fsize - 1)) // 2^fsize # Get F value (1 by default)
pw = 2^p.es * k + e
return pw >= 0 ? (-1)^s * f * big"2"^pw // 1 : (-1)^s * f // big"2"^(-pw)
end
@show Rational(PositType3(16, 3, 0b0000110111011101)) == 477 // 134217728
const tests = [
(16, 3, 0b0000110111011101),
(16, 3, 0b1000000000000000),
(16, 3, 0b0000000000000000),
(16, 1, 0b0110110010101000),
(16, 1, 0b1001001101011000),
(16, 2, 0b0000000000000001),
(16, 0, 0b0111111111111111),
(16, 6, 0b0111111111111110),
(8, 1, 0b01000000),
(8, 1, 0b11000000),
(8, 1, 0b00110000),
(8, 1, 0b00100000),
(8, 2, 0b00000001),
(8, 2, 0b01111111),
(8, 7, 0b01111110),
(32, 2, 0b00000000000000000000000000000001),
(32, 2, 0b01111111111111111111111111111111),
(32, 5, 0b01111111111111111111111111111110),
]
for t in tests
r = Rational(PositType3(t...))
println(string(t[3], base = 2, pad = t[1]), " => $r = ", float(r))
end
- Output:
Rational(PositType3(16, 3, 0x0ddd)) == 477 // 134217728 = true 0000110111011101 => 477//134217728 = 3.553926944732666015625e-06 1000000000000000 => 1//0 = Inf 0000000000000000 => 0//1 = 0.0 0110110010101000 => 405//32 = 12.65625 1001001101011000 => -405//32 = -12.65625 0000000000000001 => 1//72057594037927936 = 1.387778780781445675529539585113525390625e-17 0111111111111111 => 16384//1 = 16384.0 0111111111111110 => 28638903918474961204418783933674838490721739172170652529441449702311064005352904159345284265824628375429359509218999720074396860757073376700445026041564579620512874307979212102266801261478978776245040008231745247475930553606737583615358787106474295296//1 = 2.86389039184749612044187839336748384907217391721706525294414497023110640053529e+250 01000000 => 1//1 = 1.0 11000000 => -1//1 = -1.0 00110000 => 1//2 = 0.5 00100000 => 1//4 = 0.25 00000001 => 1//16777216 = 5.9604644775390625e-08 01111111 => 16777216//1 = 1.6777216e+07 01111110 => 4562440617622195218641171605700291324893228507248559930579192517899275167208677386505912811317371399778642309573594407310688704721375437998252661319722214188251994674360264950082874192246603776//1 = 4.562440617622195218641171605700291324893228507248559930579192517899275167208677e+192 00000000000000000000000000000001 => 1//1329227995784915872903807060280344576 = 7.523163845262640050999913838222372338039459563341360137656010920181870460510254e-37 01111111111111111111111111111111 => 1329227995784915872903807060280344576//1 = 1.329227995784915872903807060280344576e+36 01111111111111111111111111111110 => 2269007733883335972287082669296112915239349672942191252221331572442536403137824056312817862695551072066953619064625508194663368599769448406663254670871573830845597595897613333042429214224697474472410882236254024057110212260250671521235807709272244389361641091086035023229622419456//1 = 2.269007733883335972287082669296112915239349672942191252221331572442536403137824e+279
Phix
with javascript_semantics function twos_compliment_2_on(string bits, integer nbits) for i=2 to nbits do bits[i] = iff(bits[i]='0'?'1':'0') end for for i=nbits to 2 by -1 do if bits[i]='0' then bits[i] = '1' exit end if bits[i] = '0' end for return bits end function function posit_decode(integer nbits, es, object bits) -- -- nbits: number of bits (aka n) -- es: exponent scale -- bits: (binary) integer or string of nbits 0|1 -- if not string(bits) then string fmt = sprintf("%%0%db",nbits) bits = sprintf(fmt,bits) end if assert(length(bits)==nbits) string ibits = bits -- save for return integer s = bits[1]='1' if s then bits = twos_compliment_2_on(bits,nbits) end if integer r = find(xor_bits(bits[2],1),bits,3)-2, b2z = bits[2]='0', exponent = 0, fraction = 0 atom fs = 1, useed = power(2,power(2,es)) if r<0 then if b2z then if s then return {ibits,es,"NaR"} -- aka inf end if return {ibits,es,"zero"} end if r = nbits-1 else integer estart = r+3, efinish = min(r+2+es,nbits) exponent = to_integer(bits[estart..efinish],0,2) fraction = to_integer(bits[efinish+1..$],0,2) fs = power(2,nbits-efinish) end if integer k = iff(b2z?-r:r-1) atom res = iff(s?-1:+1)*power(useed,k)*power(2,exponent)*(1+fraction/fs) return {ibits,es,res} end function constant tests = {{16,3,0b0000110111011101}, {16,3,0b1000000000000000}, {16,3,0b0000000000000000}, {16,1,0b0110110010101000}, {16,1,0b1001001101011000}, {16,2,0b0000000000000001}, {16,0,0b0111111111111111}, {16,2,0b0111111111111111}, {16,6,0b0111111111111110}, {8,1,0b01000000}, {8,1,0b11000000}, {8,1,0b00110000}, {8,1,0b00100000}, {8,2,0b00000001}, {8,2,0b01111111}, {8,7,0b01111110}, {32,2,0b00000000000000000000000000000001}, {32,2,0b01111111111111111111111111111111}, {32,5,0b01111111111111111111111111111110}} for t in tests do printf(1,"%s (es=%d) ==> %v\n",call_func(posit_decode,t)) end for
- Output:
(Still disagrees with Julia on the -12.65625, but I think I'm right)
0000110111011101 (es=3) ==> 3.553926944e-6 1000000000000000 (es=3) ==> "NaR" 0000000000000000 (es=3) ==> "zero" 0110110010101000 (es=1) ==> 12.65625 1001001101011000 (es=1) ==> -12.65625 0000000000000001 (es=2) ==> 1.38777878e-17 0111111111111111 (es=0) ==> 16384 0111111111111111 (es=2) ==> 7.205759404e+16 0111111111111110 (es=6) ==> 2.863890392e+250 01000000 (es=1) ==> 1 11000000 (es=1) ==> -1 00110000 (es=1) ==> 0.5 00100000 (es=1) ==> 0.25 00000001 (es=2) ==> 5.960464478e-8 01111111 (es=2) ==> 16777216 01111110 (es=7) ==> 4.562440618e+192 00000000000000000000000000000001 (es=2) ==> 7.523163846e-37 01111111111111111111111111111111 (es=2) ==> 1.329227996e+36 01111111111111111111111111111110 (es=5) ==> 2.269007734e+279
raku
unit role Posit[UInt $N, UInt $es];
has UInt $.UInt;
method sign { self.UInt > 2**($N - 1) ?? -1 !! +1 }
method FatRat {
return 0 if self.UInt == 0;
my UInt $mask = 2**($N - 1);
return Inf if self.UInt == $mask;
my UInt $n = self.UInt;
my $sign = $n +& $mask ?? -1 !! +1;
my $r = $sign;
$n = ((2**$n - 1) +^ $n) + 1 if self.sign < 0;
my int $count = 0;
$mask +>= 1;
my Bool $first-bit = ?($n +& $mask);
repeat { $count++; $mask +>= 1;
} while ?($n +& $mask) == $first-bit && $mask;
my $m = $count;
my $k = $first-bit ?? $m - 1 !! -$m;
$r *= 2**($k*2**$es);
return $r unless $mask > 1;
$mask +>= 1;
$count = 0;
my UInt $exponent = 0;
while $mask && $count++ < $es {
$exponent +<= 1;
$exponent +|= 1 if $n +& $mask;
$mask +>= 1;
}
$r *= 2**$exponent;
my $fraction = 1.FatRat;
while $mask {
(state $power-of-two = 1) +<= 1;
$fraction += 1/$power-of-two if $n +& $mask;
$mask +>= 1;
}
$r *= $fraction;
return $r;
}
CHECK {
use Test;
# example from L<http://www.johngustafson.net/pdfs/BeatingFloatingPoint.pdf>
is Posit[16, 3]
.new(UInt => 0b0000110111011101)
.FatRat, 477.FatRat/134217728;
}
- Output:
ok 1 -
Wren
import "./fmt" for Conv, Fmt
import "./big" for BigRat, BigInt
var positDecode = Fn.new { |ps, maxExpSize|
var p = ps.map { |c| c == "0" ? 0 : 1 }.toList
// Deal with exceptional values.
if (p[1..-1].all { |i| i == 0 }) {
return (p[0] == 0) ? BigRat.zero : Conv.infinity
}
// Convert bits after sign bit to two's complement if negative.
var n = p.count
if (p[0] == 1) {
for (i in 1...n) p[i] = (p[i] == 0) ? 1 : 0
for (i in n-1..1) {
if (p[i] == 1) {
p[i] = 0
} else {
p[i] = 1
break
}
}
}
var first = p[1]
var rs = n - 1 // regime size
for (i in 2...n) {
if (p[i] != first) {
rs = i - 1
break
}
}
var regime = p[1..rs]
var es = (rs == n - 1) ? 0 : maxExpSize.min(n - 2 -rs) // actual exponent size
var exponent = [0]
if (es > 0) exponent = p[rs + 2...rs + 2 + es]
var fs = (es == 0) ? 0 : n - 2 - rs - es // function size
var s = (p[0] == 0) ? 1 : -1 // sign
var k = regime.all { |i| i == 0 } ? -rs : rs - 1
var u = BigInt.two.pow(2.pow(maxExpSize))
var e = Conv.atoi(exponent.join(""), 2)
var f = BigRat.one
if (fs > 0) {
var fraction = p.join("")[-fs..-1]
f = Conv.atoi(fraction, 2)
f = BigRat.one + BigRat.new(f, 2.pow(fs))
}
return f * BigRat.new(u, 1).pow(k) * s * 2.pow(e)
}
var tests = [
[3, "0000110111011101"],
[3, "1000000000000000"],
[3, "0000000000000000"],
[1, "0110110010101000"],
[1, "1001001101011000"],
[2, "0000000000000001"],
[0, "0111111111111111"],
[6, "0111111111111110"],
[1, "01000000"],
[1, "11000000"],
[1, "00110000"],
[1, "00100000"],
[2, "00000001"],
[2, "01111111"],
[7, "01111110"],
[2, "00000000000000000000000000000001"],
[2, "01111111111111111111111111111111"],
[5, "01111111111111111111111111111110"]
]
for (test in tests) {
var res = positDecode.call(test[1], test[0])
var res2 = (res is BigRat) ? res.toFloat : Num.infinity
Fmt.print("$s(es = $d) -> $s or $n", test[1], test[0], res, res2)
}
- Output:
0000110111011101(es = 3) -> 477/134217728 or 3.5539269447327e-06 1000000000000000(es = 3) -> ∞ or infinity 0000000000000000(es = 3) -> 0/1 or 0 0110110010101000(es = 1) -> 405/32 or 12.65625 1001001101011000(es = 1) -> -405/32 or -12.65625 0000000000000001(es = 2) -> 1/72057594037927936 or 1.3877787807814e-17 0111111111111111(es = 0) -> 16384/1 or 16384 0111111111111110(es = 6) -> 28638903918474961204418783933674838490721739172170652529441449702311064005352904159345284265824628375429359509218999720074396860757073376700445026041564579620512874307979212102266801261478978776245040008231745247475930553606737583615358787106474295296/1 or 2.8638903918475e+250 01000000(es = 1) -> 1/1 or 1 11000000(es = 1) -> -1/1 or -1 00110000(es = 1) -> 1/2 or 0.5 00100000(es = 1) -> 1/4 or 0.25 00000001(es = 2) -> 1/16777216 or 5.9604644775391e-08 01111111(es = 2) -> 16777216/1 or 16777216 01111110(es = 7) -> 4562440617622195218641171605700291324893228507248559930579192517899275167208677386505912811317371399778642309573594407310688704721375437998252661319722214188251994674360264950082874192246603776/1 or 4.5624406176222e+192 00000000000000000000000000000001(es = 2) -> 1/1329227995784915872903807060280344576 or 7.5231638452626e-37 01111111111111111111111111111111(es = 2) -> 1329227995784915872903807060280344576/1 or 1.3292279957849e+36 01111111111111111111111111111110(es = 5) -> 2269007733883335972287082669296112915239349672942191252221331572442536403137824056312817862695551072066953619064625508194663368599769448406663254670871573830845597595897613333042429214224697474472410882236254024057110212260250671521235807709272244389361641091086035023229622419456/1 or 2.2690077338833e+279