Posit numbers/decoding
Posit numbers/decoding is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Posit is a quantization of the real projective line proposed by John Gustafson in 2015. It is claimed to be an improvement over IEEE 754.
The purpose of this task is to write a program capable of decoding a posit number. You will use the example provided by Gustafson in his paper : 0b0000110111011101, representing a 16-bit long real number with three bits for the exponent. Once decoded, you should obtain either the fraction 477/134217728 or the floating point value 3.55393E−6.
Jeff Johnson from Facebook research, described posit numbers as such:
- A more efficient representation for tapered floating points is the recent posit format by Gustafson. It has no explicit size field; the exponent is encoded using a Golomb-Rice prefix-free code, with the exponent encoded as a Golomb-Rice quotient and remainder with in unary and in binary (in posit terminology, is the regime). Remainder encoding size is defined by the exponent scale , where is the Golomb-Rice divisor. Any space not used by the exponent encoding is used by the significand, which unlike IEEE 754 always has a leading 1; gradual underflow (and overflow) is handled by tapering. A posit number system is characterized by , where is the word length in bits and is the exponent scale. The minimum and maximum positive finite numbers in are Failed to parse (syntax error): {\displaystyle f_\mathrm{min} = 2^{−(N−2)2^s}} and Failed to parse (syntax error): {\displaystyle f_\mathrm{max} = 2^{(N−2)2^s}} . The number line is represented much as the projective reals, with a single point at bounding Failed to parse (syntax error): {\displaystyle −f_\mathrm{max}} and . and 0 have special encodings; there is no NaN. The number system allows any choice of and Failed to parse (syntax error): {\displaystyle 0\le s\le N − 3} .
- controls the dynamic range achievable; e.g., 8-bit (8, 5)-posit is larger than in float32. (8, 0) and (8, 1) are more reasonable values to choose for 8-bit floating point representations, with of 64 and 4096 accordingly. Precision is maximized in the range Failed to parse (syntax error): {\displaystyle \pm\left[2^{−(s+1)}, 2^{s+1}\right)} with Failed to parse (syntax error): {\displaystyle N − 3 − s} significand fraction bits, tapering to no fraction bits at .
- — Jeff Johnson, Rethinking floating point for deep learning, Facebook research.
Julia
struct PositType3{T<:Integer}
numbits::UInt16
es::UInt16
bits::T
PositType3(nb, ne, i) = new{typeof(i)}(UInt16(nb), UInt16(ne), i)
end
""" From posithub.org/docs/Posits4.pdf """
function Base.Rational(p::PositType3)
s = signbit(p.bits) # s for S signbit, is 1 if negative
pabs = p.bits << 1 # shift off signbit (adds a 0 to F at LSB)
pabs == 0 && return s ? 1 // 0 : 0 // 1 # if p is 0, return 0 if s = 0, error if s = 1
expsign = signbit(pabs) # exponent sign from 2nd bit now in MSB location
k = expsign == 1 ? leading_ones(pabs) : leading_zeros(pabs) # regime R bit count
scaling = 2^p.es * (expsign == 0 ? -1 : 1)
pabs <<= (k + 1) # shift off unwanted R bits
pabs >>= (k + 2) # shift back without the extra LSB bit
fsize = p.numbits - k - p.es - 2 # check how many F bits are actually explicit
fsize <= 0 && return 0 // 1 # missing F is 0
f = (pabs & (2^fsize - 1)) // 2^fsize # Get F value. Can be missing -> 0
e = pabs >> fsize # Get E value.
pw = (1 - 2s) * (scaling * k + e + s)
return pw >= 0 ? ((1 - 3s) + f) * 2^pw // 1 : ((1 - 3s) + f) // 2^(-pw)
end
@show Rational(PositType3(16, 3, 0b0000110111011101)) == 477 // 134217728
- Output:
Rational(PositType3(16, 3, 0x0ddd)) == 477 // 134217728 = true
Phix
with javascript_semantics function twos_compliment_2_on(string bits, integer nbits) for i=2 to nbits do bits[i] = iff(bits[i]='0'?'1':'0') end for for i=nbits to 2 by -1 do if bits[i]='0' then bits[i] = '1' exit end if bits[i] = '0' end for return bits end function function posit_decode(integer nbits, es, object bits) -- -- nbits: number of bits (aka n) -- es: exponent scale -- bits: (binary) integer or string of nbits 0|1 -- if not string(bits) then string fmt = sprintf("%%0%db",nbits) bits = sprintf(fmt,bits) end if assert(length(bits)==nbits) integer s = bits[1]='1' if s then bits = twos_compliment_2_on(bits,nbits) end if integer r = find(xor_bits(bits[2],1),bits,3)-2, b2z = bits[2]='0', k = iff(b2z?-r:r-1) if r<0 and b2z then if s then return {bits,"NaR"} -- aka inf end if return {bits,"zero"} end if integer estart = r+3, efinish = min(r+2+es,nbits), exponent = to_integer(bits[estart..efinish],0,2), fraction = to_integer(bits[efinish+1..$],0,2) atom useed = power(2,power(2,es)), fs = power(2,nbits-efinish) atom res = iff(s?-1:+1)*power(useed,k)*power(2,exponent)*(1+fraction/fs) return {bits,res} end function constant tests = {{16,3,0b0000110111011101}, {16,3,0b1000000000000000}, {16,3,0b0000000000000000}, {16,1,0b0110110010101000}, {16,1,0b1001001101011000}, {16,2,0b0000000000000001}, -- {16,0,0b0111111111111111}, -- nope {8,1,0b01000000}, {8,1,0b11000000}, {8,1,0b00110000}, {8,1,0b00100000}, {8,2,0b00000001}, -- {8,2,0b01111111}, -- nope {32,2,0b00000000000000000000000000000001}} -- {32,2,0b01111111111111111111111111111111}} -- nope for t in tests do printf(1,"%s = %v\n",call_func(posit_decode,t)) end for
- Output:
Be warned I could not get the largest positive values to match the Wikipedia page...
0000110111011101 = 3.553926944e-6 1000000000000000 = "NaR" 0000000000000000 = "zero" 0110110010101000 = 12.65625 1110110010101000 = -12.65625 0000000000000001 = 1.38777878e-17 01000000 = 1 11000000 = -1 00110000 = 0.5 00100000 = 0.25 00000001 = 5.960464478e-8 00000000000000000000000000000001 = 7.523163846e-37
raku
unit role Posit[UInt $N, UInt $es];
has UInt $.UInt;
method sign { self.UInt > 2**($N - 1) ?? -1 !! +1 }
method FatRat {
return 0 if self.UInt == 0;
my UInt $mask = 2**($N - 1);
return Inf if self.UInt == $mask;
my UInt $n = self.UInt;
my $sign = $n +& $mask ?? -1 !! +1;
my $r = $sign;
$n = ((2**$n - 1) +^ $n) + 1 if self.sign < 0;
my int $count = 0;
$mask +>= 1;
my Bool $first-bit = ?($n +& $mask);
repeat { $count++; $mask +>= 1;
} while ?($n +& $mask) == $first-bit && $mask;
my $m = $count;
my $k = $first-bit ?? $m - 1 !! -$m;
$r *= 2**($k*2**$es);
return $r unless $mask > 1;
$mask +>= 1;
$count = 0;
my UInt $exponent = 0;
while $mask && $count++ < $es {
$exponent +<= 1;
$exponent +|= 1 if $n +& $mask;
$mask +>= 1;
}
$r *= 2**$exponent;
my $fraction = 1.FatRat;
while $mask {
(state $power-of-two = 1) +<= 1;
$fraction += 1/$power-of-two if $n +& $mask;
$mask +>= 1;
}
$r *= $fraction;
return $r;
}
CHECK {
use Test;
# example from L<http://www.johngustafson.net/pdfs/BeatingFloatingPoint.pdf>
is Posit[16, 3]
.new(UInt => 0b0000110111011101)
.FatRat, 477.FatRat/134217728;
}
- Output:
ok 1 -
Wren
import "./fmt" for Conv
import "./big" for BigRat
var posit16_decode = Fn.new { |ps, maxExpSize|
var p = ps.map { |c| c == "0" ? 0 : 1 }.toList
// Deal with exceptional values.
if (p[1..-1].all { |i| i == 0 }) {
return (p[0] == 0) ? 0 : Conv.infinity
}
// Convert bits after sign bit to two's complement if negative.
if (p[0] == 1) {
for (i in 1..15) p[i] = (p[i] == 0) ? 1 : 0
for (i in 15..1) {
if (p[i] == 1) {
p[i] = 0
} else {
p[i] = 1
break
}
}
}
var first = p[1]
var rs = 15 // regime size
for (i in 2..15) {
if (p[i] != first) {
rs = i - 1
break
}
}
var regime = p[1..rs]
var es = (rs == 15) ? 0 : maxExpSize.min(14-rs) // actual exponent size
var exponent = [0]
if (es > 0) exponent = p[rs + 2...rs + 2 + es]
var fs = (es == 0) ? 0 : 14 - rs - es // function size
var s = (p[0] == 0) ? 1 : -1 // sign
var k = regime.all { |i| i == 0 } ? -rs : rs - 1
var u = 2.pow(2.pow(maxExpSize))
var e = Conv.atoi(exponent.join(""), 2)
var f = BigRat.one
if (fs > 0) {
var fraction = ps[-fs..-1]
f = Conv.atoi(fraction.join(""), 2)
f = BigRat.one + BigRat.new(f, 2.pow(fs))
}
return f * BigRat.new(u, 1).pow(k) * s * 2.pow(e)
}
var ps = "0000110111011101"
System.print(posit16_decode.call(ps, 3))
- Output:
477/134217728