Posit numbers/decoding: Difference between revisions

Content added Content deleted

Inline

Revision as of 04:21, 21 September 2023

Posit is a quantization of the real projective line proposed by John Gustafson in 2015. It is claimed to be an improvement over IEEE 754.

The purpose of this task is to write a program capable of decoding a posit number. You will use the example provided by Gustafson in his paper : 0b0000110111011101, representing a 16-bit long real number with three bits for the exponent. Once decoded, you should obtain either the fraction 477/134217728 or the floating point value 3.55393E−6.

Jeff Johnson from Facebook research, described posit numbers as such:

A more efficient representation for tapered floating points is the recent posit format by Gustafson. It has no explicit size field; the exponent is encoded using a Golomb-Rice prefix-free code, with the exponent

e

encoded as a Golomb-Rice quotient and remainder

(q,r)

with

q

in unary and

r

in binary (in posit terminology,

q

is the regime). Remainder encoding size is defined by the exponent scale

s

, where

2^{s}

is the Golomb-Rice divisor. Any space not used by the exponent encoding is used by the significand, which unlike IEEE 754 always has a leading 1; gradual underflow (and overflow) is handled by tapering. A posit number system is characterized by

(N,s)

, where

N

is the word length in bits and

s

is the exponent scale. The minimum and maximum positive finite numbers in

(N,s)

are Failed to parse (syntax error): {\displaystyle f_\mathrm{min} = 2^{−(N−2)2^s}} and Failed to parse (syntax error): {\displaystyle f_\mathrm{max} = 2^{(N−2)2^s}} . The number line is represented much as the projective reals, with a single point at

\pm \infty

bounding Failed to parse (syntax error): {\displaystyle −f_\mathrm{max}} and

f_{\mathrm {max} }

.

\pm \infty

and 0 have special encodings; there is no NaN. The number system allows any choice of

N\geq 3

and Failed to parse (SVG (MathML can be enabled via browser plugin): Invalid response ("Math extension cannot connect to Restbase.") from server "https://wikimedia.org/api/rest_v1/":): {\displaystyle 0\le s\le N − 3} .

s

controls the dynamic range achievable; e.g., 8-bit (8, 5)-posit

f_{\mathrm {max} }=2^{192}

is larger than

f_{\mathrm {max} }

in float32. (8, 0) and (8, 1) are more reasonable values to choose for 8-bit floating point representations, with

f_{\mathrm {max} }

of 64 and 4096 accordingly. Precision is maximized in the range Failed to parse (syntax error): {\displaystyle \pm\left[2^{−(s+1)}, 2^{s+1}\right)} with Failed to parse (SVG (MathML can be enabled via browser plugin): Invalid response ("Math extension cannot connect to Restbase.") from server "https://wikimedia.org/api/rest_v1/":): {\displaystyle N − 3 − s} significand fraction bits, tapering to no fraction bits at

\pm f_{\mathrm {max} }

.

— Jeff Johnson, Rethinking floating point for deep learning, Facebook research.

Julia

""" Posit number, a quotient of integers, variable size and exponent length """
struct PositType3{T<:Integer}
    numbits::UInt16
    es::UInt16
    bits::T
    PositType3(nb, ne, i) = new{typeof(i)}(UInt16(nb), UInt16(ne), i)
end

""" Convert PositType3 to Rational. See also posithub.org/docs/Posits4.pdf """
function Base.Rational(p::PositType3)
    s = signbit(signed(p.bits))              # s for S signbit, is 1 if negative
    pabs = p.bits << 1                       # Shift off signbit (adds a 0 to F at LSB)
    pabs == 0 && return s ? 1 // 0 : 0 // 1  # If p is 0, return 0 or if s 1 error
    s && (pabs = (-p.bits) << 1)             # If p is negative, flip to 2's complement
    expsign = signbit(signed(pabs))          # Exponent sign from 2nd bit now MSB 
    r = expsign == 1 ? leading_ones(pabs) : leading_zeros(pabs) # r regime R size
    k = expsign ? r - 1 : -r                 # k for the exponent calculation
    pabs <<= (r + 1)                         # Shift off unwanted R bits
    pabs >>= (r + 2)                         # Shift back for E, F
    fsize = p.numbits - 1 - r - 1 - p.es     # Check how many F bits explicit
    e = fsize < 1 ? pabs : pabs >> fsize     # Get E value
    f = fsize < 1 ? 1 : 1 + (pabs & (2^fsize - 1)) // 2^fsize # Get F value (1 by default)
    pw = 2^p.es * k + e
    return pw >= 0 ? (-1)^s * f * big"2"^pw // 1 : (-1)^s * f // big"2"^(-pw)
end

@show Rational(PositType3(16, 3, 0b0000110111011101)) == 477 // 134217728
const tests = [
    (16, 3, 0b0000110111011101),
    (16, 3, 0b1000000000000000),
    (16, 3, 0b0000000000000000),
    (16, 1, 0b0110110010101000),
    (16, 1, 0b1001001101011000),
    (16, 2, 0b0000000000000001),
    (16, 0, 0b0111111111111111),
    (16, 6, 0b0111111111111110),
    (8, 1, 0b01000000),
    (8, 1, 0b11000000),
    (8, 1, 0b00110000),
    (8, 1, 0b00100000),
    (8, 2, 0b00000001),
    (8, 2, 0b01111111),
    (8, 7, 0b01111110),
    (32, 2, 0b00000000000000000000000000000001),
    (32, 2, 0b01111111111111111111111111111111),
    (32, 5, 0b01111111111111111111111111111110),
]

for t in tests
    r = Rational(PositType3(t...))
    println(string(t[3], base = 2, pad = t[1]), " => $r = ", float(r))
end

Output:

Rational(PositType3(16, 3, 0x0ddd)) == 477 // 134217728 = true
0000110111011101 => 477//134217728 = 3.553926944732666015625e-06
1000000000000000 => 1//0 = Inf
0000000000000000 => 0//1 = 0.0
0110110010101000 => 405//32 = 12.65625
1001001101011000 => -405//32 = -12.65625
0000000000000001 => 1//72057594037927936 = 1.387778780781445675529539585113525390625e-17
0111111111111111 => 16384//1 = 16384.0
0111111111111110 => 28638903918474961204418783933674838490721739172170652529441449702311064005352904159345284265824628375429359509218999720074396860757073376700445026041564579620512874307979212102266801261478978776245040008231745247475930553606737583615358787106474295296//1 = 2.86389039184749612044187839336748384907217391721706525294414497023110640053529e+250
01000000 => 1//1 = 1.0
11000000 => -1//1 = -1.0
00110000 => 1//2 = 0.5
00100000 => 1//4 = 0.25
00000001 => 1//16777216 = 5.9604644775390625e-08
01111111 => 16777216//1 = 1.6777216e+07
01111110 => 4562440617622195218641171605700291324893228507248559930579192517899275167208677386505912811317371399778642309573594407310688704721375437998252661319722214188251994674360264950082874192246603776//1 = 4.562440617622195218641171605700291324893228507248559930579192517899275167208677e+192
00000000000000000000000000000001 => 1//1329227995784915872903807060280344576 = 7.523163845262640050999913838222372338039459563341360137656010920181870460510254e-37
01111111111111111111111111111111 => 1329227995784915872903807060280344576//1 = 1.329227995784915872903807060280344576e+36
01111111111111111111111111111110 => 2269007733883335972287082669296112915239349672942191252221331572442536403137824056312817862695551072066953619064625508194663368599769448406663254670871573830845597595897613333042429214224697474472410882236254024057110212260250671521235807709272244389361641091086035023229622419456//1 = 2.269007733883335972287082669296112915239349672942191252221331572442536403137824e+279

Phix

with javascript_semantics
function twos_compliment_2_on(string bits, integer nbits)
    for i=2 to nbits do
        bits[i] = iff(bits[i]='0'?'1':'0')
    end for
    for i=nbits to 2 by -1 do
        if bits[i]='0' then
            bits[i] = '1'
            exit
        end if
        bits[i] = '0'
    end for
    return bits
end function

function posit_decode(integer nbits, es, object bits)
    --
    -- nbits: number of bits (aka n)
    -- es: exponent scale
    -- bits: (binary) integer or string of nbits 0|1
    --
    if not string(bits) then
        string fmt = sprintf("%%0%db",nbits)
        bits = sprintf(fmt,bits)
    end if
    assert(length(bits)==nbits)
    string ibits = bits -- save for return
    integer s = bits[1]='1'
    if s then bits = twos_compliment_2_on(bits,nbits) end if
    integer r = find(xor_bits(bits[2],1),bits,3)-2,
            b2z = bits[2]='0', exponent = 0, fraction = 0
    atom fs = 1, useed = power(2,power(2,es))
    if r<0 then
        if b2z then
            if s then
                return {ibits,es,"NaR"} -- aka inf
            end if
            return {ibits,es,"zero"}
        end if
        r = nbits-1
    else
        integer estart = r+3,
               efinish = min(r+2+es,nbits)
        exponent = to_integer(bits[estart..efinish],0,2)
        fraction = to_integer(bits[efinish+1..$],0,2)
        fs = power(2,nbits-efinish)
    end if
    integer k = iff(b2z?-r:r-1)
    atom res = iff(s?-1:+1)*power(useed,k)*power(2,exponent)*(1+fraction/fs)
    return {ibits,es,res}
end function

constant tests = {{16,3,0b0000110111011101},
                  {16,3,0b1000000000000000},
                  {16,3,0b0000000000000000},
                  {16,1,0b0110110010101000},
                  {16,1,0b1001001101011000},
                  {16,2,0b0000000000000001},
                  {16,0,0b0111111111111111},
                  {16,2,0b0111111111111111},
                  {16,6,0b0111111111111110},
                  {8,1,0b01000000},
                  {8,1,0b11000000},
                  {8,1,0b00110000},
                  {8,1,0b00100000},
                  {8,2,0b00000001},
                  {8,2,0b01111111},
                  {8,7,0b01111110},
                  {32,2,0b00000000000000000000000000000001},
                  {32,2,0b01111111111111111111111111111111},
                  {32,5,0b01111111111111111111111111111110}}
for t in tests do
    printf(1,"%s (es=%d) ==> %v\n",call_func(posit_decode,t))
end for

Output:

(Still disagrees with Julia on the -12.65625, but I think I'm right)

0000110111011101 (es=3) ==> 3.553926944e-6
1000000000000000 (es=3) ==> "NaR"
0000000000000000 (es=3) ==> "zero"
0110110010101000 (es=1) ==> 12.65625
1001001101011000 (es=1) ==> -12.65625
0000000000000001 (es=2) ==> 1.38777878e-17
0111111111111111 (es=0) ==> 16384
0111111111111111 (es=2) ==> 7.205759404e+16
0111111111111110 (es=6) ==> 2.863890392e+250
01000000 (es=1) ==> 1
11000000 (es=1) ==> -1
00110000 (es=1) ==> 0.5
00100000 (es=1) ==> 0.25
00000001 (es=2) ==> 5.960464478e-8
01111111 (es=2) ==> 16777216
01111110 (es=7) ==> 4.562440618e+192
00000000000000000000000000000001 (es=2) ==> 7.523163846e-37
01111111111111111111111111111111 (es=2) ==> 1.329227996e+36
01111111111111111111111111111110 (es=5) ==> 2.269007734e+279

raku

unit role Posit[UInt $N, UInt $es];

has UInt $.UInt;
method sign { self.UInt > 2**($N - 1) ?? -1 !! +1 }

method FatRat {
  return 0   if self.UInt == 0;
  my UInt $mask = 2**($N - 1);
  return Inf if self.UInt == $mask;
  my UInt $n = self.UInt;
  my $sign = $n +& $mask ?? -1 !! +1;
  my $r = $sign;
  $n = ((2**$n - 1) +^ $n) + 1 if self.sign < 0;
  my int $count = 0;
  $mask +>= 1;
  my Bool $first-bit = ?($n +& $mask);
  repeat { $count++; $mask +>= 1;
  } while ?($n +& $mask) == $first-bit && $mask;
  my $m = $count;
  my $k = $first-bit ?? $m - 1 !! -$m;
  $r *= 2**($k*2**$es);
  return $r unless $mask > 1;
  $mask +>= 1;
  $count = 0;
  my UInt $exponent = 0;
  while $mask && $count++ < $es {
    $exponent +<= 1;
    $exponent +|= 1 if $n +& $mask;
    $mask +>= 1;
  }
  $r *= 2**$exponent;
  my $fraction = 1.FatRat;
  while $mask {
    (state $power-of-two = 1) +<= 1;
    $fraction += 1/$power-of-two if $n +& $mask;
    $mask +>= 1;
  }
  $r *= $fraction;

  return $r;
}

CHECK {
  use Test;
  # example from L<http://www.johngustafson.net/pdfs/BeatingFloatingPoint.pdf>
  is Posit[16, 3]
    .new(UInt => 0b0000110111011101)
    .FatRat, 477.FatRat/134217728;
}

Output:

ok 1 -

Wren

Library: Wren-fmt

Library: Wren-big

import "./fmt" for Conv, Fmt
import "./big" for BigRat, BigInt

var positDecode = Fn.new { |ps, maxExpSize| 
    var p = ps.map { |c| c == "0" ? 0 : 1 }.toList

    // Deal with exceptional values.
    if (p[1..-1].all { |i| i == 0 }) {
        return (p[0] == 0) ? BigRat.zero : Conv.infinity
    }

    // Convert bits after sign bit to two's complement if negative.
    var n = p.count
    if (p[0] == 1) {
        for (i in 1...n) p[i] = (p[i] == 0) ? 1 : 0
        for (i in n-1..1) {
            if (p[i] == 1) {
                p[i] = 0
            } else {
                p[i] = 1
                break
            }
        }
    }
    var first = p[1]
    var rs = n - 1  // regime size
    for (i in 2...n) {
        if (p[i] != first) {
            rs = i - 1
            break
        }
    }
    var regime = p[1..rs]
    var es = (rs == n - 1) ? 0 : maxExpSize.min(n - 2 -rs)  // actual exponent size
    var exponent = [0]
    if (es > 0) exponent = p[rs + 2...rs + 2 + es]
    var fs = (es == 0) ? 0 : n - 2 - rs - es  // function size
    var s = (p[0] == 0) ? 1 : -1  // sign
    var k = regime.all { |i| i == 0 } ? -rs : rs - 1
    var u = BigInt.two.pow(2.pow(maxExpSize))
    var e = Conv.atoi(exponent.join(""), 2)
    var f = BigRat.one
    if (fs > 0) {
        var fraction = p.join("")[-fs..-1]
        f = Conv.atoi(fraction, 2)
        f = BigRat.one + BigRat.new(f, 2.pow(fs))
    }
    return f * BigRat.new(u, 1).pow(k) * s * 2.pow(e)
}

var tests = [
    [3, "0000110111011101"],
    [3, "1000000000000000"],
    [3, "0000000000000000"],
    [1, "0110110010101000"],
    [1, "1001001101011000"],
    [2, "0000000000000001"],
    [0, "0111111111111111"],
    [6, "0111111111111110"],
    [1, "01000000"],
    [1, "11000000"],
    [1, "00110000"],
    [1, "00100000"],
    [2, "00000001"],
    [2, "01111111"],
    [7, "01111110"],
    [2, "00000000000000000000000000000001"],
    [2, "01111111111111111111111111111111"],
    [5, "01111111111111111111111111111110"]
]

for (test in tests) {
    var res = positDecode.call(test[1], test[0])
    var res2 = (res is BigRat) ? res.toFloat : Num.infinity
    Fmt.print("$s(es = $d) -> $s or $n", test[1], test[0], res, res2)
}

Output:

0000110111011101(es = 3) -> 477/134217728 or 3.5539269447327e-06
1000000000000000(es = 3) -> ∞ or infinity
0000000000000000(es = 3) -> 0/1 or 0
0110110010101000(es = 1) -> 405/32 or 12.65625
1001001101011000(es = 1) -> -405/32 or -12.65625
0000000000000001(es = 2) -> 1/72057594037927936 or 1.3877787807814e-17
0111111111111111(es = 0) -> 16384/1 or 16384
0111111111111110(es = 6) -> 28638903918474961204418783933674838490721739172170652529441449702311064005352904159345284265824628375429359509218999720074396860757073376700445026041564579620512874307979212102266801261478978776245040008231745247475930553606737583615358787106474295296/1 or 2.8638903918475e+250
01000000(es = 1) -> 1/1 or 1
11000000(es = 1) -> -1/1 or -1
00110000(es = 1) -> 1/2 or 0.5
00100000(es = 1) -> 1/4 or 0.25
00000001(es = 2) -> 1/16777216 or 5.9604644775391e-08
01111111(es = 2) -> 16777216/1 or 16777216
01111110(es = 7) -> 4562440617622195218641171605700291324893228507248559930579192517899275167208677386505912811317371399778642309573594407310688704721375437998252661319722214188251994674360264950082874192246603776/1 or 4.5624406176222e+192
00000000000000000000000000000001(es = 2) -> 1/1329227995784915872903807060280344576 or 7.5231638452626e-37
01111111111111111111111111111111(es = 2) -> 1329227995784915872903807060280344576/1 or 1.3292279957849e+36
01111111111111111111111111111110(es = 5) -> 2269007733883335972287082669296112915239349672942191252221331572442536403137824056312817862695551072066953619064625508194663368599769448406663254670871573830845597595897613333042429214224697474472410882236254024057110212260250671521235807709272244389361641091086035023229622419456/1 or 2.2690077338833e+279