Posit numbers/decoding

Posit is a quantization of the real projective line proposed by John Gustafson in 2015. It is claimed to be an improvement over IEEE 754.

The purpose of this task is to write a program capable of decoding a posit number. You will use the example provided by Gustafson in his paper: 0b0000110111011101, representing a 16-bit long real number with three bits for the exponent. Once decoded, you should obtain either the fraction 477/134217728 or the floating point value 3.55393E−6.

Jeff Johnson from Facebook research, described posit numbers as such:

A more efficient representation for tapered floating points is the recent posit format by Gustafson. It has no explicit size field; the exponent is encoded using a Golomb-Rice prefix-free code, with the exponent

e

encoded as a Golomb-Rice quotient and remainder

(q,r)

with

q

in unary and

r

in binary (in posit terminology,

q

is the regime). Remainder encoding size is defined by the exponent scale

s

, where

2^{s}

is the Golomb-Rice divisor. Any space not used by the exponent encoding is used by the significand, which unlike IEEE 754 always has a leading 1; gradual underflow (and overflow) is handled by tapering. A posit number system is characterized by

(N,s)

, where

N

is the word length in bits and

s

is the exponent scale. The minimum and maximum positive finite numbers in

(N,s)

are $f_{\mathrm{min}} = 2^{-(N-2)2^{s}}$ and $f_{\mathrm{max}} = 2^{(N-2)2^{s}}$. The number line is represented much as the projective reals, with a single point at

\pm \infty

bounding $-f_{\mathrm{max}}$ and

f_{\mathrm {max} }

.

\pm \infty

and 0 have special encodings; there is no NaN. The number system allows any choice of

N\geq 3

and $0 \le s \le N - 3$.

s

controls the dynamic range achievable; e.g., 8-bit (8, 5)-posit

f_{\mathrm {max} }=2^{192}

is larger than

f_{\mathrm {max} }

in float32. (8, 0) and (8, 1) are more reasonable values to choose for 8-bit floating point representations, with

f_{\mathrm {max} }

of 64 and 4096 accordingly. Precision is maximized in the range $\pm\left[2^{-(s+1)}, 2^{s+1}\right)$ with $N - 3 - s$ significand fraction bits, tapering to no fraction bits at

\pm f_{\mathrm {max} }

.

— Jeff Johnson, Rethinking floating point for deep learning, Facebook research.

Julia

"""
    PositType3{T<:Integer}

Raw posit word together with the two parameters needed to interpret it.

# Fields
- `numbits`: total word length in bits (N).
- `es`: maximum number of exponent bits (the exponent scale).
- `bits`: the encoded word; only the low `numbits` bits are used when decoding.
"""
struct PositType3{T<:Integer}
    numbits::UInt16
    es::UInt16
    bits::T
    PositType3(nb, ne, i) = new{typeof(i)}(UInt16(nb), UInt16(ne), i)
end

"""
    Rational(p::PositType3)

Decode the posit `p` exactly and return it as a `Rational{BigInt}`.

Layout reference: posithub.org/docs/Posits4.pdf

Special encodings: the all-zero word decodes to `0 // 1`, and the word with
only the sign bit set decodes to `1 // 0` (the single point at infinity).

The word is handled as an unsigned `numbits`-wide bit pattern regardless of
whether `p.bits` is a signed or unsigned integer, so `0x9358` and
`reinterpret(Int16, 0x9358)` decode to the same value.
"""
function Base.Rational(p::PositType3)
    n = Int(p.numbits)
    es = Int(p.es)
    wordmask = (big(1) << n) - 1
    # BigInt `&` uses two's-complement semantics, so this also normalises
    # negative signed inputs to their n-bit pattern.
    word = BigInt(p.bits) & wordmask
    signmask = big(1) << (n - 1)

    iszero(word) && return big(0) // big(1)
    word == signmask && return big(1) // big(0)

    # Negative posits are stored as the two's complement of their magnitude.
    negative = !iszero(word & signmask)
    if negative
        word = (-word) & wordmask
    end

    # Regime: a run of identical bits directly after the sign bit, ended by
    # the opposite bit or by the end of the word.
    body = n - 1
    regimebit = !iszero(word & (big(1) << (body - 1)))
    runlen = 1
    while runlen < body &&
          (!iszero(word & (big(1) << (body - 1 - runlen)))) == regimebit
        runlen += 1
    end
    k = regimebit ? runlen - 1 : -runlen

    # Bits left once the regime and its terminator have been consumed.
    rest = max(body - runlen - 1, 0)
    nexp = min(es, rest)
    nfrac = rest - nexp
    tail = word & ((big(1) << rest) - 1)

    # Exponent bits that fell off the end of the word are implicitly zero,
    # so the bits that are present form the high end of the field.
    e = (tail >> nfrac) << (es - nexp)
    frac = tail & ((big(1) << nfrac) - 1)

    pw = Int(k * (big(1) << es) + e)
    mantissa = (big(1) << nfrac) + frac      # (1 + f) scaled by 2^nfrac
    shift = pw - nfrac
    magnitude = shift >= 0 ? (mantissa << shift) // big(1) :
                             mantissa // (big(1) << (-shift))
    return negative ? -magnitude : magnitude
end

# Compare the decoded 16-bit, es = 3 example word against the expected fraction.
@show Rational(PositType3(16, 3, 0b0000110111011101)) == 477 // 134217728

Output:

 Rational(PositType3(16, 3, 0x0ddd)) == 477 // 134217728 = true

Phix

with javascript_semantics
function twos_compliment_2_on(string bits, integer nbits)
    --
    -- Two's-complement negate positions 2..nbits of a '0'/'1' string,
    -- leaving bits[1] (the sign position) untouched.
    -- Returns the modified copy of bits.
    --
    -- step 1: invert every bit after the sign position
    for idx=2 to nbits do
        if bits[idx]='0' then
            bits[idx] = '1'
        else
            bits[idx] = '0'
        end if
    end for
    -- step 2: add one, rippling the carry leftwards; a carry out of
    --         position 2 is simply dropped
    integer pos = nbits
    while pos>=2 do
        if bits[pos]='0' then
            bits[pos] = '1'
            exit
        end if
        bits[pos] = '0'
        pos -= 1
    end while
    return bits
end function

function posit_decode(integer nbits, es, object bits)
    --
    -- Decode a posit.
    --
    -- nbits: number of bits (aka n)
    -- es: exponent scale (maximum number of exponent bits)
    -- bits: (binary) integer or string of nbits 0|1
    --
    -- Returns {bits,res}, where bits is the string form (two's complemented
    -- after the sign position when the sign bit is set) and res is the
    -- decoded atom, or the string "zero" or "NaR" for the two special words.
    --
    if not string(bits) then
        string fmt = sprintf("%%0%db",nbits)
        bits = sprintf(fmt,bits)
    end if
    assert(length(bits)==nbits)
    integer s = bits[1]='1'
    if s then bits = twos_compliment_2_on(bits,nbits) end if
    -- stop is the index of the bit that terminates the regime run,
    -- or 0 when the run reaches the end of the word.
    integer b2z = bits[2]='0',
           stop = find(xor_bits(bits[2],1),bits,3)
    if stop=0 then
        if b2z then
            -- nothing but zeroes after the sign bit
            if s then
                return {bits,"NaR"} -- aka inf
            end if
            return {bits,"zero"}
        end if
        -- regime of ones fills the whole word (largest magnitude):
        -- behave as if the terminator sat just past the last bit
        stop = nbits+1
    end if
    integer r = stop-2,                 -- regime run length
            k = iff(b2z?-r:r-1)
    -- clamp estart so the slices below stay legal when the regime
    -- (plus terminator) uses up the whole word
    integer estart = min(stop+1,nbits+1),
           efinish = min(stop+es,nbits),
             ebits = efinish-estart+1,  -- exponent bits actually present
          exponent = 0,
          fraction = 0
    if ebits>0 then
        exponent = to_integer(bits[estart..efinish],0,2)
    end if
    -- exponent bits cut off by the end of the word are implicitly zero,
    -- so the bits that are present are the high-order ones
    exponent *= power(2,es-ebits)
    if efinish<nbits then
        fraction = to_integer(bits[efinish+1..$],0,2)
    end if
    atom useed = power(2,power(2,es)),
            fs = power(2,nbits-efinish)
    atom res = iff(s?-1:+1)*power(useed,k)*power(2,exponent)*(1+fraction/fs)
    return {bits,res}
end function

-- Each entry is {nbits, es, bits}, passed as the argument list of posit_decode.
-- Entries commented out with "nope" are disabled.
constant tests = {{16,3,0b0000110111011101},
                  {16,3,0b1000000000000000},
                  {16,3,0b0000000000000000},
                  {16,1,0b0110110010101000},
                  {16,1,0b1001001101011000},
                  {16,2,0b0000000000000001},
--                {16,0,0b0111111111111111}, -- nope
                  {16,6,0b0111111111111110},
                  {8,1,0b01000000},
                  {8,1,0b11000000},
                  {8,1,0b00110000},
                  {8,1,0b00100000},
                  {8,2,0b00000001},
--                {8,2,0b01111111},         -- nope
                  {8,7,0b01111110},
                  {32,2,0b00000000000000000000000000000001},
--                {32,2,0b01111111111111111111111111111111}} -- nope
                  {32,5,0b01111111111111111111111111111110}}
-- Decode every entry in order and print "<bit string> = <value>".
for i=1 to length(tests) do
    sequence args = tests[i]
    printf(1,"%s = %v\n",call_func(posit_decode,args))
end for

Output:

Be warned I could not get the largest positive values to match the Wikipedia page... and instead I've put in some bigger values, which are either wrong or prove that table on the wp page is wrong.

0000110111011101 = 3.553926944e-6
1000000000000000 = "NaR"
0000000000000000 = "zero"
0110110010101000 = 12.65625
1110110010101000 = -12.65625
0000000000000001 = 1.38777878e-17
0111111111111110 = 2.863890392e+250
01000000 = 1
11000000 = -1
00110000 = 0.5
00100000 = 0.25
00000001 = 5.960464478e-8
01111110 = 4.562440618e+192
00000000000000000000000000000001 = 7.523163846e-37
01111111111111111111111111111110 = 2.269007734e+279

Raku

#| A posit of word length $N bits with at most $es exponent bits.
unit role Posit[UInt $N, UInt $es];

#| The raw $N-bit encoding, as an unsigned integer.
has UInt $.UInt;

#| -1 when the sign bit is set (and the word is not the infinity encoding), else +1.
method sign { self.UInt > 2**($N - 1) ?? -1 !! +1 }

#| Exact value of the posit.
#| Returns 0 for the all-zero word, Inf for the word with only the sign bit
#| set, and a FatRat for every other encoding.
method FatRat {
  return 0   if self.UInt == 0;
  my UInt $mask = 2**($N - 1);
  return Inf if self.UInt == $mask;
  my UInt $n = self.UInt;
  my $sign = $n +& $mask ?? -1 !! +1;

  # Negative posits store the two's complement of the magnitude, taken over
  # the $N-bit word (the word size, not the value, sets the modulus).
  $n = 2**$N - $n if $sign < 0;

  # Regime: run of identical bits after the sign bit.
  $mask +>= 1;
  my Bool $first-bit = ?($n +& $mask);
  my Int $run = 0;
  repeat {
    $run++;
    $mask +>= 1;
  } while $mask && ?($n +& $mask) == $first-bit;
  my Int $k = $first-bit ?? $run - 1 !! -$run;

  # Step over the regime terminator (a no-op if the word is already used up).
  $mask +>= 1;

  # Exponent: always $es bits wide; bits that fall off the end of the word
  # are implicitly zero, so keep shifting even after the mask runs out.
  my UInt $exponent = 0;
  for ^$es {
    $exponent +<= 1;
    if $mask {
      $exponent +|= 1 if $n +& $mask;
      $mask +>= 1;
    }
  }

  # Fraction: remaining bits, with an implicit leading 1.
  my $fraction = 1.FatRat;
  my Int $weight = 1;
  while $mask {
    $weight +<= 1;
    $fraction += FatRat.new(1, $weight) if $n +& $mask;
    $mask +>= 1;
  }

  # Raise a FatRat (not an Int) to the possibly negative power so the scale
  # stays an exact rational however large its denominator becomes.
  return $sign * 2.FatRat ** ($k * 2**$es + $exponent) * $fraction;
}

CHECK {
  use Test;
  # example from L<http://www.johngustafson.net/pdfs/BeatingFloatingPoint.pdf>
  # 16-bit word, 3 exponent bits; expected value is 477/134217728.
  my $example = Posit[16, 3].new(UInt => 0b0000110111011101);
  my $expected = 477.FatRat/134217728;
  is $example.FatRat, $expected;
}

Output:

ok 1 -

Wren

Library: Wren-fmt

Library: Wren-big

import "./fmt" for Conv
import "./big" for BigRat

// Decodes a 16-bit posit.
//   ps         : string of 16 "0"/"1" characters, most significant bit first
//   maxExpSize : maximum number of exponent bits (the exponent scale)
// Returns 0 for the all-zero word, Conv.infinity for the word with only the
// sign bit set, and a BigRat for every other encoding.
var posit16_decode = Fn.new { |ps, maxExpSize|
    var p = ps.map { |c| c == "0" ? 0 : 1 }.toList

    // Deal with exceptional values.
    if (p[1..-1].all { |i| i == 0 }) {
        return (p[0] == 0) ? 0 : Conv.infinity
    }

    // Convert bits after sign bit to two's complement if negative.
    if (p[0] == 1) {
        for (i in 1..15) p[i] = (p[i] == 0) ? 1 : 0
        for (i in 15..1) {
            if (p[i] == 1) {
                p[i] = 0
            } else {
                p[i] = 1
                break
            }
        }
    }

    // Regime: run of identical bits starting at index 1.
    var first = p[1]
    var rs = 15  // regime size
    for (i in 2..15) {
        if (p[i] != first) {
            rs = i - 1
            break
        }
    }
    var k = (first == 0) ? -rs : rs - 1

    // Bits left after the regime (indices 1..rs) and its terminator (rs + 1).
    var avail = (14 - rs).max(0)
    var es = maxExpSize.min(avail)  // actual exponent size
    var fs = avail - es             // fraction size

    // Exponent bits cut off by the end of the word are implicitly zero, so
    // the bits that are present are the high-order ones.
    var e = 0
    if (es > 0) e = Conv.atoi(p[rs + 2...rs + 2 + es].join(""), 2)
    e = e * 2.pow(maxExpSize - es)

    var s = (p[0] == 0) ? 1 : -1  // sign
    var u = 2.pow(2.pow(maxExpSize))
    var f = BigRat.one
    if (fs > 0) {
        // Read from p, not ps: for negative inputs only p holds the
        // two's-complemented magnitude.
        var fraction = p[16 - fs...16]
        f = Conv.atoi(fraction.join(""), 2)
        f = BigRat.one + BigRat.new(f, 2.pow(fs))
    }
    return f * BigRat.new(u, 1).pow(k) * s * 2.pow(e)
}

// Decode the 16-bit example word with 3 exponent bits and print the result.
var ps = "0000110111011101"
System.print(posit16_decode.call(ps, 3))

Output:

477/134217728