Huffman coding

Huffman encoding is a way to assign binary codes to symbols that reduces the overall number of bits used to encode a typical string of those symbols.

For example, if you use letters as symbols and have details of the frequency of occurence of those letters in typical strings, then you could just encode each letter with a fixed number of bits, such as in ASCII codes. You can do better than this by encoding more frequently occurring letters such as e and a, with smaller bit strings; and less frequently occurring letters such as q and x with longer bit strings.

Any string of letters will be encoded as a string of bits that are no-longer of the same length per letter. To successfully decode such as string, the smaller codes assigned to letters such as 'e' cannot occur as a prefix in the larger codes such as that for 'x'.

If you were to assign a code 01 for 'e' and code 011 for 'x', then if the bits to decode started as 011... then you would not know iif you should decode an 'e' or an 'x'.

The Huffman coding scheme takes each symbol and its weight (or frequency of occurrence), and generates proper encodings for each symbol taking account of the weights of each symbol, so that higher weighted symbols have less bits in their encoding. (See the WP article for more information).

A Huffman encoding can be computed by first creating a tree of nodes:

Create a leaf node for each symbol and add it to the priority queue.
While there is more than one node in the queue:
1. Remove the node of highest priority (lowest probability) twice to get two nodes.
2. Create a new internal node with these two nodes as children and with probability equal to the sum of the two nodes' probabilities.
3. Add the new node to the queue.
The remaining node is the root node and the tree is complete.

Traverse the constructed binary tree from root to leaves assigning and accumulating a '0' for one branch and a '1' for the other at each node. The accumulated zeroes and ones at each leaf constitute a Huffman encoding for those symbols and weights:

Using the characters and their frequency from the string "this is an example for huffman encoding", create a program to generate a Huffman encoding for each character as a table.

C

This code lacks a lot of needed checkings, expecially for memory allocation.

<lang c>#include <stdio.h>

include <stdlib.h>
include <string.h>

define BYTES 256

struct huffcode {

 int nbits;
 int code;

}; typedef struct huffcode huffcode_t;

struct huffheap {

 int *h;
 int n, s, cs;
 long *f;

}; typedef struct huffheap heap_t;

/* heap handling funcs */ static heap_t *_heap_create(int s, long *f) {

 heap_t *h;
 h = malloc(sizeof(heap_t));
 h->h = malloc(sizeof(int)*s);
 h->s = h->cs = s;
 h->n = 0;
 h->f = f;
 return h;

}

static void _heap_destroy(heap_t *heap) {

 free(heap->h);
 free(heap);

}

define swap_(I,J) do { int t_; t_ = a[(I)]; \

     a[(I)] = a[(J)]; a[(J)] = t_; } while(0)

static void _heap_sort(heap_t *heap) {

 int i=1, j=2; /* gnome sort */
 int *a = heap->h;

 while(i < heap->n) { /* smaller values are kept at the end */
   if ( heap->f[a[i-1]] >= heap->f[a[i]] ) {
     i = j; j++;
   } else {
     swap_(i-1, i);
     i--;
     i = (i==0) ? j++ : i;
   }
 }

}

undef swap_

static void _heap_add(heap_t *heap, int c) {

 if ( (heap->n + 1) > heap->s ) {
   heap->h = realloc(heap->h, heap->s + heap->cs);
   heap->s += heap->cs;
 }
 heap->h[heap->n] = c;
 heap->n++;
 _heap_sort(heap);

}

static int _heap_remove(heap_t *heap) {

 if ( heap->n > 0 ) {
   heap->n--;
   return heap->h[heap->n];
 }
 return -1;

}

/* huffmann code generator */ huffcode_t **create_huffman_codes(long *freqs) {

 huffcode_t **codes;
 heap_t *heap;
 long efreqs[BYTES*2];
 int preds[BYTES*2];
 int i, extf=BYTES;
 int r1, r2;

 memcpy(efreqs, freqs, sizeof(long)*BYTES);
 memset(&efreqs[BYTES], 0, sizeof(long)*BYTES);

 heap = _heap_create(BYTES*2, efreqs);
 if ( heap == NULL ) return NULL;

 for(i=0; i < BYTES; i++) if ( efreqs[i] > 0 ) _heap_add(heap, i);

 while( heap->n > 1 )
 {
   r1 = _heap_remove(heap);
   r2 = _heap_remove(heap);
   efreqs[extf] = efreqs[r1] + efreqs[r2];
   _heap_add(heap, extf);
   preds[r1] = extf;
   preds[r2] = -extf;
   extf++;
 }
 r1 = _heap_remove(heap);
 preds[r1] = r1;
 _heap_destroy(heap);

 codes = malloc(sizeof(huffcode_t *)*BYTES);

 int bc, bn, ix;
 for(i=0; i < BYTES; i++) {
   bc=0; bn=0;
   if ( efreqs[i] == 0 ) { codes[i] = NULL; continue; }
   ix = i;
   while( abs(preds[ix]) != ix ) {
     bc |= ((preds[ix] >= 0) ? 1 : 0 ) << bn;
     ix = abs(preds[ix]);
     bn++;
   }
   codes[i] = malloc(sizeof(huffcode_t));
   codes[i]->nbits = bn;
   codes[i]->code = bc;
 }
 return codes;

}

void free_huffman_codes(huffcode_t **c) {

 int i;

 for(i=0; i < BYTES; i++) if (c[i] != NULL) free(c[i]);
 free(c);

}

define MAXBITSPERCODE 100

void inttobits(int c, int n, char *s) {

 s[n] = 0;
 while(n > 0) {
   s[n-1] = (c%2) + '0';
   c >>= 1; n--;
 }

}

const char *test = "this is an example for huffman encoding";

int main() {

 huffcode_t **r;
 int i;
 char strbit[MAXBITSPERCODE];
 const char *p;
 long freqs[BYTES];

 memset(freqs, 0, sizeof freqs);

 p = test;
 while(*p != '\0') freqs[*p++]++;

 r = create_huffman_codes(freqs);

 for(i=0; i < BYTES; i++) {
   if ( r[i] != NULL ) {
     inttobits(r[i]->code, r[i]->nbits, strbit);
     printf("%c (%d) %s\n", i, r[i]->code, strbit);
   }
 }

 free_huffman_codes(r);

 return 0;

}</lang>

Common Lisp

This implementation uses a tree built of huffman-nodes, and a hash table mapping from elements of the input sequence to huffman-nodes. The priority queue is implemented as a sorted list. (For a more efficient implementation of a priority queue, see the Heapsort task.)

<lang lisp>(defstruct huffman-node

 (weight 0 :type number)
 (element nil :type t)
 (encoding nil :type (or null bit-vector))
 (left nil :type (or null huffman-node))
 (right nil :type (or null huffman-node)))

(defun initial-huffman-nodes (sequence &key (test 'eql))

 (let* ((length (length sequence))
        (increment (/ 1 length))
        (nodes (make-hash-table :size length :test test))
        (queue '()))
   (map nil #'(lambda (element)
                (multiple-value-bind (node presentp) (gethash element nodes)
                  (if presentp
                    (incf (huffman-node-weight node) increment)
                    (let ((node (make-huffman-node :weight increment
                                                   :element element)))
                      (setf (gethash element nodes) node
                            queue (list* node queue))))))
        sequence)
   (values nodes (sort queue '< :key 'huffman-node-weight))))

(defun huffman-tree (sequence &key (test 'eql))

 (multiple-value-bind (nodes queue)
     (initial-huffman-nodes sequence :test test)
   (do () ((endp (rest queue)) (values nodes (first queue)))
     (destructuring-bind (n1 n2 &rest queue-rest) queue
       (let ((n3 (make-huffman-node
                  :left n1
                  :right n2
                  :weight (+ (huffman-node-weight n1)
                             (huffman-node-weight n2)))))
         (setf queue (merge 'list (list n3) queue-rest '<
                            :key 'huffman-node-weight)))))))1

(defun huffman-codes (sequence &key (test 'eql))

 (multiple-value-bind (nodes tree)
     (huffman-tree sequence :test test)
   (labels ((hc (node length bits)
              (let ((left (huffman-node-left node))
                    (right (huffman-node-right node)))
                (cond
                 ((and (null left) (null right))
                  (setf (huffman-node-encoding node)
                        (make-array length :element-type 'bit
                                    :initial-contents (reverse bits))))
                 (t (hc left (1+ length) (list* 0 bits))
                    (hc right (1+ length) (list* 1 bits)))))))
     (hc tree 0 '())
     nodes)))

(defun print-huffman-code-table (nodes &optional (out *standard-output*))

 (format out "~&Element~10tWeight~20tCode")
 (loop for node being each hash-value of nodes
       do (format out "~&~s~10t~s~20t~s"
                  (huffman-node-element node)
                  (huffman-node-weight node)
                  (huffman-node-encoding node))))</lang>

Example:

> (print-huffman-code-table 
   (huffman-codes "this is an example for huffman encoding"))
Element   Weight    Code
#\t       1/39      #*10010
#\d       1/39      #*01101
#\m       2/39      #*0100
#\f       1/13      #*1100
#\o       2/39      #*0111
#\x       1/39      #*100111
#\h       2/39      #*1000
#\a       1/13      #*1010
#\s       2/39      #*0101
#\c       1/39      #*00010
#\l       1/39      #*00001
#\u       1/39      #*00011
#\e       1/13      #*1101
#\n       4/39      #*001
#\g       1/39      #*01100
#\p       1/39      #*100110
#\i       1/13      #*1011
#\r       1/39      #*00000
#\Space   2/13      #*111

J

Implementation from http://www.jsoftware.com/jwiki/Essays/Huffman%20Coding

<lang J>hc=: 4 : 0

if. 1=#x do. y
else. ((i{x),+/j{x) hc (i{y),<j{y [ i=. (i.#x) -. j=. 2{./:x end.

)

hcodes=: 4 : 0

assert. x -:&$ y           NB. weights and words have same shape
assert. (0<:x) *. 1=#$x    NB. weights are non-negative
assert. 1 >: L.y           NB. words are boxed not more than once
w=. ,&.> y                 NB. standardized words
assert. w -: ~.w           NB. words are unique
t=. 0 {:: x hc w           NB. minimal weight binary tree
((< S: 0 t) i. w) { <@(1&=)@; S: 1 {:: t

)</lang>

   ;"1":L:0(#/.~ (],.(<'    '),.hcodes) ,&.>@~.)'this is an example for huffman encoding'
t    0 1 0 1 0
h    1 1 1 1 1
i    1 0 0 1  
s    0 0 1 0  
     1 0 1    
a    1 1 0 0  
n    0 0 0    
e    1 1 0 1  
x    0 1 0 1 1
m    0 0 1 1  
p    0 1 1 0 0
l    0 1 1 0 1
f    1 1 1 0  
o    0 1 0 0  
r    0 1 1 1 0
u    0 1 1 1 1
c    1 0 0 0 0
d    1 0 0 0 1
g    1 1 1 1 0

Java

This implementation creates an actual tree structure, and then traverses the tree to recover the code. <lang java>import java.util.*;

abstract class HuffmanTree implements Comparable<HuffmanTree> {

   public int frequency; // the frequency of this tree
   public HuffmanTree(int freq) { frequency = freq; }

   // compares on the frequency
   public int compareTo(HuffmanTree tree) {
       return frequency - tree.frequency;
   }

}

class HuffmanLeaf extends HuffmanTree {

   public char value; // the character this leaf represents
  
   public HuffmanLeaf(int freq, char val) {
       super(freq);
       value = val;
   }

}

class HuffmanNode extends HuffmanTree {

   public HuffmanTree left, right; // subtrees
  
   public HuffmanNode(HuffmanTree l, HuffmanTree r) {
       super(l.frequency + r.frequency);
       left = l;
       right = r;
   }

}

public class HuffmanCode {

   // input is an array of frequencies, indexed by character code
   public static HuffmanTree buildTree(int[] charFreqs) {
       PriorityQueue<HuffmanTree> trees = new PriorityQueue<HuffmanTree>();
       // initially, we have a forest of leaves
       // one for each non-empty character
       for (int i = 0; i < charFreqs.length; i++)
           if (charFreqs[i] > 0)
               trees.offer(new HuffmanLeaf(charFreqs[i], (char)i));

       assert trees.size() > 0;
       // loop until there is only one tree left
       while (trees.size() > 1) {
           // two trees with least frequency
           HuffmanTree a = trees.poll();
           HuffmanTree b = trees.poll();

           // put into new node and re-insert into queue
           trees.offer(new HuffmanNode(a, b));
       }
       return trees.poll();
   }

   public static void printCodes(HuffmanTree tree, Stack<Character> prefix) {
       assert tree != null;
       if (tree instanceof HuffmanLeaf) {
           HuffmanLeaf leaf = (HuffmanLeaf)tree;

           // print out character and frequency
           System.out.print(leaf.value + "\t" + leaf.frequency + "\t");

           // print out code for this leaf, which is just the prefix
           for (char bit : prefix)
               System.out.print(bit);
           System.out.println();

       } else if (tree instanceof HuffmanNode) {
           HuffmanNode node = (HuffmanNode)tree;

           // traverse left
           prefix.push('0');
           printCodes(node.left, prefix);
           prefix.pop();

           // traverse right
           prefix.push('1');
           printCodes(node.right, prefix);
           prefix.pop();
       }
   }

   public static void main(String[] args) {
       String test = "this is an example for huffman encoding";

       // we will assume that all our characters will have
       // code less than 256, for simplicity
       int[] charFreqs = new int[256];
       // read each character and record the frequencies
       for (char c : test.toCharArray())
           charFreqs[c]++;

       // build tree
       HuffmanTree tree = buildTree(charFreqs);

       // print out results
       System.out.println("SYMBOL\tWEIGHT\tHUFFMAN CODE");
       printCodes(tree, new Stack<Character>());
   }

}</lang>

Example output:

SYMBOL	WEIGHT	HUFFMAN CODE
d	1	00000
t	1	00001
h	2	0001
s	2	0010
c	1	00110
x	1	00111
m	2	0100
o	2	0101
n	4	011
u	1	10000
l	1	10001
a	3	1001
r	1	10100
g	1	101010
p	1	101011
e	3	1011
i	3	1100
f	3	1101
 	6	111

Python

A slight modification of the method outlined in the task description allows the code to be accumulated as the heap is manipulated.

The output is sorted first on length of the code, then on the symbols.

<lang python>from heapq import heappush, heappop, heapify import collections

def encode(symbol2weights):

    Huffman encode the given dict mapping symbols to weights 
   heap = [ [float(wt), [sym, ]] for sym, wt in symbol2weights.iteritems() ]
   heapify(heap)
   while len(heap) >1:
       lo = heappop(heap)
       hi = heappop(heap)
       for i in lo[1:]: i[1] = '0' + i[1]
       for i in hi[1:]: i[1] = '1' + i[1]
       lohi = [ lo[0] + hi[0] ] + lo[1:] + hi[1:]
       heappush(heap, lohi)
   return sorted(heappop(heap)[1:], key=lambda x: (len(x[-1]), x))

astring = "this is an example for huffman encoding" symbol2weights = collections.defaultdict(int) for ch in astring:

   symbol2weights[ch] += 1

in Python 3.1+:
symbol2weights = collections.Counter(astring)

huff = encode(symbol2weights) print "\nSYMBOL\tWEIGHT\tHUFFMAN CODE" for h in huff:

   print "%s\t%s\t%s" % (h[0], symbol2weights[h[0]], h[1])</lang>

Example output:

SYMBOL	WEIGHT	HUFFMAN CODE
 	6	101
n	4	010
a	3	1001
e	3	1100
f	3	1101
h	2	0001
i	3	1110
m	2	0010
o	2	0011
s	2	0111
g	1	00000
l	1	00001
p	1	01100
r	1	01101
t	1	10000
u	1	10001
x	1	11110
c	1	111110
d	1	111111

An extension to the method outlined above is given here.

Ruby

Uses a

Library: RubyGems

package PriorityQueue

<lang ruby>require 'priority_queue'

def huffman_encoding(str)

 char_count = Hash.new(0)
 str.each_char {|c| char_count[c] += 1}
 
 pq = CPriorityQueue.new
 # chars with fewest count have highest priority
 char_count.each {|char, count| pq.push(char, count)}
 
 while pq.length > 1
   key1, prio1 = pq.delete_min
   key2, prio2 = pq.delete_min
   pq.push([key1, key2], prio1 + prio2)
 end
 
 Hash[*generate_encoding(pq.min_key)]

end

def generate_encoding(ary, prefix="")

 case ary
 when Array
   generate_encoding(ary[0], "#{prefix}0") + generate_encoding(ary[1], "#{prefix}1")
 else
   [ary, prefix]
 end

end

def encode(str, encoding)

 str.each_char.inject("") {|encoded, char| encoded << encoding[char]}

end

def decode(encoded, encoding)

 rev_enc = Hash[*encoding.to_a.flatten.reverse]
 decoded = ""
 pos = 0
 while pos < encoded.length
   key = ""
   while rev_enc[key].nil?
     key << encoded[pos]
     pos += 1
   end
   decoded << rev_enc[key]
 end
 decoded

end

str = "this is an example for huffman encoding" encoding = huffman_encoding(str) encoding.to_a.sort.each {|x| p x}

enc = encode(str, encoding) dec = decode(enc, encoding) puts "success!" if str == dec</lang>

[" ", "111"]
["a", "1011"]
["c", "00001"]
["d", "00000"]
["e", "1101"]
["f", "1100"]
["g", "00100"]
["h", "1000"]
["i", "1001"]
["l", "01110"]
["m", "10101"]
["n", "010"]
["o", "0001"]
["p", "00101"]
["r", "00111"]
["s", "0110"]
["t", "00110"]
["u", "01111"]
["x", "10100"]
success!

Scala

Works with: scala version 2.8

<lang scala>object Huffman {

 import scala.collection.mutable.{Map, PriorityQueue}
 
 sealed abstract class Tree
 case class Node(left: Tree, right: Tree) extends Tree
 case class Leaf(c: Char) extends Tree
 
 def treeOrdering(m: Map[Tree, Int]) = new Ordering[Tree] { 
    def compare(x: Tree, y: Tree) = m(y).compare(m(x))
 }

 def stringMap(text: String) = text groupBy (x => Leaf(x) : Tree) mapValues (_.length)
 
 def buildNode(queue: PriorityQueue[Tree], map: Map[Tree,Int]) {
   val right = queue.dequeue
   val left = queue.dequeue
   val node = Node(left, right)
   map(node) = map(left) + map(right)
   queue.enqueue(node)
 }

 def codify(tree: Tree, map: Map[Tree, Int]) = {
   def recurse(tree: Tree, prefix: String): List[(Char, (Int, String))] = tree match {
     case Node(left, right) => recurse(left, prefix+"0") ::: recurse(right, prefix+"1")
     case leaf @ Leaf(c) => c -> ((map(leaf), prefix)) :: Nil
   }
   recurse(tree, "")
 }

 def encode(text: String) = {
   val map = Map.empty[Tree,Int] ++= stringMap(text)
   val queue = new PriorityQueue[Tree]()(treeOrdering(map)) ++= map.keysIterator
   
   while(queue.size > 1) {
     buildNode(queue, map)
   }
   codify(queue.dequeue, map)
 }
 
 
 def main(args: Array[String]) {
   val text = "this is an example for huffman encoding"
   val code = encode(text)
   println("Char\tWeight\t\tEncoding")
   code sortBy (_._2._1) foreach { 
     case (c, (weight, encoding)) => println("%c:\t%3d/%-3d\t\t%s" format (c, weight, text.length, encoding)) 
   }
 }

}</lang>

Output:

Char    Weight          Encoding
t:        1/39          011000
p:        1/39          011001
r:        1/39          01101
c:        1/39          01110
x:        1/39          01111
g:        1/39          10110
l:        1/39          10111
u:        1/39          11000
d:        1/39          11001
o:        2/39          1010
s:        2/39          1101
m:        2/39          1110
h:        2/39          1111
f:        3/39          0000
a:        3/39          0001
e:        3/39          0010
i:        3/39          0011
n:        4/39          100
 :        6/39          010

SETL

<lang SETL>var forest := {}, encTab := {};

plaintext := 'this is an example for huffman encoding';

ft := {}; (for c in plaintext)

 ft(c) +:= 1;

end;

forest := {[f, c]: [c, f] in ft}; (while 1 < #forest)

 [f1, n1] := getLFN();
 [f2, n2] := getLFN();
 forest with:= [f1+f2, [n1,n2]];

end; addToTable(, arb range forest);

(for e = encTab(c))

 print(c, ft(c), e);

end;

print(+/ [encTab(c): c in plaintext]);

proc addToTable(prefix, node);

 if is_tuple node then
   addToTable(prefix + '0', node(1));
   addToTable(prefix + '1', node(2));
 else
   encTab(node) := prefix;
 end;

end proc;

proc getLFN();

 f := min/ domain forest;
 n := arb forest{f};
 forest less:= [f, n];
 return [f, n];

end proc;</lang>

Standard ML

Works with: SML/NJ

<lang sml>datatype 'a huffman_tree = Leaf of int * 'a

                        | Node of int * 'a huffman_tree * 'a huffman_tree

fun freq (Leaf (f, _)) = f

 | freq (Node (f, _, _)) = f

structure HuffmanPriority = struct

 type priority = int

(* reverse comparision to achieve min-heap *)

 fun compare (a, b) = Int.compare (b, a)
 type item = char huffman_tree
 val priority = freq

end

structure HPQueue = LeftPriorityQFn (HuffmanPriority)

fun buildTree charFreqs = let

 fun aux trees = let
   val (a, trees) = HPQueue.remove trees
 in
   if HPQueue.isEmpty trees then
     a
   else let
       val (b, trees) = HPQueue.remove trees
       val trees = HPQueue.insert (Node (freq a + freq b, a, b),
                                   trees)
     in
       aux trees
     end
 end
 val trees = HPQueue.fromList (map (fn (c,f) => Leaf (f,c)) charFreqs)

in

 aux trees

end

fun printCodes (revPrefix, Leaf (f, c)) =

     print (String.str c ^ "\t" ^ Int.toString f ^ "\t" ^
            implode (rev revPrefix) ^ "\n")
 | printCodes (revPrefix, Node (_, l, r)) = (
     printCodes (#"0"::revPrefix, l);
     printCodes (#"1"::revPrefix, r)
   );

let

 val test = "this is an example for huffman encoding"
 val charFreqs = HashTable.mkTable
                   (HashString.hashString o String.str, op=)
                   (42, Empty)
 val () =
   app (fn c =>
         let val old = getOpt (HashTable.find charFreqs c, 0)
         in HashTable.insert charFreqs (c, old+1)
         end)
       (explode test)
 val tree = buildTree (HashTable.listItemsi charFreqs)

in

 print "SYMBOL\tWEIGHT\tHUFFMAN CODE\n";
 printCodes ([], tree)

end</lang>

Tcl

uses package struct::prioqueue from

Library: tcllib

<lang tcl>package require Tcl 8.5 package require struct::prioqueue

proc huffmanEncode {str args} {

   array set opts [concat -dump false $args]
   
   set charcount [dict create]
   foreach char [split $str ""] {
       dict incr charcount $char
   }
   
   set pq [struct::prioqueue -dictionary] ;# want lower values to have higher priority
   dict for {char count} $charcount {
       $pq put $char $count
   }
   
   while {[$pq size] > 1} {
       lassign [$pq peekpriority 2] p1 p2
       $pq put [$pq get 2] [expr {$p1 + $p2}]
   }
   
   set encoding [walkTree [$pq get]]
   set map [dict create {*}[lreverse $encoding]]
   
   if {$opts(-dump)} {
       foreach key [lsort -command compare [dict keys $map]] {
           set char [dict get $map $key]
           puts "$char\t[dict get $charcount $char]\t$key"
       }
   }
   
   return $encoding

}

proc walkTree {tree {prefix ""}} {

   if {[llength $tree] < 2} {
       return [list $tree $prefix]
   }
   lassign $tree left right
   return [concat [walkTree $left "${prefix}0"] [walkTree $right "${prefix}1"]]

}

proc compare {a b} {

   if {[string length $a] < [string length $b]} {return -1}
   if {[string length $a] > [string length $b]} {return  1}
   return [string compare $a $b]

}

set str "this is an example for huffman encoding"

set encoding [huffmanEncode $str -dump true]

puts $str puts [string map $encoding $str]</lang> outputs

n	4	000
 	6	101
s	2	0010
m	2	0011
o	2	0100
i	3	1001
a	3	1100
e	3	1101
f	3	1110
t	1	01010
x	1	01011
p	1	01100
l	1	01101
r	1	01110
u	1	01111
c	1	10000
d	1	10001
g	1	11110
h	2	11111
this is an example for huffman encoding
0101011111100100101011001001010111000001011101010111100001101100011011101101111001000111010111111011111110111000111100000101110100010000010010001100100011110

Ursala

following the algorithm given above <lang Ursala>

import std
import nat
import flo

code_table = # takes a training dataset to a table <char: code...>

-+

  *^ ~&v?\~&iNC @v ~&t?\~&h ~&plrDSLrnPlrmPCAS/'01',
  ~&itB->h fleq-<&d; ^C\~&tt @hthPX ^V\~&lrNCC plus@bd,
  ^V(div@rrPlX,~&rlNVNC)^*D(plus:-0.@rS,~&)+ *K2 ^/~&h float+ length+-

cast %csAL

table = code_table 'this is an example for huffman encoding'</lang> output:

<
   `r: '00000',
   `l: '00001',
   `c: '00010',
   `u: '00011',
   `n: '001',
   `m: '0100',
   `h: '0101',
   `g: '01100',
   `d: '01101',
   `o: '0111',
   `s: '1000',
   `t: '10010',
   `p: '100110',
   `x: '100111',
   `a: '1010',
   `f: '1011',
   `i: '1100',
   `e: '1101',
   ` : '111'>