Move-to-front algorithm
Given a symbol table of a zero-indexed array of all possible input symbols
this algorithm reversibly transforms a sequence
of input symbols into an array of output numbers (indices).
The transform in many cases acts to give frequently repeated input symbols
lower indices which is useful in some compression algorithms.
- Encoding algorithm
for each symbol of the input sequence: output the index of the symbol in the symbol table move that symbol to the front of the symbol table
- Decoding algorithm
# Using the same starting symbol table for each index of the input sequence: output the symbol at that index of the symbol table move that symbol to the front of the symbol table
- Example
Encoding the string of character symbols 'broood' using a symbol table of the characters 'a'-to-'z'
Input | Output | SymbolTable |
---|---|---|
broood | 1 | 'abcdefghijklmnopqrstuvwxyz' |
broood | 1 17 | 'bacdefghijklmnopqrstuvwxyz' |
broood | 1 17 15 | 'rbacdefghijklmnopqstuvwxyz' |
broood | 1 17 15 0 | 'orbacdefghijklmnpqstuvwxyz' |
broood | 1 17 15 0 0 | 'orbacdefghijklmnpqstuvwxyz' |
broood | 1 17 15 0 0 5 | 'orbacdefghijklmnpqstuvwxyz' |
Decoding the indices back to the original symbol order:
Input | Output | SymbolTable |
---|---|---|
1 17 15 0 0 5 | b | 'abcdefghijklmnopqrstuvwxyz' |
1 17 15 0 0 5 | br | 'bacdefghijklmnopqrstuvwxyz' |
1 17 15 0 0 5 | bro | 'rbacdefghijklmnopqstuvwxyz' |
1 17 15 0 0 5 | broo | 'orbacdefghijklmnpqstuvwxyz' |
1 17 15 0 0 5 | brooo | 'orbacdefghijklmnpqstuvwxyz' |
1 17 15 0 0 5 | broood | 'orbacdefghijklmnpqstuvwxyz' |
- Task
- Encode and decode the following three strings of characters using the symbol table of the
characters 'a'-to-'z' as above.
- Show the strings and their encoding here.
- Add a check to ensure that the decoded string is the same as the original.
The strings are: 'broood', 'bananaaa', and 'hiphophiphop'. <(Note the spellings).
D
<lang d>import std.stdio, std.string, std.ascii, std.algorithm;
ptrdiff_t[] mtfEncoder(in string data) pure nothrow @safe in {
assert(data.countchars("a-z") == data.length);
} out(result) {
assert(result.length == data.length); assert(result.all!(e => e >= 0 && e < lowercase.length));
} body {
ubyte[lowercase.length] order = lowercase.representation; auto encoded = new typeof(return)(data.length);
size_t i = 0; foreach (immutable b; data) { immutable j = encoded[i++] = order[].countUntil(b); bringToFront(order[0 .. j], order[j .. j + 1]); }
return encoded;
}
string mtfDecoder(in ptrdiff_t[] encoded) pure nothrow @safe in {
assert(encoded.all!(e => e >= 0 && e < lowercase.length));
} out(result) {
assert(result.length == encoded.length); assert(result.countchars("a-z") == result.length);
} body {
ubyte[lowercase.length] order = lowercase.representation; auto decoded = new char[encoded.length];
size_t i = 0; foreach (immutable code; encoded) { decoded[i++] = order[code]; bringToFront(order[0 .. code], order[code .. code + 1]); }
return decoded;
}
void main() {
foreach (immutable word; ["broood", "bananaaa", "hiphophiphop"]) { immutable encoded = word.mtfEncoder; immutable decoded = encoded.mtfDecoder; writefln("'%s' encodes to %s, which decodes back to '%s'", word, encoded, decoded); assert(word == decoded); }
}</lang>
- Output:
'broood' encodes to [1, 17, 15, 0, 0, 5], which decodes back to 'broood' 'bananaaa' encodes to [1, 1, 13, 1, 1, 1, 0, 0], which decodes back to 'bananaaa' 'hiphophiphop' encodes to [7, 8, 15, 2, 15, 2, 2, 3, 2, 2, 3, 2], which decodes back to 'hiphophiphop'
Go
<lang go>package main
import (
"bytes" "fmt"
)
type moveToFront string
func (symbols moveToFront) encode(s string) []int {
seq := make([]int, len(s)) pad := []byte(symbols) c1 := []byte{0} for i := 0; i < len(s); i++ { c := s[i] c1[0] = c x := bytes.Index(pad, c1) seq[i] = x copy(pad[1:], pad[:x]) pad[0] = c } return seq
} func (symbols moveToFront) decode(seq []int) string {
chars := make([]byte, len(seq)) pad := []byte(symbols) for i, x := range seq { c := pad[x] chars[i] = c copy(pad[1:], pad[:x]) pad[0] = c } return string(chars)
}
func main() {
m := moveToFront("abcdefghijklmnopqrstuvwxyz") for _, s := range []string{"broood", "bananaaa", "hiphophiphop"} { enc := m.encode(s) dec := m.decode(enc) fmt.Println(s, enc, dec) if dec != s { panic("Whoops!") } }
}</lang>
- Output:
broood [1 17 15 0 0 5] broood bananaaa [1 1 13 1 1 1 0 0] bananaaa hiphophiphop [7 8 15 2 15 2 2 3 2 2 3 2] hiphophiphop
J
<lang J>spindizzy=:3 :0
'seq table'=. y ndx=.$0 orig=. table for_sym. seq do. ndx=.ndx,table i.sym table=. sym,table-.sym end. ndx assert. seq-:yzzidnips ndx;orig
)
yzzidnips=:3 :0
'ndx table'=. y seq=. for_n. ndx do. seq=.seq,sym=. n{table table=. sym,table-.sym end. seq
)</lang>
Required examples:
<lang J> spindizzy 'broood';'abcdefghijklmnopqrstuvwxyz' 1 17 15 0 0 5
spindizzy 'bananaaa';'abcdefghijklmnopqrstuvwxyz'
1 1 13 1 1 1 0 0
spindizzy 'hiphophiphop';'abcdefghijklmnopqrstuvwxyz'
7 8 15 2 15 2 2 3 2 2 3 2</lang>
It's not clear, though, why anyone would think that this is any better than lookups against an unmodified symbol table.
Python
<lang python>from __future__ import print_function from string import ascii_lowercase
SYMBOLTABLE = list(ascii_lowercase)
def move2front_encode(strng, symboltable):
sequence, pad = [], symboltable[::] for char in strng: indx = pad.index(char) sequence.append(indx) pad = [pad.pop(indx)] + pad return sequence
def move2front_decode(sequence, symboltable):
chars, pad = [], symboltable[::] for indx in sequence: char = pad[indx] chars.append(char) pad = [pad.pop(indx)] + pad return .join(chars)
if __name__ == '__main__':
for s in ['broood', 'bananaaa', 'hiphophiphop']: encode = move2front_encode(s, SYMBOLTABLE) print('%14r encodes to %r' % (s, encode), end=', ') decode = move2front_decode(encode, SYMBOLTABLE) print('which decodes back to %r' % decode) assert s == decode, 'Whoops!'</lang>
- Output:
'broood' encodes to [1, 17, 15, 0, 0, 5], which decodes back to 'broood' 'bananaaa' encodes to [1, 1, 13, 1, 1, 1, 0, 0], which decodes back to 'bananaaa' 'hiphophiphop' encodes to [7, 8, 15, 2, 15, 2, 2, 3, 2, 2, 3, 2], which decodes back to 'hiphophiphop'