Consistent overhead byte stuffing: Difference between revisions

Content added Content deleted

Inline

Revision as of 16:16, 24 September 2023

The Consistent Overhead Byte Stuffing (COBS) algorithm is used to demarcate a byte stream into frames. Examples can be found in its Wikipedia article.

Task

To encode, instances of the byte 0x00 in the unencoded stream are substituted with the number of bytes until the next instance of 0x00 (we'll call this a milestone). If 0x00 does not appear within the next 254 bytes after a milestone the algorithm must emit an extra milestone at the 255th byte. Of the encoded output, the first byte is the first milestone, and the final milestone will be the number of bytes until the final byte - 0x00.

Bonus tasks

Decode an encoded stream, including error-check functionality.
Support a non-zero marker

C

Encoder:

#include <stddef.h>
#include <stdint.h>
#include <assert.h>

/** COBS encode data to buffer
	@param data Pointer to input data to encode
	@param length Number of bytes to encode
	@param buffer Pointer to encoded output buffer
	@return Encoded buffer length in bytes
	@note Does not output delimiter byte
*/
size_t cobsEncode(const void *data, size_t length, uint8_t *buffer)
{
	assert(data && buffer);

	uint8_t *encode = buffer; // Encoded byte pointer
	uint8_t *codep = encode++; // Output code pointer
	uint8_t code = 1; // Code value

	for (const uint8_t *byte = (const uint8_t *)data; length--; ++byte)
	{
		if (*byte) // Byte not zero, write it
			*encode++ = *byte, ++code;

		if (!*byte || code == 0xff) // Input is zero or block completed, restart
		{
			*codep = code, code = 1, codep = encode;
			if (!*byte || length)
				++encode;
		}
	}
	*codep = code; // Write final code value

	return (size_t)(encode - buffer);
}

Decoder:

/** COBS decode data from buffer
	@param buffer Pointer to encoded input bytes
	@param length Number of bytes to decode
	@param data Pointer to decoded output data
	@return Number of bytes successfully decoded
	@note Stops decoding if delimiter byte is found
*/
size_t cobsDecode(const uint8_t *buffer, size_t length, void *data)
{
	assert(buffer && data);

	const uint8_t *byte = buffer; // Encoded input byte pointer
	uint8_t *decode = (uint8_t *)data; // Decoded output byte pointer

	for (uint8_t code = 0xff, block = 0; byte < buffer + length; --block)
	{
		if (block) // Decode block byte
			*decode++ = *byte++;
		else
		{
			if (code != 0xff) // Encoded zero, write it
				*decode++ = 0;
			block = code = *byte++; // Next block length
			if (!code) // Delimiter code found
				break;
		}
	}

	return (size_t)(decode - (uint8_t *)data);
}

Insitux

(function COBS unencoded
  (let chunk   (take-until [0] unencoded)
       length  (len chunk)
       encoded (append (first 254 chunk) (or %1 []))
       more?   (< length (len unencoded)))
  (if (or (and (= length 254) more?) (> length 254))
    (recur (skip 254 unencoded) encoded)
    (if more?
      (recur (skip (inc length) unencoded) encoded)
      (append 0 (flat-map @(.. vec (inc (len %))) encoded)))))

Unit tests:

(for [a b] [
    [[0x00] [0x01 0x01 0x00]]
    [[0x00 0x00] [0x01 0x01 0x01 0x00]]
    [[0x00 0x11 0x00] [0x01 0x02 0x11 0x01 0x00]]
    [[0x11 0x22 0x00 0x33] [0x03 0x11 0x22 0x02 0x33 0x00]]
    [[0x11 0x22 0x33 0x44] [0x05 0x11 0x22 0x33 0x44 0x00]]
    [[0x11 0x00 0x00 0x00] [0x02 0x11 0x01 0x01 0x01 0x00]]
    [(range 1 255) (.. vec 0xFF (range 1 255) 0x00)]
    [(range 255) (.. vec 0x01 0xFF (range 1 255) 0x00)]
    [(range 1 256) (.. vec 0xFF (range 1 255) 0x02 0xFF 0x00)]
    [(.. vec (range 2 256) 0x00) (.. vec 0xFF (range 2 256) 0x01 0x01 0x00)]
    [(.. vec (range 3 256) 0x00 0x01) (.. vec 0xFE (range 3 256) 0x02 0x01 0x00)]
  ]
  (assert (= (COBS a) b)))

Phix

with javascript_semantics
function cobs_encode(sequence data)
    sequence buffer = {-1}
    integer cdx = 1, code = 1, addlast = true
    for b in data do
        if b then
            buffer &= b
            code += 1
        end if
        addlast = true
        if b=0 or code=0xFF then
            addlast = code!=0xFF
            buffer[cdx] = code
            code = 1
            buffer &= -1
            cdx = length(buffer)
        end if
    end for
    buffer[cdx] = iff(addlast?code:0)
    if addlast then
        buffer &= 0
    end if
    return buffer
end function

function cobs_decode(sequence buffer)
    sequence data = {}
    integer bdx = 1, l = length(buffer)
    while bdx<l do
        integer code = buffer[bdx]
        bdx += 1
        for i=1 to code-1 do
            data &= buffer[bdx]
            bdx += 1
        end for
        if code<0xFF and bdx<l then
            data &= 0
        end if
    end while
    return data
end function

constant tests = {{{0x00}, {0x01, 0x01, 0x00}},
                  {{0x00, 0x00}, {0x01, 0x01, 0x01, 0x00}},
                  {{0x00, 0x11, 0x00}, {0x01, 0x02, 0x11, 0x01, 0x00}},
                  {{0x11, 0x22, 0x00, 0x33}, {0x03, 0x11, 0x22, 0x02, 0x33, 0x00}},
                  {{0x11, 0x22, 0x33, 0x44}, {0x05, 0x11, 0x22, 0x33, 0x44, 0x00}},
                  {{0x11, 0x00, 0x00, 0x00}, {0x02, 0x11, 0x01, 0x01, 0x01, 0x00}},
                  {tagset(0xFE,0x01), flatten({0xFF, tagset(0xFE,0x01), 0x00})},
                  {tagset(0xFE,0x00), flatten({0x01, 0xFF, tagset(0xFE,0x01), 0x00})},
                  {tagset(0xFF,0x01), flatten({0xFF, tagset(0xFE,0x01), 0x02, 0xFF, 0x00})},
                  {tagset(0xFF,0x02) & {0x00}, flatten({0xFF, tagset(0xFF,0x02), 0x01, 0x01, 0x00})},
                  {tagset(0xFF,0x03) & {0x00, 0x01}, flatten({0xFE, tagset(0xFF,0x03), 0x02, 0x01, 0x00})}}
for t in tests do
    sequence {orig,expected} = t,
             actual = cobs_encode(orig),
             decoded = cobs_decode(actual)
    string ostr = iff(length(orig)<5?join(orig," ",fmt:="%02x")
                  :join(orig[1..3],fmt:="%02x")&" ... "&join(orig[253..$],fmt:="%02x")),
           astr = iff(length(actual)<7?join(actual,fmt:="%02x")
                  :join(actual[1..4],fmt:="%02x")&" ... "&join(actual[254..$],fmt:="%02x")),
           eOK = iff(actual==expected?"OK":"***???***"),
           dOK = iff(decoded==orig?"OK":"***???***")
    printf(1,"%-23s -> %-33s encode:%s, decode: %s\n",{ostr,astr,eOK,dOK})
end for

Output:

00                      -> 01 01 00                          encode:OK, decode: OK
00 00                   -> 01 01 01 00                       encode:OK, decode: OK
00 11 00                -> 01 02 11 01 00                    encode:OK, decode: OK
11 22 00 33             -> 03 11 22 02 33 00                 encode:OK, decode: OK
11 22 33 44             -> 05 11 22 33 44 00                 encode:OK, decode: OK
11 00 00 00             -> 02 11 01 01 01 00                 encode:OK, decode: OK
01 02 03 ... FD FE      -> FF 01 02 03 ... FD FE 00          encode:OK, decode: OK
00 01 02 ... FC FD FE   -> 01 FF 01 02 ... FC FD FE 00       encode:OK, decode: OK
01 02 03 ... FD FE FF   -> FF 01 02 03 ... FD FE 02 FF 00    encode:OK, decode: OK
02 03 04 ... FE FF 00   -> FF 02 03 04 ... FE FF 01 01 00    encode:OK, decode: OK
03 04 05 ... FF 00 01   -> FE 03 04 05 ... FF 02 01 00       encode:OK, decode: OK

Wren

Library: Wren-fmt

This only supports using a zero marker (see talk page).

import "./fmt" for Fmt

class COBS {
    static isByte(i) { (i is Num) && i.isInteger && i >= 0 && i < 256 }

    static encode(data) {
        if (!((data is List) && data.count > 0 && data.all { |i| isByte(i) })) {
            Fiber.abort("data must be a non-empty list of bytes.")
        }
        var enc = [-1]
        var ins = 0
        var code = 1
        var addLastCode = true
        for (byte in data) {
            if (byte != 0) {
                enc.add(byte)
                code = code + 1
            }
            addLastCode = true
            if (byte == 0|| code == 255) {
                if (code == 255) addLastCode = false
                enc[ins] = code
                code = 1
                ins = enc.count
                enc.add(-1)
            }
        }
        if (addLastCode) {
            enc[ins] = code
            enc.add(0)
        } else {
            enc[ins] = 0
        }
        return enc
    }

    static decode(enc) {
        if (!((enc is List) && enc.count > 0 && enc[-1] == 0 &&
            enc.all { |i| isByte(i) })) {
            Fiber.abort("encoded data must be a non-empty list of zero-terminated bytes.")
        }
        var dec = []
        var code = 255
        var block = 0
        for (byte in enc[0..-2]) {
            if (block != 0) {
                dec.add(byte)
            } else {
                if (code != 255) dec.add(0)
                code = byte
                block = code
                if (code == 0) break
            }
            block = block - 1
        }
        return dec
    }
}

var examples = [
    [0x00],
    [0x00, 0x00],
    [0x00, 0x11, 0x00],
    [0x11, 0x22, 0x00, 0x33],
    [0x11, 0x22, 0x33, 0x44],
    [0x11, 0x00, 0x00, 0x00],
    (0x01..0xfe).toList,
    (0x00..0xfe).toList,
    (0x01..0xff).toList,
    (0x02..0xff).toList + [0x00],
    (0x03..0xff).toList + [0x00, 0x01]
]

var encoded = []
System.print("COBS encoding (hex):")
for (example in examples) {
    var res = COBS.encode(example)
    encoded.add(res)
    if (example.count < 5) {
        Fmt.print("$-33s -> $02X", Fmt.swrite("$02X", example), res)
    } else {
        var e = Fmt.va("Xz", 2, example, 0, " ", "", "", 5, "...")
        var r = Fmt.va("Xz", 2, res, 0, " ", "", "", 5, "...")
        Fmt.print("$s -> $s", e, r)
    }
}
System.print("\nCOBS decoding (hex):")
for (enc in encoded) {
    var res = COBS.decode(enc)
    if (enc.count < 7) {
        Fmt.print("$-33s -> $02X", Fmt.swrite("$02X", enc), res)
    } else {
        var e = Fmt.va("Xz", 2, enc, 0, " ", "", "", 5, "...")
        var r = Fmt.va("Xz", 2, res, 0, " ", "", "", 5, "...")
        Fmt.print("$s -> $s", e, r)
    }
}

Output:

COBS encoding (hex):
00                                -> 01 01 00
00 00                             -> 01 01 01 00
00 11 00                          -> 01 02 11 01 00
11 22 00 33                       -> 03 11 22 02 33 00
11 22 33 44                       -> 05 11 22 33 44 00
11 00 00 00                       -> 02 11 01 01 01 00
01 02 03 04 05 ... FA FB FC FD FE -> FF 01 02 03 04 ... FB FC FD FE 00
00 01 02 03 04 ... FA FB FC FD FE -> 01 FF 01 02 03 ... FB FC FD FE 00
01 02 03 04 05 ... FB FC FD FE FF -> FF 01 02 03 04 ... FD FE 02 FF 00
02 03 04 05 06 ... FC FD FE FF 00 -> FF 02 03 04 05 ... FE FF 01 01 00
03 04 05 06 07 ... FD FE FF 00 01 -> FE 03 04 05 06 ... FE FF 02 01 00

COBS decoding (hex):
01 01 00                          -> 00
01 01 01 00                       -> 00 00
01 02 11 01 00                    -> 00 11 00
03 11 22 02 33 00                 -> 11 22 00 33
05 11 22 33 44 00                 -> 11 22 33 44
02 11 01 01 01 00                 -> 11 00 00 00
FF 01 02 03 04 ... FB FC FD FE 00 -> 01 02 03 04 05 ... FA FB FC FD FE
01 FF 01 02 03 ... FB FC FD FE 00 -> 00 01 02 03 04 ... FA FB FC FD FE
FF 01 02 03 04 ... FD FE 02 FF 00 -> 01 02 03 04 05 ... FB FC FD FE FF
FF 02 03 04 05 ... FE FF 01 01 00 -> 02 03 04 05 06 ... FC FD FE FF 00
FE 03 04 05 06 ... FE FF 02 01 00 -> 03 04 05 06 07 ... FD FE FF 00 01