Consistent overhead byte stuffing

From Rosetta Code
Revision as of 02:13, 23 September 2023 by Phunanon (talk | contribs) (Initial draft task with →‎C: and →‎Insitux)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Consistent overhead byte stuffing is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

The Consistent Overhead Byte Stuffing (COBS) algorithm is used to demarcate a byte stream into frames. Examples can be found in its Wikipedia article.

Task

To encode, each instance of the byte 0x00 in the unencoded stream is replaced with the number of bytes until the next instance of 0x00 (we'll call this a milestone). If 0x00 does not appear within the 254 bytes following a milestone, the algorithm must emit an extra milestone at the 255th byte. In the encoded output, the first byte is the first milestone, and the final milestone is the number of bytes until the final byte, which is the 0x00 frame delimiter.

Bonus tasks
  •   Decode an encoded stream, including error-check functionality.
  •   Support a non-zero marker

C

Encoder:

#include <stddef.h>
#include <stdint.h>
#include <assert.h>

/** COBS encode data to buffer
	@param data Pointer to input data to encode
	@param length Number of bytes to encode
	@param buffer Pointer to encoded output buffer; length + length / 254 + 1
	       bytes is always sufficient
	@return Encoded buffer length in bytes
	@note Does not output delimiter byte
	@note Never writes to buffer at or beyond the returned length
*/
size_t cobsEncode(const void *data, size_t length, uint8_t *buffer)
{
	assert(data && buffer);

	const uint8_t *byte = (const uint8_t *)data; // Input byte pointer
	uint8_t *encode = buffer; // Encoded byte pointer
	uint8_t *codep = encode++; // Reserved slot for the current block's code
	uint8_t code = 1; // Code value (block length + 1)

	while (length--)
	{
		const uint8_t value = *byte++;

		if (value) // Byte not zero, write it
		{
			*encode++ = value;
			++code;
		}

		if (!value || code == 0xff) // Input is zero or block completed, restart
		{
			*codep = code;
			code = 1;

			if (!value || length)
				codep = encode++; // Reserve the code slot of the next block
			else
				codep = NULL; // Input ended on a full block: nothing pending
		}
	}

	// Write the final code only if a slot was reserved for it; otherwise the
	// write would land one byte past the encoded output.
	if (codep)
		*codep = code;

	return (size_t)(encode - buffer);
}

Decoder:

/** COBS decode data from buffer
	@param buffer Pointer to encoded input bytes
	@param length Number of bytes to decode
	@param data Pointer to decoded output data
	@return Number of bytes successfully decoded
	@note Stops decoding if delimiter byte is found; the delimiter itself
	      produces no output
	@note Does not validate the input: a truncated final block is copied
	      as far as the input goes
*/
size_t cobsDecode(const uint8_t *buffer, size_t length, void *data)
{
	assert(buffer && data);

	const uint8_t *byte = buffer; // Encoded input byte pointer
	uint8_t *decode = (uint8_t *)data; // Decoded output byte pointer

	// code: previous block's code byte (0xff means no implicit zero follows it)
	// block: number of data bytes still to copy from the current block
	for (uint8_t code = 0xff, block = 0; byte < buffer + length; --block)
	{
		if (block) // Decode block byte
			*decode++ = *byte++;
		else
		{
			block = *byte++; // Next block length
			// The previous block ended on an encoded zero only if another
			// block follows; a delimiter here means the data simply ended.
			if (block && code != 0xff)
				*decode++ = 0;
			code = block;
			if (!code) // Delimiter code found
				break;
		}
	}

	return (size_t)(decode - (uint8_t *)data);
}

Insitux

; COBS-encode the byte vector `unencoded`, returning the encoded bytes
; followed by a 0 delimiter. The function recurs on the remaining input,
; accumulating the zero-free chunks seen so far.
; NOTE(review): `%1` is presumably the second positional argument (the
; accumulator passed by `recur`), absent on the initial call - confirm.
(function COBS unencoded
  ; chunk:   leading items of the input up to (not including) the first 0
  ; encoded: accumulator with at most 254 items of this chunk appended
  ; more?:   true when input remains beyond this chunk
  (let chunk   (take-until [0] unencoded)
       length  (len chunk)
       encoded (append (first 254 chunk) (or %1 []))
       more?   (< length (len unencoded)))
  ; A run of 254 non-zero bytes with input remaining (or a longer run) is
  ; split: only the 254 bytes are consumed, no zero is skipped.
  (if (or (and (= length 254) more?) (> length 254))
    (recur (skip 254 unencoded) encoded)
    (if more?
      ; Consume the chunk together with the 0 that terminated it.
      (recur (skip (inc length) unencoded) encoded)
      ; Input exhausted: emit each chunk prefixed by its length + 1,
      ; flatten, then append the 0 delimiter.
      (append 0 (flat-map @(.. vec (inc (len %))) encoded)))))

Unit tests:

; Each pair is [unencoded-input expected-encoding]; every expected encoding
; ends with the 0x00 delimiter. The assertion below is evaluated per pair.
(for [a b] [
    ; Short inputs with zeros in various positions
    [[0x00] [0x01 0x01 0x00]]
    [[0x00 0x00] [0x01 0x01 0x01 0x00]]
    [[0x00 0x11 0x00] [0x01 0x02 0x11 0x01 0x00]]
    [[0x11 0x22 0x00 0x33] [0x03 0x11 0x22 0x02 0x33 0x00]]
    [[0x11 0x22 0x33 0x44] [0x05 0x11 0x22 0x33 0x44 0x00]]
    [[0x11 0x00 0x00 0x00] [0x02 0x11 0x01 0x01 0x01 0x00]]
    ; Long inputs around the 254-byte block limit (0xFF code)
    [(range 1 255) (.. vec 0xFF (range 1 255) 0x00)]
    [(range 255) (.. vec 0x01 0xFF (range 1 255) 0x00)]
    [(range 1 256) (.. vec 0xFF (range 1 255) 0x02 0xFF 0x00)]
    [(.. vec (range 2 256) 0x00) (.. vec 0xFF (range 2 256) 0x01 0x01 0x00)]
    [(.. vec (range 3 256) 0x00 0x01) (.. vec 0xFE (range 3 256) 0x02 0x01 0x00)]
  ]
  (assert (= (COBS a) b)))