Consistent overhead byte stuffing: Difference between revisions
Content added Content deleted
m (→{{header|Phix}}: duplicated test) |
(Improve bonus task descriptions) |
||
Line 11: | Line 11: | ||
;Bonus tasks: |
;Bonus tasks: |
||
::* Decode an encoded stream |
::* Decode an encoded stream |
||
::* Check for errors during decoding - an unexpected instance of the chosen marker |
|||
::* Support a non-zero marker |
::* Support a non-zero marker |
||
Revision as of 21:23, 27 September 2023
Consistent overhead byte stuffing is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
The Consistent Overhead Byte Stuffing (COBS) algorithm is used to demarcate a byte stream into frames. Examples can be found in its Wikipedia article.
- Task
To encode, instances of the byte 0x00
in the unencoded stream are substituted with the number of bytes until the next instance of 0x00
(we'll call this a milestone).
If 0x00
does not appear within the next 254 bytes after a milestone the algorithm must emit an extra milestone at the 255th byte.
Of the encoded output, the first byte is the first milestone, and the final milestone will be the number of bytes until the final byte - 0x00
.
- Bonus tasks
- Decode an encoded stream
- Check for errors during decoding - an unexpected instance of the chosen marker
- Support a non-zero marker
C
Encoder:
#include <stddef.h>
#include <stdint.h>
#include <assert.h>
/** COBS encode data to buffer
@param data Pointer to input data to encode
@param length Number of bytes to encode
@param buffer Pointer to encoded output buffer
@return Encoded buffer length in bytes
@note Does not output delimiter byte
*/
size_t cobsEncode(const void *data, size_t length, uint8_t *buffer)
{
assert(data && buffer);
uint8_t *encode = buffer; // Encoded byte pointer
uint8_t *codep = encode++; // Output code pointer
uint8_t code = 1; // Code value
for (const uint8_t *byte = (const uint8_t *)data; length--; ++byte)
{
if (*byte) // Byte not zero, write it
*encode++ = *byte, ++code;
if (!*byte || code == 0xff) // Input is zero or block completed, restart
{
*codep = code, code = 1, codep = encode;
if (!*byte || length)
++encode;
}
}
*codep = code; // Write final code value
return (size_t)(encode - buffer);
}
Decoder:
/** COBS decode data from buffer
@param buffer Pointer to encoded input bytes
@param length Number of bytes to decode
@param data Pointer to decoded output data
@return Number of bytes successfully decoded
@note Stops decoding if delimiter byte is found
*/
size_t cobsDecode(const uint8_t *buffer, size_t length, void *data)
{
assert(buffer && data);
const uint8_t *byte = buffer; // Encoded input byte pointer
uint8_t *decode = (uint8_t *)data; // Decoded output byte pointer
for (uint8_t code = 0xff, block = 0; byte < buffer + length; --block)
{
if (block) // Decode block byte
*decode++ = *byte++;
else
{
if (code != 0xff) // Encoded zero, write it
*decode++ = 0;
block = code = *byte++; // Next block length
if (!code) // Delimiter code found
break;
}
}
return (size_t)(decode - (uint8_t *)data);
}
Insitux
(function COBS unencoded
(let chunk (take-until [0] unencoded)
length (len chunk)
encoded (append (first 254 chunk) (or %1 []))
more? (< length (len unencoded)))
(if (or (and (= length 254) more?) (> length 254))
(recur (skip 254 unencoded) encoded)
(if more?
(recur (skip (inc length) unencoded) encoded)
(append 0 (flat-map @(.. vec (inc (len %))) encoded)))))
Unit tests:
(for [a b] [
[[0x00] [0x01 0x01 0x00]]
[[0x00 0x00] [0x01 0x01 0x01 0x00]]
[[0x00 0x11 0x00] [0x01 0x02 0x11 0x01 0x00]]
[[0x11 0x22 0x00 0x33] [0x03 0x11 0x22 0x02 0x33 0x00]]
[[0x11 0x22 0x33 0x44] [0x05 0x11 0x22 0x33 0x44 0x00]]
[[0x11 0x00 0x00 0x00] [0x02 0x11 0x01 0x01 0x01 0x00]]
[(range 1 255) (.. vec 0xFF (range 1 255) 0x00)]
[(range 255) (.. vec 0x01 0xFF (range 1 255) 0x00)]
[(range 1 256) (.. vec 0xFF (range 1 255) 0x02 0xFF 0x00)]
[(.. vec (range 2 256) 0x00) (.. vec 0xFF (range 2 256) 0x01 0x01 0x00)]
[(.. vec (range 3 256) 0x00 0x01) (.. vec 0xFE (range 3 256) 0x02 0x01 0x00)]
]
(assert (= (COBS a) b)))
Julia
From Wren code for encode and tests, from Phix code for decode.
module COBSPackets
export COBSencode, COBSdecode
"""
COBSencode(data)
Return result of encoding `inputdata` into COBS packet format.
"""
function COBSencode(inputdata, marker = 0x00)
output = [0xff]
codeindex, code = 1, 1
addlastcode = true
for byte in inputdata
if byte != marker
push!(output, byte)
code += 1
end
addlastcode = true
if byte == marker || code == 255
code == 255 && (addlastcode = false)
output[codeindex] = code
code = 1
push!(output, 0xff)
codeindex = length(output)
end
end
if addlastcode
output[codeindex] = code
push!(output, marker)
else
output[codeindex] = marker
end
return output
end
"""
COBSdecode(buffer)
Return result of decoding `buffer` from COBS encoded format.
"""
function COBSdecode(buffer::AbstractVector, marker = 0x00)
decoded = UInt8[]
bdx, len = 1, length(buffer)
while bdx < len
code = buffer[bdx]
bdx += 1
for i in 1:code-1
push!(decoded, buffer[bdx])
bdx += 1
bdx >= len && break
end
code < 0xff && bdx < len && push!(decoded, marker)
end
return decoded
end
end # module
using .COBSPackets
const tests = [
[0x00],
[0x00, 0x00],
[0x00, 0x11, 0x00],
[0x11, 0x22, 0x00, 0x33],
[0x11, 0x22, 0x33, 0x44],
[0x11, 0x00, 0x00, 0x00],
collect(0x01:0xfe),
collect(0x00:0xfe),
collect(0x01:0xff),
[collect(0x02:0xff); 0x00],
[collect(0x03:0xff); 0x00; 0x01],
]
function ab(arr)
hx = join(map(c -> string(c, base = 16, pad = 2), arr), " ")
return length(hx) < 20 ? hx : hx[begin:begin+10] * " ... " * hx[end-10:end]
end
for t in tests
println(ab(t), " -> ", ab(COBSencode(t, 3)), " -> ", ab(COBSdecode(COBSencode(t, 3), 3)))
end
- Output:
00 -> 01 01 00 -> 00 00 00 -> 01 01 01 00 -> 00 00 00 11 00 -> 01 02 11 01 00 -> 00 11 00 11 22 00 33 -> 03 11 22 02 33 00 -> 11 22 00 33 11 22 33 44 -> 05 11 22 33 44 00 -> 11 22 33 44 11 00 00 00 -> 02 11 01 01 01 00 -> 11 00 00 00 01 02 03 04 ... fb fc fd fe -> ff 01 02 03 ... fc fd fe 00 -> 01 02 03 04 ... fb fc fd fe 00 01 02 03 ... fb fc fd fe -> 01 ff 01 02 ... fc fd fe 00 -> 00 01 02 03 ... fb fc fd fe 01 02 03 04 ... fc fd fe ff -> ff 01 02 03 ... fe 02 ff 00 -> 01 02 03 04 ... fc fd fe ff 02 03 04 05 ... fd fe ff 00 -> ff 02 03 04 ... ff 01 01 00 -> 02 03 04 05 ... fd fe ff 00 03 04 05 06 ... fe ff 00 01 -> fe 03 04 05 ... ff 02 01 00 -> 03 04 05 06 ... fe ff 00 01
Phix
with javascript_semantics function cobs_encode(sequence data) sequence buffer = {-1} integer cdx = 1, code = 1, addlast = true for b in data do if b then buffer &= b code += 1 end if addlast = true if b=0 or code=0xFF then addlast = code!=0xFF buffer[cdx] = code code = 1 buffer &= -1 cdx = length(buffer) end if end for buffer[cdx] = iff(addlast?code:0) if addlast then buffer &= 0 end if return buffer end function function cobs_decode(sequence buffer) sequence data = {} integer bdx = 1, l = length(buffer) while bdx<l do integer code = buffer[bdx] bdx += 1 for i=1 to code-1 do data &= buffer[bdx] bdx += 1 end for if code<0xFF and bdx<l then data &= 0 end if end while return data end function constant tests = {{{0x00}, {0x01, 0x01, 0x00}}, {{0x00, 0x00}, {0x01, 0x01, 0x01, 0x00}}, {{0x00, 0x11, 0x00}, {0x01, 0x02, 0x11, 0x01, 0x00}}, {{0x11, 0x22, 0x00, 0x33}, {0x03, 0x11, 0x22, 0x02, 0x33, 0x00}}, {{0x11, 0x22, 0x33, 0x44}, {0x05, 0x11, 0x22, 0x33, 0x44, 0x00}}, {{0x11, 0x00, 0x00, 0x00}, {0x02, 0x11, 0x01, 0x01, 0x01, 0x00}}, {tagset(0xFE,0x01), 0xFF & tagset(0xFE,0x01) & 0x00}, {tagset(0xFE,0x00), {0x01, 0xFF} & tagset(0xFE,0x01) & 0x00}, {tagset(0xFF,0x01), 0xFF & tagset(0xFE,0x01) & {0x02, 0xFF, 0x00}}, {tagset(0xFF,0x02) & {0x00}, 0xFF & tagset(0xFF,0x02) & {0x01, 0x01, 0x00}}, {tagset(0xFF,0x03) & {0x00, 0x01}, 0xFE & tagset(0xFF,0x03) & {0x02, 0x01, 0x00}}} function ab(sequence s, integer m,f,l) s = iff(length(s)<m?join(s,fmt:="%02x") :join(s[1..f],fmt:="%02x") & " ... " & join(s[l..$],fmt:="%02x")) return s end function for t in tests do sequence {orig,expected} = t, actual = cobs_encode(orig), decoded = cobs_decode(actual) bool eOK = (actual==expected), dOK = (decoded==orig) printf(1,"%-23s -> %-33s encode:%t, decode:%t\n", {ab(orig,5,3,253),ab(actual,7,4,254),eOK,dOK}) end for
- Output:
00 -> 01 01 00 encode:true, decode:true 00 00 -> 01 01 01 00 encode:true, decode:true 00 11 00 -> 01 02 11 01 00 encode:true, decode:true 11 22 00 33 -> 03 11 22 02 33 00 encode:true, decode:true 11 22 33 44 -> 05 11 22 33 44 00 encode:true, decode:true 11 00 00 00 -> 02 11 01 01 01 00 encode:true, decode:true 01 02 03 ... FD FE -> FF 01 02 03 ... FD FE 00 encode:true, decode:true 00 01 02 ... FC FD FE -> 01 FF 01 02 ... FC FD FE 00 encode:true, decode:true 01 02 03 ... FD FE FF -> FF 01 02 03 ... FD FE 02 FF 00 encode:true, decode:true 02 03 04 ... FE FF 00 -> FF 02 03 04 ... FE FF 01 01 00 encode:true, decode:true 03 04 05 ... FF 00 01 -> FE 03 04 05 ... FF 02 01 00 encode:true, decode:true
Wren
This only supports using a zero marker (see talk page).
import "./fmt" for Fmt
class COBS {
static isByte(i) { (i is Num) && i.isInteger && i >= 0 && i < 256 }
static encode(data) {
if (!((data is List) && data.count > 0 && data.all { |i| isByte(i) })) {
Fiber.abort("data must be a non-empty list of bytes.")
}
var enc = [-1]
var ins = 0
var code = 1
var addLastCode = true
for (byte in data) {
if (byte != 0) {
enc.add(byte)
code = code + 1
}
addLastCode = true
if (byte == 0|| code == 255) {
if (code == 255) addLastCode = false
enc[ins] = code
code = 1
ins = enc.count
enc.add(-1)
}
}
if (addLastCode) {
enc[ins] = code
enc.add(0)
} else {
enc[ins] = 0
}
return enc
}
static decode(enc) {
if (!((enc is List) && enc.count > 0 && enc[-1] == 0 &&
enc.all { |i| isByte(i) })) {
Fiber.abort("encoded data must be a non-empty list of zero-terminated bytes.")
}
var dec = []
var code = 255
var block = 0
for (byte in enc[0..-2]) {
if (block != 0) {
dec.add(byte)
} else {
if (code != 255) dec.add(0)
code = byte
block = code
if (code == 0) break
}
block = block - 1
}
return dec
}
}
var examples = [
[0x00],
[0x00, 0x00],
[0x00, 0x11, 0x00],
[0x11, 0x22, 0x00, 0x33],
[0x11, 0x22, 0x33, 0x44],
[0x11, 0x00, 0x00, 0x00],
(0x01..0xfe).toList,
(0x00..0xfe).toList,
(0x01..0xff).toList,
(0x02..0xff).toList + [0x00],
(0x03..0xff).toList + [0x00, 0x01]
]
var encoded = []
System.print("COBS encoding (hex):")
for (example in examples) {
var res = COBS.encode(example)
encoded.add(res)
if (example.count < 5) {
Fmt.print("$-33s -> $02X", Fmt.swrite("$02X", example), res)
} else {
var e = Fmt.va("Xz", 2, example, 0, " ", "", "", 5, "...")
var r = Fmt.va("Xz", 2, res, 0, " ", "", "", 5, "...")
Fmt.print("$s -> $s", e, r)
}
}
System.print("\nCOBS decoding (hex):")
for (enc in encoded) {
var res = COBS.decode(enc)
if (enc.count < 7) {
Fmt.print("$-33s -> $02X", Fmt.swrite("$02X", enc), res)
} else {
var e = Fmt.va("Xz", 2, enc, 0, " ", "", "", 5, "...")
var r = Fmt.va("Xz", 2, res, 0, " ", "", "", 5, "...")
Fmt.print("$s -> $s", e, r)
}
}
- Output:
COBS encoding (hex): 00 -> 01 01 00 00 00 -> 01 01 01 00 00 11 00 -> 01 02 11 01 00 11 22 00 33 -> 03 11 22 02 33 00 11 22 33 44 -> 05 11 22 33 44 00 11 00 00 00 -> 02 11 01 01 01 00 01 02 03 04 05 ... FA FB FC FD FE -> FF 01 02 03 04 ... FB FC FD FE 00 00 01 02 03 04 ... FA FB FC FD FE -> 01 FF 01 02 03 ... FB FC FD FE 00 01 02 03 04 05 ... FB FC FD FE FF -> FF 01 02 03 04 ... FD FE 02 FF 00 02 03 04 05 06 ... FC FD FE FF 00 -> FF 02 03 04 05 ... FE FF 01 01 00 03 04 05 06 07 ... FD FE FF 00 01 -> FE 03 04 05 06 ... FE FF 02 01 00 COBS decoding (hex): 01 01 00 -> 00 01 01 01 00 -> 00 00 01 02 11 01 00 -> 00 11 00 03 11 22 02 33 00 -> 11 22 00 33 05 11 22 33 44 00 -> 11 22 33 44 02 11 01 01 01 00 -> 11 00 00 00 FF 01 02 03 04 ... FB FC FD FE 00 -> 01 02 03 04 05 ... FA FB FC FD FE 01 FF 01 02 03 ... FB FC FD FE 00 -> 00 01 02 03 04 ... FA FB FC FD FE FF 01 02 03 04 ... FD FE 02 FF 00 -> 01 02 03 04 05 ... FB FC FD FE FF FF 02 03 04 05 ... FE FF 01 01 00 -> 02 03 04 05 06 ... FC FD FE FF 00 FE 03 04 05 06 ... FE FF 02 01 00 -> 03 04 05 06 07 ... FD FE FF 00 01