ASCII art diagram converter: Difference between revisions
m (→{{header|Racket}}: checkpoint) |
({{header|Racket}} implementation added) |
||
Line 351: | Line 351: | ||
* <b><code>test-ascci-art-reader.rkt</code>:</b> gives it all a rigourousish going over |
* <b><code>test-ascci-art-reader.rkt</code>:</b> gives it all a rigourousish going over |
||
<b><code>ascii-art-parser.rkt</code></b> |
|||
Note that this is in the <code>racket/base</code> language so it doesn't overburden the modules that import it, especially since they're at the suntax phase. |
|||
<lang racket> |
|||
<lang racket>#lang racket/base |
|||
</lang> |
|||
(require (only-in racket/list drop-right) |
|||
(only-in racket/string string-trim)) |
|||
(provide ascii-art->struct) |
|||
<lang racket> |
|||
</lang> |
|||
;; reads ascii art from a string or input-port |
|||
===<code>test-ascii-art-reader.rkt</code>=== |
|||
;; returns: |
|||
<lang racket> |
|||
;; list of (word-number highest-bit lowest-bit name-symbol) |
|||
</lang> |
|||
;; bits per word |
|||
(define (ascii-art->struct art) |
|||
(define art-inport |
|||
(cond |
|||
[(string? art) (open-input-string art)] |
|||
[(input-port? art) art] |
|||
[else (raise-argument-error 'ascii-art->struct |
|||
"(or/c string? input-port?)" |
|||
art)])) |
|||
(define lines |
|||
(for/list ((l (in-port (lambda (p) |
|||
(define pk (peek-char p)) |
|||
(case pk ((#\+ #\|) (read-line p)) |
|||
(else eof))) |
|||
art-inport))) |
|||
l)) |
|||
(when (null? lines) |
|||
(error 'ascii-art->struct "no lines")) |
|||
(define bit-re #px"[|+]([^|+]*)") |
|||
(define cell-re #px"[|]([^|]*)") |
|||
(define bit-boundaries (regexp-match-positions* bit-re (car lines))) |
|||
(define bits/word (sub1 (length bit-boundaries))) |
|||
(unless (zero? (modulo bits/word 8)) |
|||
(error 'ascii-art->struct "diagram is not a multiple of 8 bits wide")) |
|||
(define-values (pos->bit-start# pos->bit-end#) |
|||
(for/fold ((s# (hash)) (e# (hash))) |
|||
((box (in-range bits/word)) |
|||
(boundary (in-list bit-boundaries))) |
|||
(define bit (- bits/word box 1)) |
|||
(values (hash-set s# (car boundary) bit) |
|||
(hash-set e# (cdr boundary) bit)))) |
|||
(define fields |
|||
(apply append |
|||
(for/list ((line-number (in-naturals)) |
|||
(line (in-list lines)) |
|||
#:when (odd? line-number)) |
|||
(define word (quotient line-number 2)) |
|||
(define cell-positions (regexp-match-positions* cell-re line)) |
|||
(define cell-contents (regexp-match* cell-re line)) |
|||
(for/list ((cp (in-list (drop-right cell-positions 1))) |
|||
(cnt (in-list cell-contents))) |
|||
(define cell-start-bit (hash-ref pos->bit-start# (car cp))) |
|||
(define cell-end-bit (hash-ref pos->bit-end# (cdr cp))) |
|||
(list word cell-start-bit cell-end-bit (string->symbol (string-trim (substring cnt 1)))))))) |
|||
(values fields bits/word))</lang> |
|||
<b><code>ascii-art-reader.rkt</code></b> |
|||
<lang racket>#lang racket |
|||
(require (for-syntax "ascii-art-parser.rkt")) |
|||
(require (for-syntax racket/syntax)) |
|||
(provide (all-defined-out)) |
|||
(define-syntax (define-ascii-art-structure stx) |
|||
(syntax-case stx () |
|||
[(_ id art) |
|||
(let*-values (((all-fields bits/word) (ascii-art->struct (syntax-e #'art)))) |
|||
(with-syntax |
|||
((bytes->id (format-id stx "bytes->~a" #'id)) |
|||
(id->bytes (format-id stx "~a->bytes" #'id)) |
|||
(word-size (add1 (car (for/last ((f all-fields)) f)))) |
|||
(fld-ids (map cadddr all-fields)) |
|||
(fld-setters |
|||
(cons |
|||
#'id |
|||
(for/list ((fld (in-list all-fields))) |
|||
(let* ((bytes/word (quotient bits/word 8)) |
|||
(start-byte (let ((word-no (car fld))) (* word-no bytes/word)))) |
|||
`(bitwise-bit-field (integer-bytes->integer bs |
|||
#f |
|||
(system-big-endian?) |
|||
,start-byte |
|||
,(+ start-byte bytes/word)) |
|||
,(caddr fld) |
|||
,(add1 (cadr fld))))))) |
|||
(set-fields-bits |
|||
(list* |
|||
'begin |
|||
(for/list ((fld (in-list all-fields))) |
|||
(define val (cadddr fld)) |
|||
(define start-bit (cadr fld)) |
|||
(define end-bit (caddr fld)) |
|||
(define start-byte (let ((word-no (car fld))) (* word-no (quotient bits/word 8)))) |
|||
(define fld-bit-width (- start-bit end-bit -1)) |
|||
(define aligned?/width (and (= end-bit 0) |
|||
(= (modulo start-bit 8) 7) |
|||
(quotient fld-bit-width 8))) |
|||
(case aligned?/width |
|||
[(2 4) |
|||
`(integer->integer-bytes ,val |
|||
,aligned?/width |
|||
#f |
|||
(system-big-endian?) |
|||
rv |
|||
,start-byte)] |
|||
[else |
|||
(define the-byte (+ start-byte (quotient end-bit 8))) |
|||
`(bytes-set! rv |
|||
,the-byte |
|||
(bitwise-ior (arithmetic-shift (bitwise-bit-field ,val 0 ,fld-bit-width) |
|||
,(modulo end-bit 8)) |
|||
(bytes-ref rv ,the-byte)))]))))) |
|||
#`(begin |
|||
(struct id fld-ids #:mutable) |
|||
(define (bytes->id bs) |
|||
fld-setters) |
|||
(define (id->bytes art-in) |
|||
(match-define (id #,@#'fld-ids) art-in) |
|||
(define rv (make-bytes (* word-size #,(quotient bits/word 8)))) |
|||
set-fields-bits |
|||
rv))))]))</lang> |
|||
<b><code>test-ascii-art-reader.rkt</code></b> |
|||
<lang racket>#lang racket |
|||
(require "ascii-art-reader.rkt") |
|||
(require "ascii-art-parser.rkt") |
|||
(require tests/eli-tester) |
|||
(define rfc-1035-header-art |
|||
#<<EOS |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
| ID | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
|QR| Opcode |AA|TC|RD|RA| Z | RCODE | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
| QDCOUNT | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
| ANCOUNT | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
| NSCOUNT | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
| ARCOUNT | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
EOS |
|||
) |
|||
(define-values (rslt rslt-b/w) (ascii-art->struct rfc-1035-header-art)) |
|||
(test |
|||
rslt-b/w => 16 |
|||
rslt => |
|||
'((0 15 0 ID) |
|||
(1 15 15 QR) |
|||
(1 14 11 Opcode) |
|||
(1 10 10 AA) |
|||
(1 9 9 TC) |
|||
(1 8 8 RD) |
|||
(1 7 7 RA) |
|||
(1 6 4 Z) |
|||
(1 3 0 RCODE) |
|||
(2 15 0 QDCOUNT) |
|||
(3 15 0 ANCOUNT) |
|||
(4 15 0 NSCOUNT) |
|||
(5 15 0 ARCOUNT))) |
|||
(define-ascii-art-structure rfc-1035-header #<<EOS |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
| ID | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
|QR| Opcode |AA|TC|RD|RA| Z | RCODE | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
| QDCOUNT | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
| ANCOUNT | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
| NSCOUNT | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
| ARCOUNT | |
|||
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
|||
EOS |
|||
) |
|||
(define h-bytes |
|||
(bytes-append |
|||
(integer->integer-bytes #x1234 2 #f) |
|||
(integer->integer-bytes #x5678 2 #f) |
|||
(integer->integer-bytes #x9abc 2 #f) |
|||
(integer->integer-bytes #xdef0 2 #f) |
|||
(integer->integer-bytes #xfedc 2 #f) |
|||
(integer->integer-bytes #xba98 2 #f))) |
|||
(define h-bytes~ |
|||
(bytes-append |
|||
(integer->integer-bytes #x1234 2 #f (not (system-big-endian?))) |
|||
(integer->integer-bytes #x5678 2 #f (not (system-big-endian?))) |
|||
(integer->integer-bytes #x9abc 2 #f (not (system-big-endian?))) |
|||
(integer->integer-bytes #xdef0 2 #f (not (system-big-endian?))) |
|||
(integer->integer-bytes #xfedc 2 #f (not (system-big-endian?))) |
|||
(integer->integer-bytes #xba98 2 #f (not (system-big-endian?))))) |
|||
(define h (bytes->rfc-1035-header h-bytes)) |
|||
(define bytes-h (rfc-1035-header->bytes h)) |
|||
(define h~ (bytes->rfc-1035-header h-bytes~)) |
|||
(define bytes-h~ (rfc-1035-header->bytes h~)) |
|||
(test |
|||
(rfc-1035-header-ID h) => #x1234 |
|||
(rfc-1035-header-ARCOUNT h) => #xBA98 |
|||
(rfc-1035-header-RCODE h) => 8 |
|||
(rfc-1035-header-ID h~) => #x3412 |
|||
(rfc-1035-header-ARCOUNT h~) => #x98BA |
|||
(rfc-1035-header-RCODE h~) => 6 |
|||
h-bytes => bytes-h |
|||
h-bytes~ => bytes-h~) |
|||
(set-rfc-1035-header-RA! h 0) |
|||
(set-rfc-1035-header-Z! h 7) |
|||
(test |
|||
(rfc-1035-header-Z (bytes->rfc-1035-header (rfc-1035-header->bytes h))) => 7 |
|||
(rfc-1035-header-RA (bytes->rfc-1035-header (rfc-1035-header->bytes h))) => 0) |
|||
(set-rfc-1035-header-Z! h 15) ;; naughty -- might splat RA |
|||
(test |
|||
(rfc-1035-header-Z (bytes->rfc-1035-header (rfc-1035-header->bytes h))) => 7 |
|||
(rfc-1035-header-RA (bytes->rfc-1035-header (rfc-1035-header->bytes h))) => 0)</lang> |
|||
{{out}} |
{{out}} |
Revision as of 16:55, 18 April 2015
Given the RFC 1035 message diagram from Section 4.1.1 (Header section format) as a string: http://www.ietf.org/rfc/rfc1035.txt
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ID | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |QR| Opcode |AA|TC|RD|RA| Z | RCODE | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | QDCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ANCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | NSCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ARCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
Where (every column of the table is 1 bit):
ID is 16 bits QR = Query (0) or Response (1) Opcode = Four bits defining kind of query: 0: a standard query (QUERY) 1: an inverse query (IQUERY) 2: a server status request (STATUS) 3-15: reserved for future use AA = Authoritative Answer bit TC = Truncation bit RD = Recursion Desired bit RA = Recursion Available bit Z = Reserved RCODE = Response code QC = Question Count ANC = Answer Count AUC = Authority Count ADC = Additional Count
Write a function, member function, class or template that accepts a similar multi-line string as input to define a data structure or something else able to decode or store a header with that specified bit structure.
If your language has macros, introspection, code generation, or powerful enough templates, then accept such string at compile-time to define the header data structure statically.
Such "Header" function or template should accept a table with 8, 16, 32 or 64 columns, and any number of rows. For simplicity the only allowed symbols to define the table are + - | (plus, minus, pipe), and whitespace. Lines of the input string composed just of whitespace should be ignored. Leading and trailing whitespace in the input string should be ignored, as well as before and after each table row. The box for each bit of the diagram takes four chars "+--+". The code should perform a little of validation of the input string, but for brevity a full validation is not required.
Bonus: perform a thoroughly validation of the input string.
D
This solution generates anonymous struct code at compile-time, that can be mixed-in inside a struct or class. <lang d>string makeStructFromDiagram(in string rawDiagram) pure @safe {
import std.conv: text; import std.format: format; import std.string: strip, splitLines, indexOf; import std.array: empty, popFront;
static void commitCurrent(ref uint anonCount, ref uint totalBits, ref size_t currentBits, ref string code, ref string currentName) pure @safe { if (currentBits) { code ~= "\t";
currentName = currentName.strip; if (currentName.empty) { anonCount++; currentName = "anonymous_field_" ~ anonCount.text; }
string type; if (currentBits == 1) type = "bool"; else if (currentBits <= ubyte.sizeof * 8) type = "ubyte"; else if (currentBits <= ushort.sizeof * 8) type = "ushort"; else if (currentBits <= uint.sizeof * 8) type = "uint"; else if (currentBits <= ulong.sizeof * 8) type = "ulong"; //else if (currentBits <= ucent.sizeof * 8) // type = "ucent"; else assert(0, "Too many bits for the item " ~ currentName);
immutable byteOffset = totalBits / 8; immutable bitOffset = totalBits % 8;
// Getter: code ~= "@property " ~ type ~ " " ~ currentName ~ "() const pure nothrow @safe {\n"; code ~= "\t\t"; if (currentBits == 1) { code ~= format("return (_payload[%d] & (1 << (7-%d))) ? true : false;", byteOffset, bitOffset); } else if (currentBits < 8) { auto mask = (1 << currentBits) - 1; mask <<= 7 - bitOffset - currentBits + 1; code ~= format("return (_payload[%d] & 0b%08b) >> %d;", byteOffset, mask, 7 - bitOffset - currentBits + 1); } else { assert(currentBits % 8 == 0); assert(bitOffset == 0); code ~= type ~ " v = 0;\n\t\t";
code ~= "version(LittleEndian) {\n\t\t"; foreach (immutable i; 0 .. currentBits / 8) code ~= "\tv |= (cast(" ~ type ~ ") _payload[" ~ text(byteOffset + i) ~ "]) << (" ~ text((currentBits / 8) - i - 1) ~ " * 8);\n\t\t"; code ~= "} else static assert(0);\n\t\t"; code ~= "return v;"; } code ~= "\n"; code ~= "\t}\n\t";
// Setter: code ~= "@property void " ~ currentName ~ "(in " ~ type ~ " value) pure nothrow @safe {\n"; code ~= "\t\t"; if (currentBits < 8) { auto mask = (1 << currentBits) - 1; mask <<= 7 - bitOffset - currentBits + 1; code ~= format("_payload[%d] &= ~0b%08b;\n\t\t", byteOffset, mask); code ~= "assert(value < " ~ text(1 << currentBits) ~ ");\n\t\t"; code~=format("_payload[%d] |= cast(ubyte) value << %d;", byteOffset, 7 - bitOffset - currentBits + 1); } else { assert(currentBits % 8 == 0); assert(bitOffset == 0);
code ~= "version(LittleEndian) {\n\t\t"; foreach (immutable i; 0 .. currentBits / 8) code ~= "\t_payload[" ~ text(byteOffset + i) ~ "] = (value >> (" ~ text((currentBits / 8) - i - 1) ~ " * 8) & 0xff);\n\t\t"; code ~= "} else static assert(0);"; }
code ~= "\n"; code ~= "\t}\n"; totalBits += currentBits; }
currentBits = 0; currentName = null; }
enum C : char { pipe='|', cross='+' } enum cWidth = 3; // Width of a bit cell in the table. immutable diagram = rawDiagram.strip;
size_t bitCountPerRow = 0, currentBits; uint anonCount = 0, totalBits; string currentName; string code = "struct {\n"; // Anonymous.
foreach (line; diagram.splitLines) { assert(!line.empty); line = line.strip; if (line[0] == C.cross) { commitCurrent(anonCount, totalBits, currentBits, code, currentName); if (bitCountPerRow == 0) bitCountPerRow = (line.length - 1) / cWidth; else assert(bitCountPerRow == (line.length - 1) / cWidth); } else { // A field of some sort. while (line.length > 2) { assert(line[0] != '/', "Variable length data not supported"); assert(line[0] == C.pipe, "Malformed table"); line.popFront; const idx = line[0 .. $ - 1].indexOf(C.pipe); if (idx != -1) { const field = line[0 .. idx]; line = line[idx .. $];
commitCurrent(anonCount, totalBits, currentBits, code, currentName); currentName = field; currentBits = (field.length + 1) / cWidth; commitCurrent(anonCount, totalBits, currentBits, code, currentName); } else { // The full row or a continuation of the last. currentName ~= line[0 .. $ - 1]; // At this point, line does not include the first // C.pipe, but the length will include the last. currentBits += line.length / cWidth;
line = line[$ .. $]; } } } }
// Using bytes to avoid endianness issues. // hopefully the compiler will optimize it, otherwise // maybe we could specialize the properties more. code ~= "\n\tprivate ubyte[" ~ text((totalBits + 7) / 8) ~ "] _payload;\n";
return code ~ "}";
}
void main() { // Testing.
import std.stdio;
enum diagram = " +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ID | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |QR| Opcode |AA|TC|RD|RA| Z | RCODE | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | QDCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ANCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | NSCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ARCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+";
// To debug the code generation: //pragma(msg, diagram.makeStructFromDiagram);
// Usage. static struct Header { mixin(diagram.makeStructFromDiagram); }
Header h; h.ID = 10; h.RA = true; h.ARCOUNT = 255; h.Opcode = 7;
// See the byte representation to test the setter's details. h._payload.writeln;
// Test the getters: assert(h.ID == 10); assert(h.RA == true); assert(h.ARCOUNT == 255); assert(h.Opcode == 7);
}</lang>
- Output:
[0, 10, 56, 128, 0, 0, 0, 0, 0, 0, 0, 255]
Static support for BigEndian is easy to add.
It also supports larger values like this, that is 32 bits long:
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ThirtyTwo | | | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
J
<lang J>require'strings'
soul=: -. {. normalize=: [:soul' ',dltb;._2
mask=: 0: _1} '+' = {. partition=: '|' = mask #"1 soul labels=: ;@(([: <@}: <@dltb;._1)"1~ '|'&=)@soul names=: ;:^:(0 = L.)
unpacker=:1 :0
p=. , partition normalize m p #.;.1 (8#2) ,@:#: ]
)
packer=:1 :0
w=. -#;.1 ,partition normalize m _8 (#.\ ;) w ({. #:)&.> ]
)
getter=:1 :0
nm=. labels normalize m (nm i. names@[) { ]
)
setter=:1 :0
q=. ' n=. q,~q,;:inv labels normalize m 1 :('(',n,' i.&names m)}')
)
starter=:1 :0
0"0 labels normalize m
)</lang>
Sample definition (note the deliberate introduction of extraneous whitespace in locations the task requires us to ignore it.
<lang j>sample=: 0 :0
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
| ID | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |QR| Opcode |AA|TC|RD|RA| Z | RCODE | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | QDCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ANCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | NSCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ARCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
)
unpack=: sample unpacker pack=: sample packer get=: sample getter set=: sample setter start=: sample starter</lang>
Example data for sample definition:
<lang J>
4095 13 5 6144 4096 'ID Opcode RCODE ARCOUNT QDCOUNT' set start
4095 0 13 0 0 0 0 0 5 4096 0 0 6144
pack 4095 13 5 6144 4096 'ID Opcode RCODE ARCOUNT QDCOUNT' set start
15 255 104 5 16 0 0 0 0 0 24 0
unpack 0 10 56 128 0 0 0 0 0 0 0 255
10 0 7 0 0 0 1 0 0 0 0 0 255
'Opcode' get unpack 0 10 56 128 0 0 0 0 0 0 0 255
7</lang>
In other words:
- unpack converts an octet sequence to the corresponding numeric sequence
- pack converts a numeric sequence to the corresponding octet sequence
- get extracts named elements from the numeric sequence
- set updates named elements in the numeric sequence
- start represents the default "all zeros" sequence which may be used to derive other sequences
Note that this implementation assumes that the ascii diagram represents the native word width on a single line, and assumes well formed data.
Racket
Theree files:
ascii-art-parser.rkt
: provides the functionascii-art->struct
, which converts ASCII art from a string (or input port) to a list of word-number, bit range and idascii-art-reader.rkt
: uses this to provide a sytntaxdefine-ascii-art-structure
which defines a structure using the art worktest-ascci-art-reader.rkt
: gives it all a rigourousish going over
ascii-art-parser.rkt
Note that this is in the racket/base
language so it doesn't overburden the modules that import it, especially since they're at the suntax phase.
<lang racket>#lang racket/base
(require (only-in racket/list drop-right)
(only-in racket/string string-trim))
(provide ascii-art->struct)
- reads ascii art from a string or input-port
- returns
- list of (word-number highest-bit lowest-bit name-symbol)
- bits per word
(define (ascii-art->struct art)
(define art-inport (cond [(string? art) (open-input-string art)] [(input-port? art) art] [else (raise-argument-error 'ascii-art->struct "(or/c string? input-port?)" art)])) (define lines (for/list ((l (in-port (lambda (p) (define pk (peek-char p)) (case pk ((#\+ #\|) (read-line p)) (else eof))) art-inport))) l)) (when (null? lines) (error 'ascii-art->struct "no lines")) (define bit-re #px"[|+]([^|+]*)") (define cell-re #px"[|]([^|]*)") (define bit-boundaries (regexp-match-positions* bit-re (car lines))) (define bits/word (sub1 (length bit-boundaries))) (unless (zero? (modulo bits/word 8)) (error 'ascii-art->struct "diagram is not a multiple of 8 bits wide")) (define-values (pos->bit-start# pos->bit-end#) (for/fold ((s# (hash)) (e# (hash))) ((box (in-range bits/word)) (boundary (in-list bit-boundaries))) (define bit (- bits/word box 1)) (values (hash-set s# (car boundary) bit) (hash-set e# (cdr boundary) bit)))) (define fields (apply append (for/list ((line-number (in-naturals)) (line (in-list lines)) #:when (odd? line-number)) (define word (quotient line-number 2)) (define cell-positions (regexp-match-positions* cell-re line)) (define cell-contents (regexp-match* cell-re line)) (for/list ((cp (in-list (drop-right cell-positions 1))) (cnt (in-list cell-contents))) (define cell-start-bit (hash-ref pos->bit-start# (car cp))) (define cell-end-bit (hash-ref pos->bit-end# (cdr cp))) (list word cell-start-bit cell-end-bit (string->symbol (string-trim (substring cnt 1)))))))) (values fields bits/word))</lang>
ascii-art-reader.rkt
<lang racket>#lang racket
(require (for-syntax "ascii-art-parser.rkt"))
(require (for-syntax racket/syntax))
(provide (all-defined-out))
(define-syntax (define-ascii-art-structure stx)
(syntax-case stx () [(_ id art) (let*-values (((all-fields bits/word) (ascii-art->struct (syntax-e #'art)))) (with-syntax ((bytes->id (format-id stx "bytes->~a" #'id)) (id->bytes (format-id stx "~a->bytes" #'id)) (word-size (add1 (car (for/last ((f all-fields)) f)))) (fld-ids (map cadddr all-fields)) (fld-setters (cons #'id (for/list ((fld (in-list all-fields))) (let* ((bytes/word (quotient bits/word 8)) (start-byte (let ((word-no (car fld))) (* word-no bytes/word)))) `(bitwise-bit-field (integer-bytes->integer bs #f (system-big-endian?) ,start-byte ,(+ start-byte bytes/word)) ,(caddr fld) ,(add1 (cadr fld))))))) (set-fields-bits (list* 'begin (for/list ((fld (in-list all-fields))) (define val (cadddr fld)) (define start-bit (cadr fld)) (define end-bit (caddr fld)) (define start-byte (let ((word-no (car fld))) (* word-no (quotient bits/word 8)))) (define fld-bit-width (- start-bit end-bit -1)) (define aligned?/width (and (= end-bit 0) (= (modulo start-bit 8) 7) (quotient fld-bit-width 8))) (case aligned?/width [(2 4) `(integer->integer-bytes ,val ,aligned?/width #f (system-big-endian?) rv ,start-byte)] [else (define the-byte (+ start-byte (quotient end-bit 8))) `(bytes-set! rv ,the-byte (bitwise-ior (arithmetic-shift (bitwise-bit-field ,val 0 ,fld-bit-width) ,(modulo end-bit 8)) (bytes-ref rv ,the-byte)))]))))) #`(begin (struct id fld-ids #:mutable) (define (bytes->id bs) fld-setters) (define (id->bytes art-in) (match-define (id #,@#'fld-ids) art-in) (define rv (make-bytes (* word-size #,(quotient bits/word 8)))) set-fields-bits rv))))]))</lang>
test-ascii-art-reader.rkt
<lang racket>#lang racket
(require "ascii-art-reader.rkt")
(require "ascii-art-parser.rkt")
(require tests/eli-tester)
(define rfc-1035-header-art
#<<EOS
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ID | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |QR| Opcode |AA|TC|RD|RA| Z | RCODE | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | QDCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ANCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | NSCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ARCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ EOS
)
(define-values (rslt rslt-b/w) (ascii-art->struct rfc-1035-header-art))
(test
rslt-b/w => 16 rslt => '((0 15 0 ID) (1 15 15 QR) (1 14 11 Opcode) (1 10 10 AA) (1 9 9 TC) (1 8 8 RD) (1 7 7 RA) (1 6 4 Z) (1 3 0 RCODE) (2 15 0 QDCOUNT) (3 15 0 ANCOUNT) (4 15 0 NSCOUNT) (5 15 0 ARCOUNT)))
(define-ascii-art-structure rfc-1035-header #<<EOS +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ID | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |QR| Opcode |AA|TC|RD|RA| Z | RCODE | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | QDCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ANCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | NSCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ARCOUNT | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ EOS
)
(define h-bytes
(bytes-append (integer->integer-bytes #x1234 2 #f) (integer->integer-bytes #x5678 2 #f) (integer->integer-bytes #x9abc 2 #f) (integer->integer-bytes #xdef0 2 #f) (integer->integer-bytes #xfedc 2 #f) (integer->integer-bytes #xba98 2 #f)))
(define h-bytes~
(bytes-append (integer->integer-bytes #x1234 2 #f (not (system-big-endian?))) (integer->integer-bytes #x5678 2 #f (not (system-big-endian?))) (integer->integer-bytes #x9abc 2 #f (not (system-big-endian?))) (integer->integer-bytes #xdef0 2 #f (not (system-big-endian?))) (integer->integer-bytes #xfedc 2 #f (not (system-big-endian?))) (integer->integer-bytes #xba98 2 #f (not (system-big-endian?)))))
(define h (bytes->rfc-1035-header h-bytes)) (define bytes-h (rfc-1035-header->bytes h))
(define h~ (bytes->rfc-1035-header h-bytes~)) (define bytes-h~ (rfc-1035-header->bytes h~))
(test
(rfc-1035-header-ID h) => #x1234 (rfc-1035-header-ARCOUNT h) => #xBA98 (rfc-1035-header-RCODE h) => 8 (rfc-1035-header-ID h~) => #x3412 (rfc-1035-header-ARCOUNT h~) => #x98BA (rfc-1035-header-RCODE h~) => 6 h-bytes => bytes-h h-bytes~ => bytes-h~)
(set-rfc-1035-header-RA! h 0)
(set-rfc-1035-header-Z! h 7) (test
(rfc-1035-header-Z (bytes->rfc-1035-header (rfc-1035-header->bytes h))) => 7 (rfc-1035-header-RA (bytes->rfc-1035-header (rfc-1035-header->bytes h))) => 0)
(set-rfc-1035-header-Z! h 15) ;; naughty -- might splat RA (test
(rfc-1035-header-Z (bytes->rfc-1035-header (rfc-1035-header->bytes h))) => 7 (rfc-1035-header-RA (bytes->rfc-1035-header (rfc-1035-header->bytes h))) => 0)</lang>
- Output:
Nothing much to see... all tests pass