GSTrans string conversion
You are encouraged to solve this task according to the task description, using any language you may know.
GSTrans string encoding is a method of encoding all 8-bit character values 0-255 using only printable characters. It originated on Acorn computers, where it allows command-line commands to process non-printable characters.
Character   Encoding
0-31        |letter, eg |@, |A, |i, |[ etc.
32-126      the character itself, except for: " which is encoded as |" and | which is encoded as ||
127         |?
128-255     |! followed by the encoding of the value minus 128, eg |!|@ = 128
A string can be surrounded in quotes, eg "ALERT|G".
See http://www.riscos.com/support/developers/prm/conversions.html
Examples:
|LHello|G|J|M encodes CHR$12;"Hello";CHR$7;CHR$10;CHR$13
"|m|j|@|e|!t|m|!|?" encodes 13,10,0,5,244,13,255
- Task
- Write two functions, one to encode a string of characters into a GSTrans string, and one to decode a GSTrans string. Indicate if any error checking is done, and how it is indicated.
BBC BASIC
10 REM > GSTrans.bbc
20 REM GSTrans in BASIC
30 REM J.G.Harston
40 :
42 REM Interactive test loop: read a GSTrans string from the keyboard,
44 REM decode it, re-encode the decoded text and print the re-encoded form.
46 REM UNTIL FALSE never succeeds, so the loop repeats until interrupted.
50 REPEAT
60 INPUT LINE "GSstring: "A$
70 A$=FNGS_Decode(A$,0)
80 A$=FNGS_Encode(A$)
90 PRINT A$
100 UNTIL FALSE
105 REM Not reached while the loop above keeps running
110 END
120 :
130 :
140 :
150 REM Decode a GSTrans string
160 REM On entry: inp$=GSTransed string
170 REM flg%=0 - parse whole string, *KEY style
180 REM =1 - parse until space, filename style (not implemented)
190 REM Returns: decoded string
195 REM Note: flg% is accepted but is not referenced anywhere in the body
200 DEFFNGS_Decode(inp$,flg%)
210 LOCAL out$,byte%,set%
215 REM Strip leading spaces
220 IF LEFT$(inp$,1)=" ":REPEAT:inp$=MID$(inp$,2):UNTIL LEFT$(inp$,1)<>" "
225 REM Remove one pair of surrounding double quotes when both are present
230 IF LEFT$(inp$,1)="""":IF RIGHT$(inp$,1)="""":inp$=MID$(inp$,2,LENinp$-2)
235 REM Nothing left to decode: return the empty string
240 IF inp$="":=""
245 REM Each pass of the loop decodes one character from the front of inp$
250 REPEAT
255 REM byte%=-1 marks "no escape matched yet"; set% becomes 128 after a |! prefix
260 byte%=-1:set%=0
270 IF LEFT$(inp$,2)="|!":set%=128:inp$=MID$(inp$,3)
275 REM Generic |x escape keeps the low five bits of x; the tests that follow
277 REM overwrite byte% for the special cases, so their order matters
280 IF LEFT$(inp$,1)="|":byte%=ASCMID$(inp$,2,1)AND31
290 IF LEFT$(inp$,2)="||":byte%=ASC"|"
300 IF LEFT$(inp$,2)="|?":byte%=127
310 IF LEFT$(inp$,2)="|""":byte%=34
315 REM Two consecutive quote characters also decode to a single quote
320 IF LEFT$(inp$,2)="""""":byte%=34
325 REM No escape matched: consume one literal character; otherwise drop the
327 REM two characters that made up the escape
330 IF byte%<0:byte%=ASC(inp$):inp$=MID$(inp$,2) ELSE inp$=MID$(inp$,3)
335 REM Add the |! offset (0 or 128) and append the decoded character
340 out$=out$+CHR$(set%+byte%)
350 UNTIL inp$=""
360 =out$
370 :
380 REM Encode into a GSTrans string
390 REM On entry: inp$=raw string
400 REM Returns: GSTrans string
405 REM The returned string is always wrapped in double quotes
410 DEFFNGS_Encode(inp$)
420 LOCAL out$,byte%
425 REM Empty input is returned as a pair of double quotes
430 IF inp$="":=""""""
435 REM Each pass of the loop encodes the first character of inp$ and removes it
440 REPEAT
450 byte%=ASC(inp$):inp$=MID$(inp$,2)
455 REM Values above 127: emit |! and carry on with the low seven bits
460 IF byte%>127:out$=out$+"|!":byte%=byte% AND 127
465 REM Values 32-126 other than the quote and the bar are copied unchanged
470 IF byte%>31 AND byte%<>ASC"""" AND byte%<>ASC"|" AND byte%<>127:out$=out$+CHR$(byte%)
475 REM Values 0-31 become | followed by the character 64 higher
480 IF byte%<32:out$=out$+"|"+CHR$(byte%+64)
485 REM A quote is written as two quotes, a bar as ||, and 127 as |?
490 IF byte%=ASC"""":out$=out$+""""""
500 IF byte%=ASC"|":out$=out$+"||"
510 IF byte%=127:out$=out$+"|?"
520 UNTIL inp$=""
525 REM Wrap the encoded text in double quotes
530 =""""+out$+""""
540 :
No check on string lengths is done. On decoding, invalid encodings are ignored and skipped, for instance |4 is decoded as 4.
Julia
"""
ASCII code Symbols used
0 |@
1 - 26 |letter eg |A (or |a) = ASCII 1, |M (or |m) = ASCII 13
27 |[ or |{
28 |\
29 |] or |}
30 |^ or |~
31 |_ or |` (grave accent)
32 - 126 keyboard character, except for:
" |"
| ||
< |<
127 |?
128 - 255 |!coded symbol eg ASCII 128 = |!|@ ASCII 129 = |!|A
See also www.riscos.com/support/developers/prm/conversions.html
"""
"""
    GSTrans_encode(arr::Vector{Char})

Encode the characters of `arr` as a GSTrans string and return it as a `String`.

To avoid Unicode multibyte glitches the input is handled as a vector of `Char`s,
and every element must have an integer value in `0:255`.

Encoding applied to each character value:
- `0:31` becomes `|` followed by `Char(64 + value)`, e.g. 7 becomes `|G`
- the double quote becomes `|"`
- the vertical bar becomes `||`
- the less-than sign becomes `|<`
- `127` becomes `|?`
- `128:255` becomes `|!` followed by the encoding of `value - 128`
- every other value is copied unchanged

# Throws
- `AssertionError` if any element has a value outside `0:255`.
"""
function GSTrans_encode(arr::Vector{Char})
    io = IOBuffer()
    for c in arr
        i = Int(c)
        # Thrown explicitly (rather than via `@assert`) so the range check is
        # always performed; the exception type and message are unchanged.
        0 <= i <= 255 || throw(AssertionError("Char value of $c, $i, is out of range"))
        if i >= 128
            # High half: emit the prefix, then encode the low seven bits.
            print(io, "|!")
            i -= 128
        end
        if i <= 31
            print(io, '|', Char(64 + i))
        elseif i == 127
            print(io, "|?")
        elseif i == Int('"') || i == Int('|') || i == Int('<')
            # Characters that are special to GSTrans are escaped with a bar.
            print(io, '|', Char(i))
        else
            print(io, Char(i))
        end
    end
    return String(take!(io))
end
"""
    GSTrans_encode(str::AbstractString)

Encode `str` as a GSTrans string.

The string is converted to a `String`, split into its code-unit bytes, and each
byte is turned into a `Char` with the same value. The resulting vector is passed
to the `Vector{Char}` method, so a multibyte character is encoded one byte at a
time. Going through `String(str)` lets any `AbstractString` (for example a
`SubString`) be encoded.
"""
GSTrans_encode(str::AbstractString) =
    GSTrans_encode(Char[Char(b) for b in codeunits(String(str))])
"""
    GSTrans_decode(str::AbstractString)

Decode the GSTrans-encoded `str` and return a `String` built from the decoded bytes.

Characters are copied through unchanged until a vertical bar is met. The
character after a bar selects the decoded value: `?` gives 127, a bar, double
quote or less-than sign gives itself, the bracket, brace, backslash, caret,
tilde, underscore and grave-accent forms give 27 to 31, and anything else gives
the value of its upper-case form minus 64. A bar followed by `!` adds 128 to the
next item, which may be either a plain character or another bar escape.

An escape left unfinished at the end of `str` produces no output. A value that
cannot be stored in a single byte raises an error from the conversion.
"""
function GSTrans_decode(str::AbstractString)
    bytes = UInt8[]
    # Append one decoded value; the conversion to a byte happens inside `push!`.
    emit(code) = push!(bytes, Char(code))

    # Value selected by the character that follows a bar, before any |! offset.
    function barvalue(ch)
        ch == '?' && return 127
        ch in ('|', '"', '<') && return Int(ch)
        ch in ('[', '{') && return 27
        ch == '\\' && return 28
        ch in (']', '}') && return 29
        ch in ('^', '~') && return 30
        ch in ('_', '`') && return 31
        return Int(uppercase(ch)) - 64
    end

    # :plain - ordinary text, :bar - a bar was just read, :bang - "|!" was just read
    state = :plain
    highbit = 0
    for ch in str
        if state === :bang
            if ch == '|'
                # "|!|x": remember the offset and decode x as a bar escape
                highbit = 128
                state = :bar
            else
                emit(Int(ch) + 128)
                state = :plain
            end
        elseif state === :bar
            if ch == '!'
                state = :bang
            else
                emit(barvalue(ch) + highbit)
                state = :plain
            end
            highbit = 0
        elseif ch == '|'
            state = :bar
        else
            emit(ch)
        end
    end
    return String(bytes)
end
# Round-trip fixtures: a plain ASCII sample and one containing a multibyte character.
const TESTS = ["ALERT|G", "wert↑"]
# Eight random strings, each built from ten characters with values in 0:255.
const RAND_TESTS = map(_ -> String(Char.(rand(0:255, 10))), 1:8)
# Samples that are already GSTrans-encoded and are only decoded.
const DECODE_TESTS = ["|LHello|G|J|M", "|m|j|@|e|!t|m|!|?"]

# Encode then decode every fixture, print the result, and require a lossless round trip.
foreach(vcat(TESTS, RAND_TESTS)) do t
    encoded = GSTrans_encode(t)
    decoded = GSTrans_decode(encoded)
    println("String ", t, " encoded is: ", encoded, ", decoded is: ", decoded, ".")
    @assert t == decoded
end

# Show the decoded form of each pre-encoded sample.
for sample in DECODE_TESTS
    print("Encoded string ", sample, " decoded is: ")
    display(GSTrans_decode(sample))
end
- Output:
String ALERT|G encoded is: ALERT||G, decoded is: ALERT|G. String wert↑ encoded is: wert|!b|!|F|!|Q, decoded is: wert↑. String @♂aN°$ª7Î encoded is: @|KaN|!B|!0$|!B|!*7|!B|!|R|!C|!|N, decoded is: @♂aN°$ª7Î. String ÙC▼æÔt6¤☻Ì encoded is: |!C|!|YC|_|!C|!&|!C|!|Tt6|!B|!$|B|!C|!|L, decoded is: ÙC▼æÔt6¤☻Ì. String "@)Ð♠qhýÌÿ encoded is: |"@)|!C|!|P|Fqh|!C|!=|!C|!|L|!C|!?, decoded is: "@)Ð♠qhýÌÿ. String +☻#o9$u♠©A encoded is: +|B#o9$u|F|!B|!)A, decoded is: +☻#o9$u♠©A. String ♣àlæi6Ú.é encoded is: |E|!C|! l|!B|!|K|!C|!&i6|!C|!|Z.|!C|!), decoded is: ♣àlæi6Ú.é. String ÏÔ♀È♥@ë encoded is: |!C|!|O|!C|!|T|!B|!|[|Lj|!C|!|H|C@|!B|!|I|!C|!+, decoded is: ÏÔ♀È♥@ë. String Rç÷%◄MZûhZ encoded is: R|!C|!'|!C|!7%|QMZ|!C|!;hZ, decoded is: Rç÷%◄MZûhZ. String ç>¾AôVâ♫↓P encoded is: |!C|!'>|!B|!>A|!C|!4V|!C|!|"|N|YP, decoded is: ç>¾AôVâ♫↓P. Encoded string |LHello|G|J|M decoded is: "\fHello\a\n\r" Encoded string |m|j|@|e|!t|m|!|? decoded is: "\r\n\0\x05\xf4\r\xff"
Wren
Strings in Wren are just an immutable array of bytes. They are usually interpreted as UTF-8 but don't have to be. Unicode characters in the example Julia strings are therefore encoded using their constituent UTF-8 bytes which decodes fine but may not give the same encoding as Julia itself.
If an invalid byte (following the "|" flag) is encountered whilst decoding, it is decoded as if the flag were not present.
Where strings contain control characters, their decoded version is printed to the terminal as a byte list.
class GSTrans {
    // Encodes the bytes of 's' as a GSTrans string.
    // Control codes 1-26 use an upper-case letter after the bar when 'upper' is
    // truthy and a lower-case letter otherwise. Aborts the fiber unless 's' is a
    // non-empty string. One pair of enclosing quotation marks is removed from 's'
    // before encoding.
    static encode(s, upper) {
        if (!(s is String) || s.count < 1) Fiber.abort("Argument must be a non-empty string.")

        // drop the enclosing quotation marks, if both are there
        var quoted = s.count > 1 && s[0] == "\"" && s[-1] == "\""
        if (quoted) s = s[1..-2]

        // printable bytes that need a bar in front: " < | and DEL
        var escapes = { 34: "|\"", 60: "|<", 124: "||", 127: "|?" }

        // encodes a single byte value below 128
        var encodeLow = Fn.new { |b|
            if (b < 32) {
                // only 1-26 have a lower-case form; the rest always use b + 64
                var offset = (b >= 1 && b <= 26 && !upper) ? 96 : 64
                return "|" + String.fromByte(b + offset)
            }
            var escaped = escapes[b]
            return escaped == null ? String.fromByte(b) : escaped
        }

        // encode byte by byte, collecting the pieces and joining them at the end
        var pieces = []
        for (b in s.bytes) {
            pieces.add(b < 128 ? encodeLow.call(b) : "|!" + encodeLow.call(b - 128))
        }
        return pieces.join()
    }

    // Decodes the GSTrans string 's' and returns the resulting byte string.
    // Aborts the fiber unless 's' is a non-empty string. One pair of enclosing
    // quotation marks is removed first. A bar that cannot start a complete
    // escape is skipped on its own.
    static decode(s) {
        if (!(s is String) || s.count < 1) Fiber.abort("Argument must be a non-empty string.")

        // drop the enclosing quotation marks, if both are there
        var quoted = s.count > 1 && s[0] == "\"" && s[-1] == "\""
        if (quoted) s = s[1..-2]

        // value selected by the byte that follows a bar
        var unescape = Fn.new { |b|
            if (b == 63) return 127                      // question mark -> DEL
            if (b == 96) return 31                       // grave accent
            if (b == 34 || b == 60 || b == 124) return b // " < | stand for themselves
            if (b >= 64 && b <= 95) return b - 64        // @ + upper case letter + [\]^_
            if (b >= 97 && b <= 126) return b - 96       // lower case letter + {}~
            return b                                     // anything else is kept as is
        }

        var bytes = s.bytes.toList
        var total = bytes.count
        var pieces = []
        var pos = 0
        while (pos < total) {
            var after = total - pos - 1 // number of bytes following the current one
            if (bytes[pos] != 124) {
                // ordinary byte
                pieces.add(String.fromByte(bytes[pos]))
                pos = pos + 1
            } else if (after >= 1 && bytes[pos + 1] != 33) {
                // |x
                pieces.add(String.fromByte(unescape.call(bytes[pos + 1])))
                pos = pos + 2
            } else if (after >= 2 && bytes[pos + 2] != 124) {
                // |!x
                pieces.add(String.fromByte(128 + bytes[pos + 2]))
                pos = pos + 3
            } else if (after >= 3 && bytes[pos + 2] == 124) {
                // |!|x
                pieces.add(String.fromByte(128 + unescape.call(bytes[pos + 3])))
                pos = pos + 4
            } else {
                // incomplete escape: skip the bar only
                pos = pos + 1
            }
        }
        return pieces.join()
    }
}
// Raw test strings containing control and top-bit-set bytes.
var strings = [
    "\fHello\a\n\r",
    "\r\n\0\x05\xf4\r\xff"
]
// The same strings as raw text, so they can be printed with their escapes visible.
var texts = [
    """\fHello\a\n\r""",
    """\r\n\0\x05\xf4\r\xff"""
]
// Letter case to use when encoding each of the strings above.
var uppers = [true, false]

// Encode and decode each string, then report whether the round trip matched.
for (i in 0...strings.count) {
    var raw = strings[i]
    var shown = "\"" + texts[i] +"\""
    var useUpper = uppers[i]
    var encoded = GSTrans.encode(raw, useUpper)
    var decoded = GSTrans.decode(encoded)
    var caseName = useUpper ? "upper" : "lower"
    System.print("string: " + shown)
    System.print("encoded (" + caseName + ") : " + encoded)
    System.print("decoded (as byte list): " + decoded.bytes.toList.toString)
    System.print("string == decoded ? " + (decoded == raw).toString + "\n")
}

// The strings used by the Julia example, encoded here from their UTF-8 bytes.
var jstrings = [
    "ALERT|G",
    "wert↑",
    "@♂aN°$ª7Î",
    "ÙC▼æÔt6¤☻Ì",
    "\"@)Ð♠qhýÌÿ",
    "+☻#o9$u♠©A",
    "♣àlæi6Ú.é",
    "ÏÔ♀È♥@ë",
    "Rç÷\%◄MZûhZ",
    "ç>¾AôVâ♫↓P"
]
System.print("Julia strings: string -> encoded (upper) <- decoded (same or different)\n")
for (original in jstrings) {
    var encoded = GSTrans.encode(original, true)
    var decoded = GSTrans.decode(encoded)
    var verdict = (original == decoded) ? "same" : "different"
    System.print(" " + original + " -> " + encoded + " <- " + decoded + " (" + verdict + ")")
}
- Output:
string: "\fHello\a\n\r" encoded (upper) : |LHello|G|J|M decoded (as byte list): [12, 72, 101, 108, 108, 111, 7, 10, 13] string == decoded ? true string: "\r\n\0\x05\xf4\r\xff" encoded (lower) : |m|j|@|e|!t|m|!|? decoded (as byte list): [13, 10, 0, 5, 244, 13, 255] string == decoded ? true Julia strings: string -> encoded (upper) <- decoded (same or different) ALERT|G -> ALERT||G <- ALERT|G (same) wert↑ -> wert|!b|!|F|!|Q <- wert↑ (same) @♂aN°$ª7Î -> @|!b|!|Y|!|BaN|!B|!0$|!B|!*7|!C|!|N <- @♂aN°$ª7Î (same) ÙC▼æÔt6¤☻Ì -> |!C|!|YC|!b|!|V|!|<|!C|!&|!C|!|Tt6|!B|!$|!b|!|X|!;|!C|!|L <- ÙC▼æÔt6¤☻Ì (same) "@)Ð♠qhýÌÿ -> |"@)|!C|!|P|!b|!|Y|! qh|!C|!=|!C|!|L|!C|!? <- "@)Ð♠qhýÌÿ (same) +☻#o9$u♠©A -> +|!b|!|X|!;#o9$u|!b|!|Y|! |!B|!)A <- +☻#o9$u♠©A (same) ♣àlæi6Ú.é -> |!b|!|Y|!#|!C|! l|!C|!&i6|!C|!|Z.|!C|!) <- ♣àlæi6Ú.é (same) ÏÔ♀È♥@ë -> |!C|!|O|!C|!|T|!b|!|Y|!|@|!C|!|H|!b|!|Y|!%@|!C|!+ <- ÏÔ♀È♥@ë (same) Rç÷%◄MZûhZ -> R|!C|!'|!C|!7%|!b|!|W|!|DMZ|!C|!;hZ <- Rç÷%◄MZûhZ (same) ç>¾AôVâ♫↓P -> |!C|!'>|!B|!>A|!C|!4V|!C|!|"|!b|!|Y|!+|!b|!|F|!|SP <- ç>¾AôVâ♫↓P (same)