User:Eriksiers/BBC BASIC detokenizer: Difference between revisions

m
updated lang tag
m (added licensing section)
m (updated lang tag)
 
(4 intermediate revisions by the same user not shown)
Line 1:
This is to convert tokenized [[:Category:BBC BASIC|BBC BASICfor Windows]] files to a human-readable (plain text) format. Note that this will ''probably'' not handle tokenized files from other versions of [[:Category:BBC BASIC|BBC BASIC]] -- specifically, I know that the RISC OS version uses a different format (see [http://bb4w.wikispaces.com/Notes+for+RISC+OS+users this page] and [http://bb4w.wikispaces.com/Format this page] at the BB4W wiki).
 
Two pages that may be of interest at the BB4W wiki are [http://bb4w.wikispaces.com/Tokeniser Tokeniser] and [http://bb4w.wikispaces.com/Detokeniser Detokeniser], both of which discuss this topic using BBC BASIC code.
There are actually two parts to this. The first is [[:Category:VBA|VBA]]/[[wp:Microsoft Excel|Excel]], and the second is [[QuickBASIC#QBasic|QBasic]].
 
I believe that the previous issues with line numbers as the targets of GOTO and the like has been fixed, using a QB translation of code on the above-mentioned Detokeniser page at the BB4W wiki. (View the page history for details, if you're interested.)
(If you're only interested in the actual detokenizing code, and not how it's generated, skip down to the [[#QBasic|QBasic]] section.)
 
The [[:Category:VBA|VBA]] code that generates the below program has been moved to [[User:Eriksiers/BBC BASIC detokenizer/Generator|its own page]].
 
==Licensing==
All source code on this page is Copyright ©2012 Erik Siers. The sources are dual-licensed under the terms of the following licenses:
* The [http://www.gnu.org/licenses/old-licenses/gpl-2.0.html GNU General Public License (GNU GPL) v2] or, at your option, any later version.
* The [http://www.gnu.org/licenses/fdl-1.2.html GNU Free Documentation License (GNU FDL) v1.2] or, at your option, any later version.
The two licenses are not 100% compatible, so; which license you use, I leave up to you.
 
===Exception===
Any BBC BASIC source code that is processedgenerated by the belowgenerated QBasic program programs('''only''') is specifically '''excluded''' from the above licensing terms, and is instead covered by whatever license(s) cover the original, tokenized sources.
 
=Detoken.bas=
=VBA/Excel=
 
This code uses data contained in an Excel spreadsheet to generate the QBasic code below. (I'm not including the actual data, but it can be easily reconstructed from the QBasic code.)
 
<lang vb>Sub buildBas()
Open ActiveWorkbook.Path & "\detoken.bas" For Output As 1
For L0 = 2 To Cells.SpecialCells(xlCellTypeLastCell).Row
If Len(Cells(L0, 2).Value) Then
tmp = tmp + 1
If (tmp Mod 10) = 1 Then Print #1, "DATA ";
Print #1, LTrim$(Val("&h" & Cells(L0, 1).Value)); ","""; Cells(L0, 2).Value; """";
If (tmp Mod 10) = 0 Then
Print #1,
Else
Print #1, ",";
End If
End If
Next
Print #1, "0": Print #1,
Print #1, "DIM linenum AS INTEGER, indentLevel AS INTEGER, indentPending AS INTEGER"
Print #1, "DIM encoded AS STRING * 1"
Print #1, "DIM keywords(255) AS STRING"
Print #1, "DO"
Print #1, " READ t%"
Print #1, " IF 0 = t% THEN EXIT DO"
Print #1, " READ keywords(t%)"
Print #1, "LOOP"
Print #1, "nm$ = ENVIRON$(""DETOKEN"")"
Print #1, "OPEN nm$ + "".bbc"" FOR BINARY AS 2"
Print #1, "OPEN nm$ + "".bas"" FOR OUTPUT AS 3"
Print #1, "'skip first char"
Print #1, "GET #2, , encoded"
Print #1, "GET #2, , linenum"
Print #1, "IF -1 = linenum THEN"
Print #1, " CLOSE: SYSTEM"
Print #1, "ELSEIF linenum <> 0 THEN"
Print #1, " PRINT #3, LTRIM$(STR$(linenum)); "" "";"
Print #1, "END IF"
'still haven't figured out the first char...
'Print #1, "PRINT #3, ASC(encoded); CHR$(9);"
Print #1, "DO UNTIL EOF(2)"
Print #1, " GET #2, , encoded"
Print #1, " i% = ASC(encoded)"
Print #1, " SELECT CASE i%"
Print #1, " CASE 0"
Print #1, " 'do nothing"
Print #1, " CASE 13"
Print #1, " PRINT #3,"
Print #1, " 'skip first char of next line"
Print #1, " IF NOT EOF(2) THEN"
Print #1, " GET #2, , encoded"
Print #1, " GET #2, , linenum"
Print #1, " IF -1 = linenum THEN"
Print #1, " CLOSE: SYSTEM"
Print #1, " ELSEIF linenum <> 0 THEN"
Print #1, " PRINT #3, LTRIM$(STR$(linenum)); "" "";"
Print #1, " END IF"
Print #1, " END IF"
Print #1, " CASE ELSE"
Print #1, " IF keywords(i%) <> """" THEN"
Print #1, " PRINT #3, keywords(i%);"
Print #1, " ELSE"
Print #1, " PRINT #3, encoded;"
Print #1, " END IF"
Print #1, " END SELECT"
Print #1, "LOOP"
Print #1, "CLOSE"
Print #1, "SYSTEM"
Close
End Sub</lang>
 
=QBasic=
 
This is the code that actually does the work. I chose QBasic for a number of reasons, the most important of which are:
* interpreter (no compile time)
* simple
* ease of including in [[:Category:Batch File|batch files]]
* relatively portable
* I've used it for around 20 years, so why change now?
 
Note that since I expect the code to run under QBasic, you don't specify the file(s) to be detokenized on the command line, but instead do it one file at a time, via the <ttcode>DETOKEN</ttcode> environment variable. (This can easily be changed, but I don't care to.)
 
<langsyntaxhighlight lang="qbasic">DATA 1,"CIRCLE",2,"ELLIPSE",3,"FILLED",4,"MOUSE",5,"ORIGIN",6,"QUIT",7,"RECTANGLE",8,"SWAP",9,"SYS",10,"TINT"
DATA 11,"WAIT",12,"INSTALL",14,"PRIVATE",15,"BY",16,"EXIT",128,"AND",129,"DIV",130,"EOR",131,"MOD",132,"OR"
DATA 133,"ERROR",134,"LINE",135,"OFF",136,"STEP",137,"SPC",138,"TAB(",139,"ELSE",140,"THEN",142,"OPENIN",143,"PTR"
DATA 144,"PAGE",145,"TIME",146,"LOMEM",147,"HIMEM",148,"ABS",149,"ACS",150,"ADVAL",151,"ASC",152,"ASN",153,"ATN"
DATA 154,"BGET",155,"COS",156,"COUNT",157,"DEG",158,"ERL",159,"ERR",160,"EVAL",161,"EXP",162,"EXT",163,"FALSE"
DATA 164,"FN",165,"GET",166,"INKEY",167,"INSTR(",168,"INT",169,"LEN",170,"LN",171,"LOG",172,"NOT",173,"OPENUP"
DATA 174,"OPENOUT",175,"PI",176,"POINT",177,"POS",178,"RAD",179,"RND",180,"SGN",181,"SIN",182,"SQR",183,"TAN"
DATA 184,"TO",185,"TRUE",186,"USR",187,"VAL",188,"RND",189,"CHR$",190,"GET$",191,"INKEY$",192,"LEFT$",193,"MID$("
DATA 194,"RIGHT$",195,"STR$",196,"STRING",197,"EOF",198,"SUM",199,"WHILE",200,"CASE",201,"WHEN",202,"OF",203,"ENDCASE"
DATA 204,"OTHERWISE",205,"ENDIF",206,"ENDWHILE",207,"PTR",208,"PAGE",209,"TIME",210,"LOMEM",211,"HIMEM",212,"SOUND",213,"BPUT"
DATA 214,"CALL",215,"CHAIN",216,"CLEAR",217,"CLOSE",218,"CLG",219,"CLS",220,"DATA",221,"DEF",222,"DIM",223,"DRAW"
DATA 224,"END",225,"ENDPROC",226,"ENVELOPE",227,"FOR",228,"GOSUB",229,"GOTO",230,"GCOL",231,"IF",232,"INPUT",233,"LET"
DATA 234,"LOCAL",235,"MODE",236,"MOVE",237,"NEXT",238,"ON",239,"VDU",240,"PLOT",241,"PRINT",242,"PROC",243,"READ"
DATA 244,"REM",245,"REPEAT",246,"REPORT",247,"RESTORE",248,"RETURN",249,"RUN",250,"STOP",251,"COLOUR",252,"TRACE",253,"UNTIL"
DATA 254,"WIDTH",255,"OSCLI",0
 
DIM linenumlineNum AS INTEGERLONG, encoded AS STRING * 1
DIM tmp AS LONG, lineNumDec AS LONG
DIM keywords(255) AS STRING
DO
Line 123 ⟶ 54:
'skip first char
GET #2, , encoded
GET #2, , linenumlineNum
SEEK #2, SEEK(2) - 2
IF -1 = linenum THEN
lineNum = (lineNum AND &HFFFF&)
CLOSE: SYSTEM
IF (CHR$(0) = encoded) AND (&HFFFF& = lineNum) THEN
ELSEIF linenum <> 0 THEN
CLOSE : SYSTEM
PRINT #3, LTRIM$(STR$(linenum)); " ";
ELSEIF lineNum <> 0 THEN
PRINT #3, LTRIM$(STR$(lineNum)); " ";
END IF
DO UNTIL EOF(2)
Line 140 ⟶ 73:
IF NOT EOF(2) THEN
GET #2, , encoded
GET #2, , linenumlineNum
IFSEEK -1#2, =SEEK(2) linenum- THEN2
lineNum = (lineNum AND CLOSE: SYSTEM&HFFFF&)
ELSEIFIF linenum(CHR$(0) <>= 0encoded) AND (&HFFFF& = lineNum) THEN
PRINTCLOSE #3,: LTRIM$(STR$(linenum)); " ";SYSTEM
ELSEIF lineNum <> 0 THEN
PRINT #3, LTRIM$(STR$(lineNum)); " ";
END IF
END IF
CASE &H8D
'deal with encoded line numbers (GOTO targets)
IF NOT EOF(2) THEN
GET #2, , encoded
IF NOT EOF(2) THEN
GET #2, , lineNum
SEEK #2, SEEK(2) - 2
lineNum = (lineNum AND &HFFFF&)
ELSE
'should NEVER get here
lineNum = 0
END IF
lo& = ((ASC(encoded) * 4&) AND &HC0) XOR (lineNum AND 255&) 'LEFT SHIFT ASC(encoded), 2
hi& = ((ASC(encoded) * 16&) AND &HC0) XOR ((lineNum \ 256&) AND 255&) 'LEFT SHIFT ASC(encoded), 4
tmp = lo& + (hi& * 256&)
PRINT #3, LTRIM$(RTRIM$(STR$(tmp)));
END IF
CASE ELSE
Line 156 ⟶ 108:
LOOP
CLOSE
SYSTEM</langsyntaxhighlight>
1,150

edits