User:Ed Davis: Difference between revisions

Content added Content deleted
No edit summary
No edit summary
Line 1: Line 1:

{| class="wikitable"
| line 4 col 1 Print  
| line 4 col 6 Lparen  
| line 4 col 7 String "Hello, World!\n"
| line 4 col 24 Rparen  
| line 4 col 25 Semi  
| line 5 col 1 EOI  

Lexical analysis is the process of converting a sequence of characters (such as in a
Lexical analysis is the process of converting a sequence of characters (such as in a
computer program or web page) into a sequence of tokens (strings with an identified
computer program or web page) into a sequence of tokens (strings with an identified
Line 135: Line 123:

{| class="wikitable"
| line || 4 || col || 1 || Print || &nbsp;
line 4 col 1 Print
line 4 col 6 Lparen
| line || 4 || col || 6 || Lparen || &nbsp;
line 4 col 7 String "Hello, World!\n"
line 4 col 24 Rparen
| line || 4 || col || 7 || String || "Hello, World!\n"
line 4 col 25 Semi
line 5 col 1 EOI
| line || 4 || col || 24 || Rparen || &nbsp;
| line || 4 || col || 25 || Semi || &nbsp;
| line || 5 || col || 1 || EOI || &nbsp;

<lang c>
<lang c>
Line 160: Line 144:

{| class="wikitable"
| line || 4 || col || 1 || Ident || phoenix_number
line 4 col 1 Ident phoenix_number
line 4 col 16 Assign
| line || 4 || col || 16 || Assign || &nbsp;
line 4 col 18 Integer 142857
line 4 col 24 Semi
| line || 4 || col || 18 || Integer || 142857
line 5 col 1 Print
line 5 col 6 Lparen
| line || 4 || col || 24 || Semi || &nbsp;
line 5 col 7 Ident phoenix_number
line 5 col 21 Comma
| line || 5 || col || 1 || Print || &nbsp;
line 5 col 23 String "\n"
line 5 col 27 Rparen
| line || 5 || col || 6 || Lparen || &nbsp;
line 5 col 28 Semi
line 6 col 1 EOI
| line || 5 || col || 7 || Ident || phoenix_number

| line || 5 || col || 21 || Comma || &nbsp;
<lang c>
| line || 5 || col || 23 || String || "\n"
All lexical tokens - not syntactically correct, but that will
have to wait until syntax analysis
| line || 5 || col || 27 || Rparen || &nbsp;
| line || 5 || col || 28 || Semi || &nbsp;
/* Print */ print /* Sub */ -
/* Putc */ putc /* Lss */ <
| line || 6 || col || 1 || EOI || &nbsp;
/* If */ if /* Gtr */ >
/* While */ while /* Leq */ <=
/* Lbrace */ { /* Neq */ !=
/* Rbrace */ } /* And */ &&
/* Lparen */ ( /* Semi */ ;
/* Rparen */ ) /* Comma */ ,
/* Uminus */ - /* Assign */ =
/* Mul */ * /* Integer */ 42
/* Div */ / /* String */ "String literal"
/* Add */ + /* Ident */ variable_name
/* character literal */ '\n'
/* character literal */ ' '


line 5 col 15 Print
line 5 col 41 Sub
line 6 col 15 Putc
line 6 col 41 Lss
line 7 col 15 If
line 7 col 41 Gtr
line 8 col 15 While
line 8 col 41 Leq
line 9 col 15 Lbrace
line 9 col 41 Neq
line 10 col 15 Rbrace
line 10 col 41 And
line 11 col 15 Lparen
line 11 col 41 Semi
line 12 col 15 Rparen
line 12 col 41 Comma
line 13 col 15 Sub
line 13 col 41 Assign
line 14 col 15 Mul
line 14 col 41 Integer 42
line 15 col 15 Div
line 15 col 41 String "String literal"
line 16 col 15 Add
line 16 col 41 Ident variable_name
line 17 col 26 Integer 10
line 18 col 26 Integer 32
line 19 col 1 EOI</pre>

Line 244: Line 272:

static FILE *source_fp, *dest_fp;
static FILE *source_fp, *dest_fp;
static int line, col, the_ch;
static int line = 1, col = 0, the_ch = ' ';
da_dim(text, char);
da_dim(text, char);

Line 397: Line 425:
case EOF: return (tok_s){EOI, err_line, err_col, {0}};
case EOF: return (tok_s){EOI, err_line, err_col, {0}};

void init_lex() { /* initialize the scanner */
line = 1;

Line 435: Line 458:
init_io(&source_fp, stdin, "r", argc > 1 ? argv[1] : "");
init_io(&source_fp, stdin, "r", argc > 1 ? argv[1] : "");
init_io(&dest_fp, stdout, "wb", argc > 2 ? argv[2] : "");
init_io(&dest_fp, stdout, "wb", argc > 2 ? argv[2] : "");

<lang FreeBASIC>
' Every token kind the lexer can produce.
' The order mirrors the tok_list name table built in scanner(), which is
' dimensioned "tk_eoi to tk_ident": tk_eoi must therefore be the first
' member and tk_ident the last.
enum Token_type
    tk_eoi          ' end of input (also used as the "error" result by follow())
    tk_print        ' keyword: print
    tk_putc         ' keyword: putc
    tk_if           ' keyword: if
    tk_while        ' keyword: while
    tk_lbrace       ' {
    tk_rbrace       ' }
    tk_lparen       ' (
    tk_rparen       ' )
    tk_uminus       ' unary minus (named in tok_list; not produced by gettok)
    tk_mul          ' *
    tk_div          ' /
    tk_add          ' +
    tk_sub          ' -
    tk_lss          ' <
    tk_gtr          ' >
    tk_leq          ' <=
    tk_neq          ' !=
    tk_and          ' &&
    tk_semi         ' ;
    tk_comma        ' ,
    tk_assign       ' =
    tk_integer      ' integer literal or character literal
    tk_string       ' string literal
    tk_ident        ' identifier
end enum

' Single-character string constants used by the scanner.
const NewLine as string = chr(10)       ' line feed, appended to every source line read
const DoubleQuote as string = chr(34)   ' the " character, delimits string literals

' One symbol-table entry: a name paired with the token kind it maps to.
' install() appends entries of this type and lookup() searches them by name.
type Symbol
    as string     s_name    ' text of the keyword / identifier
    as Token_type tok       ' token kind returned when s_name is scanned
end type

' Symbol table; starts unallocated and grows by one entry per install() call.
dim shared symtab() as Symbol

' Scanner state shared by next_line(), next_char() and gettok().
dim shared as string  cur_line, cur_ch      ' current source line / current character ("" = end of file)
dim shared as integer line_num, col_num     ' position of cur_ch within the source

' Returns non-zero when ch is non-empty and compares (as a string)
' between "0" and "9" inclusive; returns 0 otherwise.
function is_digit(byval ch as string) as long
    if len(ch) = 0 then return 0
    return (ch >= "0") and (ch <= "9")
end function

' Returns non-zero when ch is non-empty and is either a letter
' (compared case-insensitively against "A".."Z") or a digit per is_digit().
function is_alnum(byval ch as string) as long
    if ch = "" then return 0

    dim upper as string = UCase(ch)
    return (upper >= "A" and upper <= "Z") or is_digit(ch)
end function

' Prints a diagnostic of the form "(line:col) message" followed by a newline.
'   eline, ecol - source position the message refers to
'   msg         - text of the diagnostic
sub error_msg(byval eline as integer, byval ecol as integer, byval msg as string)
    ' position prefix first, left open with ";" so the message continues the line
    print "("; eline; ":"; ecol; ")"; " ";
    print msg
end sub

' add an identifier to the symbol table
' Appends a (name, token) pair to the symbol table.
'   s_name - text to register
'   tok    - token kind associated with that text
' Returns the index of the new entry in symtab.
function install(byval s_name as string, byval tok as Token_type) as integer
    ' the new entry goes one past the current upper bound
    dim slot as integer = ubound(symtab) + 1
    redim preserve symtab(slot)

    with symtab(slot)
        .s_name = s_name
        .tok = tok
    end with

    return slot
end function

' search for an identifier in the symbol table
' Searches the symbol table for an entry whose name equals s_name.
'   s_name - text to look for (compared with "=", i.e. case-sensitively)
' Returns the index of the first matching entry, or -1 when no entry matches.
function lookup(byval s_name as string) as integer
    dim i as integer

    for i = lbound(symtab) to ubound(symtab)
        if symtab(i).s_name = s_name then return i
    next

    ' only reached after every entry has been examined
    return -1
end function

' Reads the next line of input from file #1 into cur_line.
' cur_ch and cur_line are always cleared first; an empty cur_ch is the
' end-of-file marker used by the rest of the scanner. When a line is read,
' a NewLine is appended to it, line_num is advanced and col_num is reset to 1.
sub next_line()
    cur_ch = ""
    cur_line = ""

    if eof(1) = 0 then
        line input #1, cur_line
        cur_line += NewLine
        col_num = 1
        line_num += 1
    end if
end sub

' Advances to the next source character and stores it in cur_ch.
' When the current line is exhausted the next line is fetched via next_line();
' cur_ch ends up empty if no further character is available.
sub next_char()
    col_num += 1
    if col_num > len(cur_line) then next_line()

    if col_num <= len(cur_line) then
        cur_ch = mid(cur_line, col_num, 1)
    else
        cur_ch = ""
    end if
end sub

' Resolves a token that may be one or two characters long (e.g. "<" vs "<=").
' Called with cur_ch positioned on the character after the first one.
'   err_line, err_col - position of the token start, used for diagnostics
'   expect            - second character that completes the two-character token
'   ifyes             - token returned when cur_ch equals expect
'   ifno              - token returned otherwise; passing tk_eoi means the
'                       single-character form is not legal, so an error is reported
' Returns ifyes or ifno.
function follow(byval err_line as integer, byval err_col as integer, byval expect as string, byval ifyes as Token_type, byval ifno as Token_type) as Token_type
    if cur_ch = expect then
        ' consume the second character so it is not scanned again as its own token
        next_char()
        return ifyes
    end if
    if ifno = tk_eoi then error_msg(err_line, err_col, "follow unrecognized character: " + cur_ch)
    return ifno
end function

' Scans the next token from the input, starting at cur_ch.
'   err_line, err_col - (out) line/column where the token starts
'   tok               - (out) kind of token found
'   v                 - (out) token text; only assigned for integer/character
'                       literals, string literals and identifiers
' On return cur_ch is positioned on the first character after the token.
sub gettok(byref err_line as integer, byref err_col as integer, byref tok as Token_type, byref v as string)
    ' skip whitespace
    do while (cur_ch = " " or cur_ch = chr(9) or cur_ch = NewLine) and (cur_ch <> "")
        next_char()
    loop

    err_line = line_num
    err_col = col_num

    select case cur_ch
        case "": tok = tk_eoi: exit sub
        case "{": tok = tk_lbrace: next_char(): exit sub
        case "}": tok = tk_rbrace: next_char(): exit sub
        case "(": tok = tk_lparen: next_char(): exit sub
        case ")": tok = tk_rparen: next_char(): exit sub
        case "+": tok = tk_add: next_char(): exit sub
        case "-": tok = tk_sub: next_char(): exit sub
        case "*": tok = tk_mul: next_char(): exit sub
        case ";": tok = tk_semi: next_char(): exit sub
        case ",": tok = tk_comma: next_char(): exit sub
        case ">": tok = tk_gtr: next_char(): exit sub
        case "=": tok = tk_assign: next_char(): exit sub
        case "/": ' div or comment
            next_char()
            if cur_ch <> "*" then
                tok = tk_div
                exit sub
            end if
            ' skip comments: remember the previous character so that a run of
            ' asterisks before the closing slash ("**/") still ends the comment
            dim prev_ch as string = ""
            next_char()                 ' step past the "*" that opened the comment
            do until cur_ch = ""        ' end of file also ends the comment
                if prev_ch = "*" and cur_ch = "/" then exit do
                prev_ch = cur_ch
                next_char()
            loop
            if cur_ch <> "" then next_char()    ' consume the closing "/"
            ' a comment is not a token: scan whatever follows it
            gettok(err_line, err_col, tok, v)
            exit sub
        case "'": ' single char literals
            next_char()                 ' step past the opening quote
            v = str(Asc(cur_ch))
            if cur_ch = "'" then error_msg(err_line, err_col, "empty character constant")
            if cur_ch = "\" then
                next_char()             ' look at the escaped character
                if cur_ch = "n" then
                    v = "10"
                elseif cur_ch = "\" then
                    v = Str(Asc("\"))
                else
                    error_msg(err_line, err_col, "unknown escape sequence: " + cur_ch)
                end if
            end if
            next_char()                 ' should now be on the closing quote
            if cur_ch <> "'" then error_msg(err_line, err_col, "multi-character constant")
            next_char()                 ' step past the closing quote
            tok = tk_integer
            exit sub
        case "<": next_char(): tok = follow(err_line, err_col, "=", tk_Leq, tk_Lss): exit sub
        case "!": next_char(): tok = follow(err_line, err_col, "=", tk_Neq, tk_EOI): exit sub
        case "&": next_char(): tok = follow(err_line, err_col, "&", tk_And, tk_EOI): exit sub
        case DoubleQuote: ' string
            ' v keeps the surrounding quotes
            v = cur_ch
            next_char()
            do while cur_ch <> DoubleQuote
                if cur_ch = NewLine then error_msg(err_line, err_col, "EOL in string")
                if cur_ch = "" then
                    error_msg(err_line, err_col, "EOF in string")
                    ' cur_ch stays empty at end of file; leave the loop so the
                    ' scan cannot spin forever if error_msg returns
                    exit do
                end if
                v += cur_ch
                next_char()
            loop
            v += cur_ch                 ' closing quote (empty at end of file)
            next_char()
            tok = tk_string
            exit sub
        case else ' integers or identifiers
            dim is_number as boolean = is_digit(cur_ch)
            v = ""
            do while is_alnum(cur_ch) orelse cur_ch = "_"
                if not is_digit(cur_ch) then is_number = false
                v += cur_ch
                next_char()
            loop
            if len(v) = 0 then
                error_msg(err_line, err_col, "unknown character: " + cur_ch)
                ' nothing was consumed; report end of input (the same result
                ' follow() gives on error) so callers do not loop on this character
                tok = tk_eoi
                exit sub
            end if
            if is_digit(mid(v, 1, 1)) then
                ' starts with a digit: every character must have been a digit
                if not is_number then error_msg(err_line, err_col, "invalid number: " + v)
                tok = tk_integer
                exit sub
            end if
            ' keywords are pre-installed in the symbol table; anything else is an identifier
            dim as integer index = lookup(v)
            if index = -1 then
                tok = tk_ident
            else
                tok = symtab(index).tok
            end if
            exit sub
    end select
end sub

' Prepares the scanner: registers the keywords in the symbol table, opens the
' source file as file #1 and resets the position state.
'   filein - path of the source file to scan
' Ends the program with exit code 1 if the file cannot be opened.
sub init_lex(byval filein as string)
    install("if", tk_if)
    install("print", tk_print)
    install("putc", tk_putc)
    install("while", tk_while)

    ' Open used as a function returns 0 on success and an error code otherwise
    if open(filein for input as #1) <> 0 then
        print "cannot open file: " + filein
        end 1
    end if

    ' empty line and zero position: the first next_char() call will find
    ' col_num past the end of cur_line and fetch the first line
    cur_line = ""
    line_num = 0
    col_num = 0
end sub

' Drives the lexer: repeatedly calls gettok() and prints one line per token
' ("line <n> col <n> <Name>", plus the token text for integers, identifiers
' and strings) until the end-of-input token has been printed.
sub scanner()
    dim err_line as integer
    dim err_col as integer
    dim tok as Token_type
    dim v as string
    ' printable name for every token kind, indexed by the enum value
    dim tok_list(tk_eoi to tk_ident) as string

    tok_list(tk_eoi    ) = "EOI"
    tok_list(tk_print  ) = "Print"
    tok_list(tk_putc   ) = "Putc"
    tok_list(tk_if     ) = "If"
    tok_list(tk_while  ) = "While"
    tok_list(tk_lbrace ) = "Lbrace"
    tok_list(tk_rbrace ) = "Rbrace"
    tok_list(tk_lparen ) = "Lparen"
    tok_list(tk_rparen ) = "Rparen"
    tok_list(tk_uminus ) = "Uminus"
    tok_list(tk_mul    ) = "Mul"
    tok_list(tk_div    ) = "Div"
    tok_list(tk_add    ) = "Add"
    tok_list(tk_sub    ) = "Sub"
    tok_list(tk_lss    ) = "Lss"
    tok_list(tk_gtr    ) = "Gtr"
    tok_list(tk_leq    ) = "Leq"
    tok_list(tk_neq    ) = "Neq"
    tok_list(tk_and    ) = "And"
    tok_list(tk_semi   ) = "Semi"
    tok_list(tk_comma  ) = "Comma"
    tok_list(tk_assign ) = "Assign"
    tok_list(tk_integer) = "Integer"
    tok_list(tk_string ) = "String"
    tok_list(tk_ident  ) = "Ident"

    do
        gettok(err_line, err_col, tok, v)
        ' the \...\ field is 10 characters wide so that no token name is cut off
        print using "line ##### col ##### \        \"; err_line; err_col; tok_list(tok);
        if tok = tk_integer orelse tok = tk_ident orelse tok = tk_string then print " " + v;
        print   ' end the output line for this token
    loop until tok = tk_eoi
end sub

' Program entry: requires the source file name as the first command-line
' argument, then initialises the lexer and prints the token stream.
sub main()
    if command(1) = "" then print "filename required" : system

    init_lex(command(1))
    scanner()
    close #1    ' release the source file opened by init_lex()
end sub
