User:Ed Davis: Difference between revisions
Content added Content deleted
No edit summary |
No edit summary |
||
Line 1: | Line 1: | ||
{| class="wikitable" |
|||
|- |
|||
| line 4 col 1 Print |
|||
| line 4 col 6 Lparen |
|||
| line 4 col 7 String "Hello, World!\n" |
|||
| line 4 col 24 Rparen |
|||
| line 4 col 25 Semi |
|||
| line 5 col 1 EOI |
|||
|} |
|||
Lexical analysis is the process of converting a sequence of characters (such as in a |
Lexical analysis is the process of converting a sequence of characters (such as in a |
||
computer program or web page) into a sequence of tokens (strings with an identified |
computer program or web page) into a sequence of tokens (strings with an identified |
||
Line 135: | Line 123: | ||
;Output |
;Output |
||
<b> |
|||
{| class="wikitable" |
|||
<pre> |
|||
|- |
|||
line 4 col 1 Print |
|||
line 4 col 6 Lparen |
|||
|- |
|||
line 4 col 7 String "Hello, World!\n" |
|||
line 4 col 24 Rparen |
|||
|- |
|||
line 4 col 25 Semi |
|||
line 5 col 1 EOI |
|||
|- |
|||
</pre> |
|||
| line || 4 || col || 24 || Rparen || |
|||
</b> |
|||
|- |
|||
| line || 4 || col || 25 || Semi || |
|||
|- |
|||
| line || 5 || col || 1 || EOI || |
|||
|} |
|||
<lang c> |
<lang c> |
||
Line 160: | Line 144: | ||
;Output |
;Output |
||
<b> |
|||
{| class="wikitable" |
|||
<pre> |
|||
|- |
|||
line 4 col 1 Ident phoenix_number |
|||
line 4 col 16 Assign |
|||
|- |
|||
line 4 col 18 Integer 142857 |
|||
line 4 col 24 Semi |
|||
|- |
|||
line 5 col 1 Print |
|||
line 5 col 6 Lparen |
|||
|- |
|||
line 5 col 7 Ident phoenix_number |
|||
line 5 col 21 Comma |
|||
|- |
|||
line 5 col 23 String "\n" |
|||
line 5 col 27 Rparen |
|||
|- |
|||
line 5 col 28 Semi |
|||
line 6 col 1 EOI |
|||
|- |
|||
</pre> |
|||
| line || 5 || col || 7 || Ident || phoenix_number |
|||
</b> |
|||
|- |
|||
| line || 5 || col || 21 || Comma || |
|||
<lang c> |
|||
|- |
|||
/* |
|||
| line || 5 || col || 23 || String || "\n" |
|||
All lexical tokens - not syntactically correct, but that will |
|||
|- |
|||
have to wait until syntax analysis |
|||
| line || 5 || col || 27 || Rparen || |
|||
*/ |
|||
|- |
|||
/* Print */ print /* Sub */ - |
|||
/* Putc */ putc /* Lss */ < |
|||
|- |
|||
/* If */ if /* Gtr */ > |
|||
/* While */ while /* Leq */ <= |
|||
|} |
|||
/* Lbrace */ { /* Neq */ != |
|||
/* Rbrace */ } /* And */ && |
|||
/* Lparen */ ( /* Semi */ ; |
|||
/* Rparen */ ) /* Comma */ , |
|||
/* Uminus */ - /* Assign */ = |
|||
/* Mul */ * /* Integer */ 42 |
|||
/* Div */ / /* String */ "String literal" |
|||
/* Add */ + /* Ident */ variable_name |
|||
/* character literal */ '\n' |
|||
/* character literal */ ' ' |
|||
</lang> |
|||
;Output |
|||
<b> |
|||
<pre> |
|||
line 5 col 15 Print |
|||
line 5 col 41 Sub |
|||
line 6 col 15 Putc |
|||
line 6 col 41 Lss |
|||
line 7 col 15 If |
|||
line 7 col 41 Gtr |
|||
line 8 col 15 While |
|||
line 8 col 41 Leq |
|||
line 9 col 15 Lbrace |
|||
line 9 col 41 Neq |
|||
line 10 col 15 Rbrace |
|||
line 10 col 41 And |
|||
line 11 col 15 Lparen |
|||
line 11 col 41 Semi |
|||
line 12 col 15 Rparen |
|||
line 12 col 41 Comma |
|||
line 13 col 15 Sub |
|||
line 13 col 41 Assign |
|||
line 14 col 15 Mul |
|||
line 14 col 41 Integer 42 |
|||
line 15 col 15 Div |
|||
line 15 col 41 String "String literal" |
|||
line 16 col 15 Add |
|||
line 16 col 41 Ident variable_name |
|||
line 17 col 26 Integer 10 |
|||
line 18 col 26 Integer 32 |
|||
line 19 col 1 EOI</pre> |
|||
</b> |
|||
;Diagnostics |
;Diagnostics |
||
Line 244: | Line 272: | ||
static FILE *source_fp, *dest_fp; |
static FILE *source_fp, *dest_fp; |
||
static int line, col, the_ch; |
static int line = 1, col = 0, the_ch = ' '; |
||
da_dim(text, char); |
da_dim(text, char); |
||
Line 397: | Line 425: | ||
case EOF: return (tok_s){EOI, err_line, err_col, {0}}; |
case EOF: return (tok_s){EOI, err_line, err_col, {0}}; |
||
} |
} |
||
} |
|||
void init_lex() { /* initialize the scanner */ |
|||
line = 1; |
|||
read_ch(); |
|||
} |
} |
||
Line 435: | Line 458: | ||
init_io(&source_fp, stdin, "r", argc > 1 ? argv[1] : ""); |
init_io(&source_fp, stdin, "r", argc > 1 ? argv[1] : ""); |
||
init_io(&dest_fp, stdout, "wb", argc > 2 ? argv[2] : ""); |
init_io(&dest_fp, stdout, "wb", argc > 2 ? argv[2] : ""); |
||
init_lex(); |
|||
run(); |
run(); |
||
} |
} |
||
</lang> |
|||
=={{header|FreeBASIC}}== |
|||
<lang FreeBASIC> |
|||
' Token kinds produced by the lexer.
' The order matters: members are numbered consecutively from tk_eoi, and
' tk_eoi .. tk_ident is used as an array index range elsewhere in the file.
enum Token_type
    ' end of input
    tk_eoi

    ' keywords
    tk_print
    tk_putc
    tk_if
    tk_while

    ' brackets
    tk_lbrace
    tk_rbrace
    tk_lparen
    tk_rparen

    ' operators
    tk_uminus
    tk_mul
    tk_div
    tk_add
    tk_sub
    tk_lss
    tk_gtr
    tk_leq
    tk_neq
    tk_and

    ' punctuation and assignment
    tk_semi
    tk_comma
    tk_assign

    ' tokens that carry a value
    tk_integer
    tk_string
    tk_ident
end enum
|||
' Characters that are awkward to write as literals.
const NewLine     = chr(10)
const DoubleQuote = chr(34)

' One symbol-table entry: a name and the token kind it maps to.
' The table is where keywords and variables are stored.
type Symbol
    as string     s_name
    as Token_type tok
end type

' Symbol table; starts unallocated and grows as entries are installed.
dim shared as Symbol symtab()

' Scanner state: the current source line, the current character (an empty
' string means end-of-file) and the current position.
dim shared as string  cur_line, cur_ch
dim shared as integer line_num, col_num
|||
' Return -1 (true) when ch is non-empty and compares within "0" .. "9",
' otherwise 0 (false).
function is_digit(byval ch as string) as long
    if len(ch) = 0 then return 0
    return (ch >= "0") and (ch <= "9")
end function
|||
' Return -1 (true) when ch is a letter (compared case-insensitively against
' "A" .. "Z") or a digit, otherwise 0 (false). An empty string yields 0.
function is_alnum(byval ch as string) as long
    if len(ch) = 0 then return 0

    dim upper as string = UCase(ch)
    dim is_letter as long = (upper >= "A") and (upper <= "Z")

    return is_letter or is_digit(ch)
end function
|||
' Report a fatal lexer error and terminate the program.
'   eline, ecol - source position the message refers to
'   msg         - text describing the problem
' Does not return. The program ends with exit status 1 so that whoever
' launched the lexer can tell that scanning failed (a bare "system" would
' report success).
sub error_msg(byval eline as integer, byval ecol as integer, byval msg as string)
    print "("; eline; ":"; ecol; ")"; " "; msg
    end 1
end sub
|||
' Append a (name, token) pair to the symbol table.
'   s_name - identifier text
'   tok    - token kind to associate with it
' Returns the index of the new entry. No duplicate check is made.
function install(byval s_name as string, byval tok as Token_type) as integer
    dim slot as integer = ubound(symtab) + 1

    redim preserve symtab(slot)
    with symtab(slot)
        .s_name = s_name
        .tok = tok
    end with

    return slot
end function
|||
' Find an identifier in the symbol table.
'   s_name - identifier text to look for (exact string comparison)
' Returns the index of the first matching entry, or -1 when there is none.
function lookup(byval s_name as string) as integer
    dim idx as integer = lbound(symtab)

    do while idx <= ubound(symtab)
        if symtab(idx).s_name = s_name then return idx
        idx += 1
    loop

    return -1
end function
|||
' Read the next line of input from file #1 into cur_line.
' cur_line and cur_ch are always cleared first; an empty cur_ch means
' end-of-file. When a line is available it is stored with a trailing
' NewLine, line_num is advanced and col_num is reset to 1. At end of file
' line_num and col_num are left untouched.
sub next_line()
    cur_ch = ""
    cur_line = ""

    if not eof(1) then
        line input #1, cur_line
        cur_line += NewLine
        line_num += 1
        col_num = 1
    end if
end sub
|||
' Advance to the next input character and store it in cur_ch.
' Moves col_num forward by one, fetching a new line when the current one is
' exhausted. cur_ch ends up empty when no character is available.
sub next_char()
    col_num += 1
    if col_num > len(cur_line) then next_line()

    if col_num <= len(cur_line) then
        cur_ch = mid(cur_line, col_num, 1)
    else
        cur_ch = ""
    end if
end sub
|||
' Decide between a one- and a two-character token.
'   err_line, err_col - position used if an error is reported
'   expect            - character that completes the two-character token
'   ifyes             - token returned when cur_ch equals expect (the
'                       character is consumed)
'   ifno              - token returned otherwise; tk_eoi here means "there
'                       is no valid one-character form", which is reported
'                       through error_msg
function follow(byval err_line as integer, byval err_col as integer, byval expect as string, byval ifyes as Token_type, byval ifno as Token_type) as Token_type
    if cur_ch <> expect then
        if ifno = tk_eoi then
            error_msg(err_line, err_col, "follow unrecognized character: " + cur_ch)
        end if
        return ifno
    end if

    next_char()
    return ifyes
end function
|||
' Scan the next token starting at cur_ch.
'   err_line, err_col - receive the line/column where the token starts
'   tok               - receives the token kind
'   v                 - receives the token text for strings (quotes
'                       included), identifiers and integers; for character
'                       literals it receives the character code in decimal.
'                       It is not reset for other token kinds.
' Whitespace and /* ... */ comments are skipped. Lexical errors are reported
' through error_msg.
sub gettok(byref err_line as integer, byref err_col as integer, byref tok as Token_type, byref v as string)
    ' skip whitespace
    do while (cur_ch = " " or cur_ch = chr(9) or cur_ch = NewLine) and (cur_ch <> "")
        next_char()
    loop

    err_line = line_num
    err_col = col_num

    select case cur_ch
        case "": tok = tk_eoi: exit sub
        case "{": tok = tk_lbrace: next_char(): exit sub
        case "}": tok = tk_rbrace: next_char(): exit sub
        case "(": tok = tk_lparen: next_char(): exit sub
        case ")": tok = tk_rparen: next_char(): exit sub
        case "+": tok = tk_add: next_char(): exit sub
        case "-": tok = tk_sub: next_char(): exit sub
        case "*": tok = tk_mul: next_char(): exit sub
        case ";": tok = tk_semi: next_char(): exit sub
        case ",": tok = tk_comma: next_char(): exit sub
        case ">": tok = tk_gtr: next_char(): exit sub
        case "=": tok = tk_assign: next_char(): exit sub

        case "/": ' div or comment
            next_char()
            if cur_ch <> "*" then
                tok = tk_div
                exit sub
            end if

            ' skip the comment body; cur_ch is the opening "*" here
            next_char()
            do
                if cur_ch = "" then
                    ' input ended before the closing "*/"
                    error_msg(err_line, err_col, "EOF in comment")
                    tok = tk_eoi
                    exit sub
                end if

                if cur_ch = "*" then
                    next_char()
                    if cur_ch = "/" then
                        ' comment closed: scan the token that follows it
                        next_char()
                        gettok(err_line, err_col, tok, v)
                        exit sub
                    end if
                    ' Not a "/": do NOT advance again, so that this character
                    ' is re-examined. It may itself be the "*" of the real
                    ' terminator, as in a comment ending with "**/".
                else
                    next_char()
                end if
            loop

        case "'": ' single char literals
            next_char()
            v = str(Asc(cur_ch))
            if cur_ch = "'" then error_msg(err_line, err_col, "empty character constant")
            if cur_ch = "\" then
                next_char()
                if cur_ch = "n" then
                    v = "10"
                elseif cur_ch = "\" then
                    v = Str(Asc("\"))
                else
                    error_msg(err_line, err_col, "unknown escape sequence: " + cur_ch)
                end if
            end if
            next_char()
            if cur_ch <> "'" then error_msg(err_line, err_col, "multi-character constant")
            next_char()
            tok = tk_integer
            exit sub

        case "<": next_char(): tok = follow(err_line, err_col, "=", tk_Leq, tk_Lss): exit sub
        case "!": next_char(): tok = follow(err_line, err_col, "=", tk_Neq, tk_EOI): exit sub
        case "&": next_char(): tok = follow(err_line, err_col, "&", tk_And, tk_EOI): exit sub

        case DoubleQuote: ' string
            v = cur_ch
            next_char()
            do while cur_ch <> DoubleQuote
                if cur_ch = NewLine then error_msg(err_line, err_col, "EOL in string")
                if cur_ch = "" then error_msg(err_line, err_col, "EOF in string")
                v += cur_ch
                next_char()
            loop
            v += cur_ch     ' closing quote
            next_char()
            tok = tk_string
            exit sub

        case else ' integers or identifiers
            dim is_number as boolean = is_digit(cur_ch)
            v = ""
            do while is_alnum(cur_ch) orelse cur_ch = "_"
                if not is_digit(cur_ch) then is_number = false
                v += cur_ch
                next_char()
            loop

            ' nothing collected: cur_ch cannot start any token
            if len(v) = 0 then error_msg(err_line, err_col, "unknown character: " + cur_ch)

            if is_digit(mid(v, 1, 1)) then
                ' starts with a digit, so every character must be a digit
                if not is_number then error_msg(err_line, err_col, "invalid number: " + v)
                tok = tk_integer
                exit sub
            end if

            ' names found in the symbol table use the token stored there,
            ' any other name is an identifier
            dim as integer index = lookup(v)
            if index = -1 then
                tok = tk_ident
            else
                tok = symtab(index).tok
            end if
            exit sub
    end select
end sub
|||
' Prepare the scanner: register the keywords, open the source file as file
' #1 and read the first character.
'   filein - path of the source file to scan
' If the file cannot be opened a message is printed and the program ends
' with exit status 1, instead of carrying on with an unopened file.
sub init_lex(byval filein as string)
    install("if", tk_if)
    install("print", tk_print)
    install("putc", tk_putc)
    install("while", tk_while)

    ' Open used as a function returns 0 on success and an error code otherwise
    if open(filein for input as #1) <> 0 then
        print "cannot open file: "; filein
        end 1
    end if

    cur_line = ""
    line_num = 0
    col_num = 0
    next_char()
end sub
|||
' Tokenize the whole input and print one line per token: the line and
' column where it starts, the token name and, for integers, identifiers and
' strings, the token value. Stops after printing the EOI token.
sub scanner()
    ' Width of the token-name column. A "\ \" field prints exactly as many
    ' characters as the two backslashes plus the spaces between them, so it
    ' must be at least as wide as the longest name ("Integer") or the names
    ' are truncated.
    const NAME_WIDTH = 15

    dim err_line as integer
    dim err_col as integer
    dim tok as Token_type
    dim v as string
    dim fmt as string = "line ##### col ##### \" + space(NAME_WIDTH - 2) + "\"
    dim tok_list(tk_eoi to tk_ident) as string

    tok_list(tk_eoi    ) = "EOI"
    tok_list(tk_print  ) = "Print"
    tok_list(tk_putc   ) = "Putc"
    tok_list(tk_if     ) = "If"
    tok_list(tk_while  ) = "While"
    tok_list(tk_lbrace ) = "Lbrace"
    tok_list(tk_rbrace ) = "Rbrace"
    tok_list(tk_lparen ) = "Lparen"
    tok_list(tk_rparen ) = "Rparen"
    tok_list(tk_uminus ) = "Uminus"
    tok_list(tk_mul    ) = "Mul"
    tok_list(tk_div    ) = "Div"
    tok_list(tk_add    ) = "Add"
    tok_list(tk_sub    ) = "Sub"
    tok_list(tk_lss    ) = "Lss"
    tok_list(tk_gtr    ) = "Gtr"
    tok_list(tk_leq    ) = "Leq"
    tok_list(tk_neq    ) = "Neq"
    tok_list(tk_and    ) = "And"
    tok_list(tk_semi   ) = "Semi"
    tok_list(tk_comma  ) = "Comma"
    tok_list(tk_assign ) = "Assign"
    tok_list(tk_integer) = "Integer"
    tok_list(tk_string ) = "String"
    tok_list(tk_ident  ) = "Ident"

    do
        gettok(err_line, err_col, tok, v)
        print using fmt; err_line; err_col; tok_list(tok);
        ' only these token kinds carry a value worth printing
        if tok = tk_integer orelse tok = tk_ident orelse tok = tk_string then print " " + v;
        print
    loop until tok = tk_eoi
end sub
|||
' Program entry point: the first command-line argument names the source
' file to scan. Without it a message is printed and the program exits.
sub main()
    dim filein as string = command(1)

    if len(filein) = 0 then
        print "filename required"
        system
    end if

    init_lex(filein)
    scanner()
end sub

main()
system
|||
</lang> |
</lang> |
||