User:Ed Davis: Difference between revisions

Content added Content deleted
No edit summary
No edit summary
Line 163: Line 163:
<lang c>
<lang c>
/*
/*
All lexical tokens - not syntatically correct, but that will
All lexical tokens - not syntactically correct, but that will
have to wait until syntax analysis
have to wait until syntax analysis
*/
*/
Line 232: Line 232:
;Implementations
;Implementations


__TOC__


=={{header|C}}==
=={{header|C}}==
Line 288: Line 287:
}
}


static void read_ch() { /* get next char from input */
static int next_ch() { /* get next char from input */
the_ch = getc(source_fp);
the_ch = getc(source_fp);
++col;
++col;
Line 295: Line 294:
col = 0;
col = 0;
}
}
return the_ch;
}
}


Line 301: Line 301:
error(err_line, err_col, "gettok: empty character constant");
error(err_line, err_col, "gettok: empty character constant");
if (the_ch == '\\') {
if (the_ch == '\\') {
read_ch();
next_ch();
if (the_ch == 'n')
if (the_ch == 'n')
n = 10;
n = 10;
Line 308: Line 308:
else error(err_line, err_col, "gettok: unknown escape sequence \\%c", the_ch);
else error(err_line, err_col, "gettok: unknown escape sequence \\%c", the_ch);
}
}
if (next_ch() != '\'')
read_ch();
if (the_ch != '\'') error(err_line, err_col, "multi-character constant");
error(err_line, err_col, "multi-character constant");
read_ch();
next_ch();
return (tok_s){Integerk, err_line, err_col, {n}};
return (tok_s){Integerk, err_line, err_col, {n}};
}
}
Line 320: Line 320:
/* comment found */
/* comment found */
for (;;) {
for (;;) {
if (next_ch() == '*' && next_ch() == '/') {
read_ch();
if (the_ch == '*' || the_ch == EOF) {
next_ch();
read_ch();
return gettok();
if (the_ch == '/' || the_ch == EOF) {
} else if (the_ch == EOF)
read_ch();
error(err_line, err_col, "EOF in comment");
return gettok();
}
}
}
}
}
}
Line 334: Line 331:
da_rewind(text);
da_rewind(text);


for (read_ch(); the_ch != start; read_ch()) {
while (next_ch() != start) {
if (the_ch == '\n')
if (the_ch == '\n') error(err_line, err_col, "EOL in string");
error(err_line, err_col, "EOL in string");
if (the_ch == EOF) error(err_line, err_col, "EOF in string");
if (the_ch == EOF)
error(err_line, err_col, "EOF in string");
da_append(text, (char)the_ch);
da_append(text, (char)the_ch);
}
}
da_append(text, '\0');
da_append(text, '\0');


read_ch();
next_ch();
return (tok_s){Stringk, err_line, err_col, {.text=text}};
return (tok_s){Stringk, err_line, err_col, {.text=text}};
}
}
Line 373: Line 368:
if (!isdigit(the_ch))
if (!isdigit(the_ch))
is_number = false;
is_number = false;
read_ch();
next_ch();
}
}
if (da_len(text) == 0)
if (da_len(text) == 0)
Line 391: Line 386:
static tok_s follow(int expect, TokenType ifyes, TokenType ifno, int err_line, int err_col) { /* look ahead for '>=', etc. */
static tok_s follow(int expect, TokenType ifyes, TokenType ifno, int err_line, int err_col) { /* look ahead for '>=', etc. */
if (the_ch == expect) {
if (the_ch == expect) {
read_ch();
next_ch();
return (tok_s){ifyes, err_line, err_col, {0}};
return (tok_s){ifyes, err_line, err_col, {0}};
}
}
if (ifno == EOI)
if (ifno == EOI) error(err_line, err_col, "follow: unrecognized character '%c' (%d)\n", the_ch, the_ch);
error(err_line, err_col, "follow: unrecognized character '%c' (%d)\n", the_ch, the_ch);
return (tok_s){ifno, err_line, err_col, {0}};
return (tok_s){ifno, err_line, err_col, {0}};
}
}
Line 401: Line 397:
/* skip white space */
/* skip white space */
while (isspace(the_ch))
while (isspace(the_ch))
read_ch();
next_ch();
int err_line = line;
int err_line = line;
int err_col = col;
int err_col = col;
switch (the_ch) {
switch (the_ch) {
case '{': read_ch(); return (tok_s){Lbrace, err_line, err_col, {0}};
case '{': next_ch(); return (tok_s){Lbrace, err_line, err_col, {0}};
case '}': read_ch(); return (tok_s){Rbrace, err_line, err_col, {0}};
case '}': next_ch(); return (tok_s){Rbrace, err_line, err_col, {0}};
case '(': read_ch(); return (tok_s){Lparen, err_line, err_col, {0}};
case '(': next_ch(); return (tok_s){Lparen, err_line, err_col, {0}};
case ')': read_ch(); return (tok_s){Rparen, err_line, err_col, {0}};
case ')': next_ch(); return (tok_s){Rparen, err_line, err_col, {0}};
case '+': read_ch(); return (tok_s){Add, err_line, err_col, {0}};
case '+': next_ch(); return (tok_s){Add, err_line, err_col, {0}};
case '-': read_ch(); return (tok_s){Sub, err_line, err_col, {0}};
case '-': next_ch(); return (tok_s){Sub, err_line, err_col, {0}};
case '*': read_ch(); return (tok_s){Mul, err_line, err_col, {0}};
case '*': next_ch(); return (tok_s){Mul, err_line, err_col, {0}};
case ';': read_ch(); return (tok_s){Semi, err_line, err_col, {0}};
case ';': next_ch(); return (tok_s){Semi, err_line, err_col, {0}};
case ',': read_ch(); return (tok_s){Comma, err_line, err_col, {0}};
case ',': next_ch(); return (tok_s){Comma, err_line, err_col, {0}};
case '>': read_ch(); return (tok_s){Gtr, err_line, err_col, {0}};
case '>': next_ch(); return (tok_s){Gtr, err_line, err_col, {0}};
case '=': read_ch(); return (tok_s){Assign, err_line, err_col, {0}};
case '=': next_ch(); return (tok_s){Assign, err_line, err_col, {0}};
case '/': read_ch(); return div_or_cmt(err_line, err_col);
case '/': next_ch(); return div_or_cmt(err_line, err_col);
case '\'': read_ch(); return char_lit(the_ch, err_line, err_col);
case '\'': next_ch(); return char_lit(the_ch, err_line, err_col);
case '<': read_ch(); return follow('=', Leq, Lss, err_line, err_col);
case '<': next_ch(); return follow('=', Leq, Lss, err_line, err_col);
case '!': read_ch(); return follow('=', Neq, EOI, err_line, err_col);
case '!': next_ch(); return follow('=', Neq, EOI, err_line, err_col);
case '&': read_ch(); return follow('&', And, EOI, err_line, err_col);
case '&': next_ch(); return follow('&', And, EOI, err_line, err_col);
case '"' : return string_lit(the_ch, err_line, err_col);
case '"' : return string_lit(the_ch, err_line, err_col);
default: return ident_or_int(err_line, err_col);
default: return ident_or_int(err_line, err_col);
Line 436: Line 432:
"Uminus Mul Div Add Sub Lss Gtr Leq Neq "
"Uminus Mul Div Add Sub Lss Gtr Leq Neq "
"And Semi Comma Assign Integer String Ident "[tok.tok * 9]);
"And Semi Comma Assign Integer String Ident "[tok.tok * 9]);

if (tok.tok == Integerk)
fprintf(dest_fp, " %8d", tok.n);
if (tok.tok == Integerk) fprintf(dest_fp, " %4d", tok.n);
else if (tok.tok == Ident)
else if (tok.tok == Ident) fprintf(dest_fp, " %s", tok.text);
fprintf(dest_fp, " %s", tok.text);
else if (tok.tok == Stringk) fprintf(dest_fp, " \"%s\"", tok.text);
else if (tok.tok == Stringk)
fprintf(dest_fp, " \"%s\"", tok.text);
fprintf(dest_fp, "\n");
fprintf(dest_fp, "\n");
} while (tok.tok != EOI);
} while (tok.tok != EOI);
Line 460: Line 454:
run();
run();
}
}
</lang>

=={{header|Euphoria}}==
<lang euphoria>
include std/io.e
include std/map.e
include std/types.e
include std/convert.e

-- Boolean helpers, plus the end-of-input sentinel that the_ch is compared against.
constant true = 1, false = 0, EOF = -1

-- Token kinds. A token's kind is stored as the first element of the token
-- sequence and is used to index all_syms when the token is printed.
enum EOI, Printk, Putc, Ifk, Whilek, Lbrace, Rbrace, Lparen, Rparen, Uminus, Mul, Div,
Add, Sub, Lss, Gtr, Leq, Neq, Andk, Semi, Comma, Assign, Integerk, Stringk, Ident

-- Printable token names, listed in the same order as the enum above.
constant all_syms = { "EOI", "Print", "Putc", "If", "While", "Lbrace", "Rbrace", "Lparen",
"Rparen", "Uminus", "Mul", "Div", "Add", "Sub", "Lss", "Gtr", "Leq", "Neq", "And",
"Semi", "Comma", "Assign", "Integer", "String", "Ident"}

-- Lexer state: the input handle, the current character, and its position.
-- the_ch starts as a blank so that the whitespace-skipping loop in get_tok()
-- reads the first real character on the first call.
integer input_file, the_ch = ' ', the_col = 0, the_line = 1
-- Character -> single-character token kind; filled in by init().
sequence symbols
-- Keyword text -> token kind; filled in by init().
map key_words = new()

-- Report a fatal error and stop the program.
--   format : printf-style format string
--   data   : values substituted into format
-- The message is written to STDOUT and the process then exits with status 1.
procedure error(sequence format, sequence data)
    sequence message = sprintf(format, data)
    puts(STDOUT, message)
    abort(1)
end procedure

-- Advance the input by one character.
-- Stores the character in the_ch and keeps the_line / the_col in step with it:
-- a newline starts a new line at column 0, anything else moves one column right.
-- Returns the character just read, so callers can test it directly
-- (they compare it against EOF to detect the end of the input).
function next_ch()
    the_ch = getc(input_file)
    if the_ch = '\n' then
        the_line += 1
        the_col = 0
    else
        the_col += 1
    end if
    return the_ch
end function

-- Scan a character constant such as 'x', '\n' or '\\'.
-- On entry the_ch is the opening quote. On success the closing quote has been
-- consumed and the result is an Integerk token carrying the character code.
--   err_line, err_col : position of the opening quote, used in diagnostics
-- Aborts through error() on an empty constant, an unknown escape, or a
-- constant that is not closed after a single character.
function char_lit(integer err_line, integer err_col)
    integer code = next_ch()    -- step past the opening quote

    if code = '\'' then
        error("%d %d empty character constant", {err_line, err_col})
    end if

    if code = '\\' then
        -- escape sequence: only \n and \\ are recognised
        next_ch()
        if the_ch = 'n' then
            code = 10
        elsif the_ch = '\\' then
            code = '\\'
        else
            error("%d %d unknown escape sequence \\%c", {err_line, err_col, the_ch})
        end if
    end if

    -- exactly one (possibly escaped) character is allowed before the closing quote
    if next_ch() != '\'' then
        error("%d %d multi-character constant", {err_line, err_col})
    end if

    next_ch()                   -- step past the closing quote
    return {Integerk, err_line, err_col, code}
end function

-- Handle a '/' : either the division operator or the start of a /* ... */ comment.
-- On entry the_ch is the '/'.
--   err_line, err_col : position of the '/', used in the token and in diagnostics
-- Returns a Div token when the next character is not '*'. Otherwise skips the
-- comment and returns whatever token follows it (via get_tok()).
-- Aborts through error() if the input ends inside the comment.
function div_or_cmt(integer err_line, integer err_col)
    if next_ch() != '*' then
        return {Div, err_line, err_col}
    end if

    -- comment found: look for the closing "*/"
    next_ch()
    while true do
        if the_ch = '*' then
            if next_ch() = '/' then
                next_ch()           -- step past the closing '/'
                return get_tok()
            end if
            -- Not a '/': loop WITHOUT reading again, so that the character
            -- after the star is itself examined. This is what lets "**/"
            -- (a star run before the slash) terminate the comment.
        elsif the_ch = EOF then
            error("%d %d EOF in comment", {err_line, err_col})
        else
            next_ch()
        end if
    end while
end function

-- "string"
-- Scan a string literal delimited by the character `start`.
-- On entry the_ch is the opening delimiter. On success the closing delimiter
-- has been consumed and the result is a Stringk token carrying the text
-- between the delimiters (no escape processing is performed).
--   err_line, err_col : position of the opening delimiter
-- Aborts through error() if the input or the line ends before the string is closed.
function string_lit(integer start, integer err_line, integer err_col)
    string body = ""
    integer ch

    while true do
        ch = next_ch()
        if ch = start then
            exit
        end if
        if ch = EOF then
            error("%d %d EOF while scanning string literal", {err_line, err_col})
        end if
        if ch = '\n' then
            error("%d %d EOL while scanning string literal", {err_line, err_col})
        end if
        body &= ch
    end while

    next_ch()       -- step past the closing delimiter
    return {Stringk, err_line, err_col, body}
end function

-- Scan an identifier, keyword or integer literal starting at the_ch.
-- Collects the longest run of alphanumerics and underscores, then classifies it:
--   * starts with a digit -> Integerk token (must consist of digits only)
--   * found in key_words  -> the keyword's token kind
--   * anything else       -> Ident token carrying the text
--   err_line, err_col : position of the first character
-- Aborts through error() if no character could be collected or the run starts
-- with a digit but contains non-digits.
function ident_or_int(integer err_line, integer err_col)
    string lexeme = ""
    integer digits_only = true

    while t_alnum(the_ch) or the_ch = '_' do
        lexeme &= the_ch
        digits_only = digits_only and t_digit(the_ch)
        next_ch()
    end while

    if length(lexeme) = 0 then
        error("%d %d ident_or_int: unrecognized character: (%d) '%s'", {err_line, err_col, the_ch, the_ch})
    end if

    if t_digit(lexeme[1]) then
        if not digits_only then
            error("%d %d invalid number: %s", {err_line, err_col, lexeme})
        end if
        return {Integerk, err_line, err_col, to_integer(lexeme)}
    end if

    if has(key_words, lexeme) then
        return {get(key_words, lexeme), err_line, err_col}
    end if

    return {Ident, err_line, err_col, lexeme}
end function

-- Decide between a one- and a two-character operator (e.g. '<' versus '<=').
-- On entry the_ch is the operator's first character.
--   expect : the character that completes the two-character form
--   ifyes  : token kind returned when `expect` follows (it is then consumed)
--   ifno   : token kind returned otherwise; passing EOI means the one-character
--            form is not a valid token, so its absence is reported as an error
--   err_line, err_col : position of the first character
function follow(integer expect, integer ifyes, integer ifno, integer err_line, integer err_col)
    if next_ch() != expect then
        if ifno = EOI then
            error("%d %d follow: unrecognized character: (%d)", {err_line, err_col, the_ch})
        end if
        return {ifno, err_line, err_col}
    end if

    next_ch()       -- consume the second character of the operator
    return {ifyes, err_line, err_col}
end function

-- Return the next token from the input.
-- Skips leading white space, records where the token starts, and dispatches on
-- the current character. A token is a sequence {kind, line, col} with a fourth
-- element (value or text) for Integerk, Stringk and Ident tokens.
-- At end of input an EOI token is returned.
function get_tok()
    while t_space(the_ch) do
        next_ch()
    end while

    integer err_line = the_line
    integer err_col = the_col

    switch the_ch do
        case EOF then return {EOI, err_line, err_col}
        case '/' then return div_or_cmt(err_line, err_col)
        case '\'' then return char_lit(err_line, err_col)
        case '<' then return follow('=', Leq, Lss, err_line, err_col)
        case '!' then return follow('=', Neq, EOI, err_line, err_col)
        case '&' then return follow('&', Andk, EOI, err_line, err_col)
        case '"' then return string_lit(the_ch, err_line, err_col)
        case else
            -- Only look the character up when it is a valid subscript of the
            -- table: a NUL byte (0) in the input would otherwise abort with a
            -- subscript error instead of the lexer's own diagnostic.
            if the_ch >= 1 and the_ch <= length(symbols) then
                integer sym = symbols[the_ch]
                if sym != EOI then
                    next_ch()
                    return {sym, err_line, err_col}
                end if
            end if
            -- not a single-character token: identifier, keyword, number,
            -- or an unrecognized character (reported by ident_or_int)
            return ident_or_int(err_line, err_col)
    end switch
end function

-- Populate the lexer's lookup tables:
--   key_words : keyword text -> token kind
--   symbols   : 256-entry table mapping a character code to the token kind of
--               the single-character token it forms; EOI marks "no such token"
procedure init()
    sequence reserved = {
        {"if", Ifk},
        {"print", Printk},
        {"putc", Putc},
        {"while", Whilek}
    }
    for i = 1 to length(reserved) do
        put(key_words, reserved[i][1], reserved[i][2])
    end for

    sequence singles = {
        {'{', Lbrace}, {'}', Rbrace},
        {'(', Lparen}, {')', Rparen},
        {'+', Add},    {'-', Sub},
        {'*', Mul},    {';', Semi},
        {',', Comma},  {'>', Gtr},
        {'=', Assign}
    }
    symbols = repeat(EOI, 256)
    for i = 1 to length(singles) do
        symbols[singles[i][1]] = singles[i][2]
    end for
end procedure

-- Driver: tokenize the input and print one line per token.
--   cl : the command line; when it has more than two elements, cl[3] is taken
--        as the name of the file to read, otherwise input comes from STDIN
-- Each output line shows the token's line, column and name, followed by the
-- value for Integer tokens or the text for Ident and String tokens.
-- Printing stops after the EOI token has been written.
procedure main(sequence cl)
    input_file = STDIN
    if length(cl) > 2 then
        sequence file_name = cl[3]
        input_file = open(file_name, "r")
        if input_file = -1 then
            error("Could not open %s", {file_name})
        end if
    end if

    init()

    sequence tok
    integer kind
    while true do
        tok = get_tok()
        kind = tok[1]
        printf(STDOUT, "line %5d col %5d %-8s", {tok[2], tok[3], all_syms[kind]})
        if kind = Integerk then
            printf(STDOUT, " %5d\n", {tok[4]})
        elsif kind = Ident then
            printf(STDOUT, " %s\n", {tok[4]})
        elsif kind = Stringk then
            printf(STDOUT, " \"%s\"\n", {tok[4]})
        else
            printf(STDOUT, "\n")
        end if
        if kind = EOI then
            exit
        end if
    end while
end procedure

-- Program entry point: pass the command line to main(), which reads an
-- optional input file name from its third element.
main(command_line())
</lang>
</lang>


Line 689: Line 898:
dim tok_list(tk_eoi to tk_ident) as string
dim tok_list(tk_eoi to tk_ident) as string


tok_list(tk_eoi ) = "EOI"
tok_list(tk_eoi ) = "EOI"
tok_list(tk_print ) = "Print"
tok_list(tk_print ) = "Print"
tok_list(tk_putc ) = "Putc"
tok_list(tk_putc ) = "Putc"
tok_list(tk_if ) = "If"
tok_list(tk_if ) = "If"
tok_list(tk_while ) = "While"
tok_list(tk_while ) = "While"
tok_list(tk_lbrace ) = "Lbrace"
tok_list(tk_lbrace ) = "Lbrace"
tok_list(tk_rbrace ) = "Rbrace"
tok_list(tk_rbrace ) = "Rbrace"
tok_list(tk_lparen ) = "Lparen"
tok_list(tk_lparen ) = "Lparen"
tok_list(tk_rparen ) = "Rparen"
tok_list(tk_rparen ) = "Rparen"
tok_list(tk_uminus ) = "Uminus"
tok_list(tk_uminus ) = "Uminus"
tok_list(tk_mul ) = "Mul"
tok_list(tk_mul ) = "Mul"
tok_list(tk_div ) = "Div"
tok_list(tk_div ) = "Div"
tok_list(tk_add ) = "Add"
tok_list(tk_add ) = "Add"
tok_list(tk_sub ) = "Sub"
tok_list(tk_sub ) = "Sub"
tok_list(tk_lss ) = "Lss"
tok_list(tk_lss ) = "Lss"
tok_list(tk_gtr ) = "Gtr"
tok_list(tk_gtr ) = "Gtr"
tok_list(tk_leq ) = "Leq"
tok_list(tk_leq ) = "Leq"
tok_list(tk_neq ) = "Neq"
tok_list(tk_neq ) = "Neq"
tok_list(tk_and ) = "And"
tok_list(tk_and ) = "And"
tok_list(tk_semi ) = "Semi"
tok_list(tk_semi ) = "Semi"
tok_list(tk_comma ) = "Comma"
tok_list(tk_comma ) = "Comma"
tok_list(tk_assign ) = "Assign"
tok_list(tk_assign ) = "Assign"
tok_list(tk_integer ) = "Integer"
tok_list(tk_integer) = "Integer"
tok_list(tk_string ) = "String"
tok_list(tk_string ) = "String"
tok_list(tk_ident ) = "Ident"
tok_list(tk_ident ) = "Ident"


do
do
Line 735: Line 944:
=={{header|Python}}==
=={{header|Python}}==
<lang Python>
<lang Python>
from __future__ import print_function
import sys
import sys


Line 762: Line 972:


#*** get the next character from the input
#*** get the next character from the input
def getc():
def next_ch():
global the_ch, the_col, the_line
global the_ch, the_col, the_line


Line 774: Line 984:
#*** 'x' - character constants
#*** 'x' - character constants
def char_lit(err_line, err_col):
def char_lit(err_line, err_col):
n = ord(getc()) # skip opening quote
n = ord(next_ch()) # skip opening quote
if the_ch == '\'':
if the_ch == '\'':
error(err_line, err_col, "empty character constant")
error(err_line, err_col, "empty character constant")
elif the_ch == '\\':
elif the_ch == '\\':
getc()
next_ch()
if the_ch == 'n':
if the_ch == 'n':
n = 10
n = 10
Line 785: Line 995:
else:
else:
error(err_line, err_col, "unknown escape sequence \\%c" % (the_ch))
error(err_line, err_col, "unknown escape sequence \\%c" % (the_ch))
if getc() != '\'':
if next_ch() != '\'':
error(err_line, err_col, "multi-character constant")
error(err_line, err_col, "multi-character constant")
getc()
next_ch()
return Integerk, err_line, err_col, n
return Integerk, err_line, err_col, n


#*** process divide or comments
#*** process divide or comments
def div_or_cmt(err_line, err_col):
def div_or_cmt(err_line, err_col):
if getc() != '*':
if next_ch() != '*':
return Div, err_line, err_col
return Div, err_line, err_col


# comment found
# comment found
while True:
while True:
if getc() == '*' and getc() == '/':
if next_ch() == '*' and next_ch() == '/':
getc()
next_ch()
return gettok()
return gettok()
elif len(the_ch) == 0:
elif len(the_ch) == 0:
Line 807: Line 1,017:
text = ""
text = ""


while getc() != start:
while next_ch() != start:
if len(the_ch) == 0:
if len(the_ch) == 0:
error(err_line, err_col, "EOF while scanning string literal")
error(err_line, err_col, "EOF while scanning string literal")
Line 814: Line 1,024:
text += the_ch
text += the_ch


getc()
next_ch()
return Stringk, err_line, err_col, text
return Stringk, err_line, err_col, text


Line 826: Line 1,036:
if not the_ch.isdigit():
if not the_ch.isdigit():
is_number = False
is_number = False
getc()
next_ch()


if len(text) == 0:
if len(text) == 0:
Line 844: Line 1,054:
#*** look ahead for '>=', etc.
#*** look ahead for '>=', etc.
def follow(expect, ifyes, ifno, err_line, err_col):
def follow(expect, ifyes, ifno, err_line, err_col):
if getc() == expect:
if next_ch() == expect:
getc()
next_ch()
return ifyes, err_line, err_col
return ifyes, err_line, err_col


Line 856: Line 1,066:
def gettok():
def gettok():
while the_ch.isspace():
while the_ch.isspace():
getc()
next_ch()


err_line = the_line
err_line = the_line
Line 862: Line 1,072:


if len(the_ch) == 0: return EOI, err_line, err_col
if len(the_ch) == 0: return EOI, err_line, err_col
elif the_ch in symbols: sym = symbols[the_ch]; getc(); return sym, err_line, err_col
elif the_ch == '/': return div_or_cmt(err_line, err_col)
elif the_ch == '/': return div_or_cmt(err_line, err_col)
elif the_ch == '\'': return char_lit(err_line, err_col)
elif the_ch == '\'': return char_lit(err_line, err_col)
Line 869: Line 1,078:
elif the_ch == '&': return follow('&', And, EOI, err_line, err_col)
elif the_ch == '&': return follow('&', And, EOI, err_line, err_col)
elif the_ch == '"': return string_lit(the_ch, err_line, err_col)
elif the_ch == '"': return string_lit(the_ch, err_line, err_col)
elif the_ch in symbols:
else: return ident_or_int(err_line, err_col)
sym = symbols[the_ch]
next_ch()
return sym, err_line, err_col
else: return ident_or_int(err_line, err_col)


#*** main driver
#*** main driver
Line 885: Line 1,098:
col = t[2]
col = t[2]


print("line %5d col %5d %-8s" % (line, col, all_syms[tok]), end='')
if tok == Integerk:

print("line %5d col %5d %-8s %8d" % (line, col, all_syms[tok], t[3]))
elif tok == Ident:
if tok == Integerk: print(" %5d" % (t[3]))
print("line %5d col %5d %-8s %s" % (line, col, all_syms[tok], t[3]))
elif tok == Ident: print(" %s" % (t[3]))
elif tok == Stringk:
elif tok == Stringk: print(' "%s"' % (t[3]))
print('line %5d col %5d %-8s "%s"' % (line, col, all_syms[tok], t[3]))
else: print("")
else:
print("line %5d col %5d %-8s" % (line, col, all_syms[tok]))


if tok == EOI:
if tok == EOI: