Compiler/lexical analyzer: Difference between revisions

Line 9,632:
if token.kind == tokEnd:
break</lang>
 
=={{header|ObjectIcon}}==
{{trans|Icon}}
{{trans|ATS}}
 
<lang ObjectIcon># -*- ObjectIcon -*-
#
# The Rosetta Code lexical analyzer in Object Icon. Based upon the ATS
# implementation.
#
# Usage: lex [INPUTFILE [OUTPUTFILE]]
# If INPUTFILE or OUTPUTFILE is "-" or missing, then standard input
# or standard output is used, respectively. *)
#
 
import io
 
$define EOF -1
 
$define TOKEN_ELSE 0
$define TOKEN_IF 1
$define TOKEN_PRINT 2
$define TOKEN_PUTC 3
$define TOKEN_WHILE 4
$define TOKEN_MULTIPLY 5
$define TOKEN_DIVIDE 6
$define TOKEN_MOD 7
$define TOKEN_ADD 8
$define TOKEN_SUBTRACT 9
$define TOKEN_NEGATE 10
$define TOKEN_LESS 11
$define TOKEN_LESSEQUAL 12
$define TOKEN_GREATER 13
$define TOKEN_GREATEREQUAL 14
$define TOKEN_EQUAL 15
$define TOKEN_NOTEQUAL 16
$define TOKEN_NOT 17
$define TOKEN_ASSIGN 18
$define TOKEN_AND 19
$define TOKEN_OR 20
$define TOKEN_LEFTPAREN 21
$define TOKEN_RIGHTPAREN 22
$define TOKEN_LEFTBRACE 23
$define TOKEN_RIGHTBRACE 24
$define TOKEN_SEMICOLON 25
$define TOKEN_COMMA 26
$define TOKEN_IDENTIFIER 27
$define TOKEN_INTEGER 28
$define TOKEN_STRING 29
$define TOKEN_END_OF_INPUT 30
 
global whitespace
global ident_start
global ident_continuation
 
procedure main(args)
local inpf, outf
local pushback_buffer, inp, pushback
 
initial {
whitespace := ' \t\v\f\r\n'
ident_start := '_' ++ &letters
ident_continuation := ident_start ++ &digits
}
 
inpf := FileStream.stdin
outf := FileStream.stdout
if 1 <= *args & args[1] ~== "-" then {
inpf := open(args[1], "r") | stop(&why)
}
if 2 <= *args & args[2] ~== "-" then {
outf := open(args[2], "w") | stop(&why)
}
 
pushback_buffer := []
inp := create inputter(inpf, pushback_buffer)
pushback := create repeat push(pushback_buffer, \@&source)
@pushback # The first invocation does nothing.
 
scan_text(outf, inp, pushback)
end
 
procedure scan_text(outf, inp, pushback)
local ch
 
while /ch | ch[1] ~=== EOF do {
skip_spaces_and_comments(inp, pushback)
ch := @inp
if ch[1] === EOF then {
print_token(outf, [TOKEN_END_OF_INPUT, "", ch[2], ch[3]])
} else {
ch @pushback
print_token(outf, get_next_token(inp, pushback))
}
}
end
 
procedure get_next_token(inp, pushback)
local ch, ch1
local ln, cn
 
skip_spaces_and_comments(inp, pushback)
ch := @inp
ln := ch[2] # line number
cn := ch[3] # column number
case ch[1] of {
"," : return [TOKEN_COMMA, ",", ln, cn]
";" : return [TOKEN_SEMICOLON, ";", ln, cn]
"(" : return [TOKEN_LEFTPAREN, "(", ln, cn]
")" : return [TOKEN_RIGHTPAREN, ")", ln, cn]
"{" : return [TOKEN_LEFTBRACE, "{", ln, cn]
"}" : return [TOKEN_RIGHTBRACE, "}", ln, cn]
"*" : return [TOKEN_MULTIPLY, "*", ln, cn]
"/" : return [TOKEN_DIVIDE, "/", ln, cn]
"%" : return [TOKEN_MOD, "%", ln, cn]
"+" : return [TOKEN_ADD, "+", ln, cn]
"-" : return [TOKEN_SUBTRACT, "-", ln, cn]
"<" : {
ch1 := @inp
if ch1[1] === "=" then {
return [TOKEN_LESSEQUAL, "<=", ln, cn]
} else {
ch1 @pushback
return [TOKEN_LESS, "<", ln, cn]
}
}
">" : {
ch1 := @inp
if ch1[1] === "=" then {
return [TOKEN_GREATEREQUAL, ">=", ln, cn]
} else {
ch1 @pushback
return [TOKEN_GREATER, ">", ln, cn]
}
}
"=" : {
ch1 := @inp
if ch1[1] === "=" then {
return [TOKEN_EQUAL, "==", ln, cn]
} else {
ch1 @pushback
return [TOKEN_ASSIGN, "=", ln, cn]
}
}
"!" : {
ch1 := @inp
if ch1[1] === "=" then {
return [TOKEN_NOTEQUAL, "!=", ln, cn]
} else {
ch1 @pushback
return [TOKEN_NOT, "!", ln, cn]
}
}
"&" : {
ch1 := @inp
if ch1[1] === "&" then {
return [TOKEN_AND, "&&", ln, cn]
} else {
unexpected_character(ln, cn, ch)
}
}
"|" : {
ch1 := @inp
if ch1[1] === "|" then {
return [TOKEN_OR, "||", ln, cn]
} else {
unexpected_character(ln, cn, ch)
}
}
"\"" : {
ch @pushback
return scan_string_literal(inp)
}
"'" : {
ch @pushback
return scan_character_literal(inp, pushback)
}
default : {
if any(&digits, ch[1]) then {
ch @pushback
return scan_integer_literal(inp, pushback)
} else if any(ident_start, ch[1]) then {
ch @pushback
return scan_identifier_or_reserved_word (inp, pushback)
} else {
unexpected_character(ln, cn, ch)
}
}
}
end
 
procedure scan_identifier_or_reserved_word(inp, pushback)
local ch
local s
local line_no, column_no
 
s := ""
ch := @inp
line_no := ch[2]
column_no := ch[3]
while EOF ~=== ch[1] & any(ident_continuation, ch[1]) do {
s ||:= ch[1]
ch := @inp
}
ch @pushback
return reserved_word_lookup (s, line_no, column_no)
end
 
procedure scan_integer_literal(inp, pushback)
local ch
local s
local line_no, column_no
 
s := ""
ch := @inp
line_no := ch[2]
column_no := ch[3]
while EOF ~=== ch[1] & any(ident_continuation, ch[1]) do {
s ||:= ch[1]
ch := @inp
}
ch @pushback
not upto(~&digits, s) | invalid_integer_literal(line_no, column_no, s)
return [TOKEN_INTEGER, s, line_no, column_no]
end
 
procedure scan_character_literal(inp, pushback)
local ch, ch1
local close_quote
local toktup
local line_no, column_no
 
ch := @inp # The opening quote.
close_quote := ch[1] # Same as the opening quote.
ch @pushback
 
line_no := ch[2]
column_no := ch[3]
 
toktup := scan_character_literal_without_checking_end(inp)
ch1 := @inp
while EOF ~=== ch1[1] & ch1[1] ~== close_quote do {
case ch1[1] of {
EOF : unterminated_character_literal(line_no, column_no)
close_quote : multicharacter_literal(line_no, column_no)
default : ch1 := @inp
}
}
return toktup
end
 
procedure scan_character_literal_without_checking_end(inp)
local ch, ch1, ch2
 
ch := @inp # The opening quote.
ch1 := @inp
EOF ~=== ch1[1] | unterminated_character_literal(ch[2], ch[3])
if ch1[1] == "\\" then {
ch2 := @inp
EOF ~=== ch2[1] | unterminated_character_literal(ch[2], ch[3])
case ch2[1] of {
"n" : return [TOKEN_INTEGER, string(ord("\n")), ch[2], ch[3]]
"\\" : return [TOKEN_INTEGER, string(ord("\\")), ch[2], ch[3]]
default : unsupported_escape(ch1[2], ch1[3], ch2)
}
} else {
return [TOKEN_INTEGER, string(ord(ch1[1])), ch[2], ch[3]]
}
end
 
procedure scan_string_literal(inp)
local ch, ch1, ch2
local line_no, column_no
local close_quote
local s
local retval
 
ch := @inp # The opening quote
close_quote := ch[1] # Same as the opening quote.
line_no := ch[2]
column_no := ch[3]
 
s := ch[1]
until \retval do {
ch1 := @inp
ch1[1] ~=== EOF |
unterminated_string_literal (line_no, column_no,
"end of input")
ch1[1] ~== "\n" |
unterminated_string_literal (line_no, column_no,
"end of line")
if ch1[1] == close_quote then {
retval := [TOKEN_STRING, s || close_quote, line_no, column_no]
} else if ch1[1] ~== "\\" then {
s ||:= ch1[1]
} else {
ch2 := @inp
EOF ~=== ch2[1] | unsupported_escape(line_no, column_no, ch2)
case ch2[1] of {
"n" : s ||:= "\\n"
"\\" : s ||:= "\\\\"
default : unsupported_escape(line_no, column_no, ch2)
}
}
}
return retval
end
 
procedure skip_spaces_and_comments(inp, pushback)
local ch, ch1
 
repeat {
ch := @inp
(EOF === ch[1]) & { ch @pushback; return }
if not any(whitespace, ch[1]) then {
(ch[1] == "/") | { ch @pushback; return }
(ch1 := @inp) | { ch @pushback; return }
(ch1[1] == "*") | { ch1 @pushback; ch @pushback; return }
scan_comment(inp, ch[2], ch[3])
}
}
end
 
procedure scan_comment(inp, line_no, column_no)
local ch, ch1
 
until (\ch)[1] == "*" & (\ch1)[1] == "/" do {
ch := @inp
(EOF === ch[1]) & unterminated_comment(line_no, column_no)
if ch[1] == "*" then {
ch1 := @inp
(EOF === ch[1]) & unterminated_comment(line_no, column_no)
}
}
return
end
 
procedure reserved_word_lookup(s, line_no, column_no)
 
# Lookup is by an extremely simple perfect hash.
 
static reserved_words
static reserved_word_tokens
local hashval, token, toktup
 
initial {
reserved_words := ["if", "print", "else",
"", "putc", "",
"", "while", ""]
reserved_word_tokens :=
[TOKEN_IF, TOKEN_PRINT, TOKEN_ELSE,
TOKEN_IDENTIFIER, TOKEN_PUTC, TOKEN_IDENTIFIER,
TOKEN_IDENTIFIER, TOKEN_WHILE, TOKEN_IDENTIFIER]
}
 
if *s < 2 then {
toktup := [TOKEN_IDENTIFIER, s, line_no, column_no]
} else {
hashval := ((ord(s[1]) + ord(s[2])) % (*reserved_words)) + 1
token := reserved_word_tokens[hashval]
if token = TOKEN_IDENTIFIER | s ~== reserved_words[hashval] then {
toktup := [TOKEN_IDENTIFIER, s, line_no, column_no]
} else {
toktup := [token, s, line_no, column_no]
}
}
return toktup
end
 
procedure print_token(outf, toktup)
static token_names
local s_line, s_column
 
initial {
token_names := ["Keyword_else",
"Keyword_if",
"Keyword_print",
"Keyword_putc",
"Keyword_while",
"Op_multiply",
"Op_divide",
"Op_mod",
"Op_add",
"Op_subtract",
"Op_negate",
"Op_less",
"Op_lessequal",
"Op_greater",
"Op_greaterequal",
"Op_equal",
"Op_notequal",
"Op_not",
"Op_assign",
"Op_and",
"Op_or",
"LeftParen",
"RightParen",
"LeftBrace",
"RightBrace",
"Semicolon",
"Comma",
"Identifier",
"Integer",
"String",
"End_of_input"]
}
 
/outf := FileStream.stdout
s_line := string(toktup[3])
s_column := string(toktup[4])
writes(outf, right (s_line, max(5, *s_line)))
writes(outf, " ")
writes(outf, right (s_column, max(5, *s_column)))
writes(outf, " ")
writes(outf, token_names[toktup[1] + 1])
case toktup[1] of {
TOKEN_IDENTIFIER : writes(outf, " ", toktup[2])
TOKEN_INTEGER : writes(outf, " ", toktup[2])
TOKEN_STRING : writes(outf, " ", toktup[2])
}
write(outf)
return
end
 
procedure inputter(inpf, pushback_buffer)
local buffer
local line_no, column_no
local c
 
buffer := ""
line_no := 1
column_no := 1
 
repeat {
buffer? {
until *pushback_buffer = 0 & pos(0) do {
if *pushback_buffer ~= 0 then {
suspend pop(pushback_buffer)
} else {
c := move(1)
suspend [c, line_no, column_no]
if c == "\n" then {
line_no +:= 1
column_no := 1
} else {
column_no +:= 1
}
}
}
}
(buffer := reads(inpf, 2048)) |
suspend [EOF, line_no, column_no]
}
end
 
procedure unterminated_comment(line_no, column_no)
error("unterminated comment starting at ",
line_no, ":", column_no)
end
 
procedure unexpected_character(line_no, column_no, ch)
error("unexpected character '", ch[1], "' starting at ",
line_no, ":", column_no)
end
 
procedure unterminated_string_literal (line_no, column_no, cause)
error("unterminated string literal (", cause, ") starting at ",
line_no, ":", column_no)
end
 
procedure unsupported_escape (line_no, column_no, ch)
if ch[1] === EOF then {
error("unexpected \\ at end of input",
" starting at ", line_no, ":", column_no)
} else {
error("unsupported escape \\", ch[1],
" starting at ", line_no, ":", column_no)
}
end
 
procedure invalid_integer_literal(line_no, column_no, s)
error("invalid integer literal ", s,
" starting at ", line_no, ":", column_no)
end
 
procedure unterminated_character_literal(line_no, column_no)
error("unterminated character literal starting at ",
line_no, ":", column_no)
end
 
procedure multicharacter_literal(line_no, column_no)
error("unsupported multicharacter literal starting at ",
line_no, ":", column_no)
end
 
procedure error(args[])
write!([FileStream.stderr] ||| args)
exit(1)
end</lang>
 
 
=={{header|Ol}}==
1,448

edits