Compiler/Verifying syntax: Difference between revisions

From Rosetta Code
Content added Content deleted
m (→‎{{header|Wren}}: Minor tidy)
 
(18 intermediate revisions by 7 users not shown)
Line 47: Line 47:
=={{header|ALGOL W}}==
=={{header|ALGOL W}}==
Includes the test cases from the Go sample. Note, strings are limited to 256 characters in Algol W.
Includes the test cases from the Go sample. Note, strings are limited to 256 characters in Algol W.
<lang algolw>begin
<syntaxhighlight lang="algolw">begin
% verify expressions match expected syntax %
% verify expressions match expected syntax %
procedure stmt ( string(256) value text ) ; begin
procedure stmt ( string(256) value text ) ; begin
% the parsing procedures return truee if the expression matches the %
% the parsing procedures return true if the expression matches the %
% the required element; false otherwise - a parse tree is not built %
% required element, false otherwise - a parse tree is not built %
boolean haveInteger, haveIdentifier, haveEof, hadError;
logical haveInteger, haveIdentifier, haveEof, hadError;
integer currPos, maxPos, syPos;
integer currPos, maxPos, syPos;
string(1) currCh;
string(1) currCh;
string(256) srcText;
string(256) srcText;
string(256) sy;
string(256) sy;
boolean procedure expr ; expr_level_2;
logical procedure expr ; expr_level_2;
boolean procedure expr_level_2 ; begin
logical procedure expr_level_2 ; begin
boolean ok;
logical ok;
ok := expr_level_3;
ok := expr_level_3;
while ok and have( "or" ) do ok := next and expr_level_3;
while ok and have( "or" ) do ok := next and expr_level_3;
ok
ok
end expr_level_2 ;
end expr_level_2 ;
boolean procedure expr_level_3 ; begin
logical procedure expr_level_3 ; begin
boolean ok;
logical ok;
ok := expr_level_4;
ok := expr_level_4;
while have( "and" ) and ok do ok := next and expr_level_4;
while have( "and" ) and ok do ok := next and expr_level_4;
ok
ok
end expr_level_3 ;
end expr_level_3 ;
boolean procedure expr_level_4 ; begin
logical procedure expr_level_4 ; begin
boolean ok;
logical ok;
ok := true;
ok := true;
if have( "not" ) then ok := next;
if have( "not" ) then ok := next;
Line 78: Line 78:
ok
ok
end expr_level_4 ;
end expr_level_4 ;
boolean procedure expr_level_5 ; begin
logical procedure expr_level_5 ; begin
boolean ok;
logical ok;
ok := expr_level_6;
ok := expr_level_6;
while ok and ( have( "+" ) or have( "-" ) ) do ok := next and expr_level_6;
while ok and ( have( "+" ) or have( "-" ) ) do ok := next and expr_level_6;
ok
ok
end expr_level_5 ;
end expr_level_5 ;
boolean procedure expr_level_6 ; begin
logical procedure expr_level_6 ; begin
boolean ok;
logical ok;
ok := primary;
ok := primary;
while ok and ( have( "*" ) or have( "/" ) ) do ok := next and primary;
while ok and ( have( "*" ) or have( "/" ) ) do ok := next and primary;
ok
ok
end expr_level_6 ;
end expr_level_6 ;
boolean procedure primary ;
logical procedure primary ;
if haveIdentifier or haveInteger or have( "true" ) or have( "false" ) then begin
if haveIdentifier or haveInteger or have( "true" ) or have( "false" ) then begin
void( next );
void( next );
Line 96: Line 96:
end
end
else if have( "(" ) then next and expr and mustBe( ")" )
else if have( "(" ) then next and expr and mustBe( ")" )
else error( "Expecting identifier, integer or ""(""" ) ;
else error( "Expecting identifier, literal or ""(""" ) ;
logical procedure addAndNextChar ; begin
logical procedure addAndNextChar ; begin
if syPos = 255 then void( error( "Symbol too long" ) )
if syPos = 255 then void( error( "Symbol too long" ) )
Line 105: Line 105:
nextChar
nextChar
end addAndNextChar ;
end addAndNextChar ;
boolean procedure next ; begin
logical procedure next ; begin
logical ok;
logical ok;
haveInteger := haveIdentifier := false;
haveInteger := haveIdentifier := false;
Line 125: Line 125:
ok
ok
end next ;
end next ;
boolean procedure skipSpaces ; begin
logical procedure skipSpaces ; begin
boolean ok;
logical ok;
ok := not haveEof;
ok := not haveEof;
while ok and currCh = " " do ok := nextChar;
while ok and currCh = " " do ok := nextChar;
ok
ok
end skipSpaces ;
end skipSpaces ;
boolean procedure haveLetter ; not haveEof and ( ( currCh >= "a" and currCh <= "z" )
logical procedure haveLetter ; not haveEof and ( ( currCh >= "a" and currCh <= "z" )
or ( currCh >= "A" and currCh <= "Z" ) );
or ( currCh >= "A" and currCh <= "Z" ) );
boolean procedure haveDigit ; not haveEof and ( currCh >= "0" and currCh <= "9" );
logical procedure haveDigit ; not haveEof and ( currCh >= "0" and currCh <= "9" );
boolean procedure have ( string(12) value text ) ; text = sy;
logical procedure have ( string(12) value text ) ; text = sy;
boolean procedure mustBe ( string(12) value text ) ; begin
logical procedure mustBe ( string(12) value text ) ; begin
boolean ok, haveSy;
logical ok;
ok := have( text );
ok := have( text );
if ok then haveSy := next
if ok then void( next )
else begin
else begin
string(256) msg;
string(256) msg;
Line 147: Line 147:
ok
ok
end mustBe ;
end mustBe ;
boolean procedure nextChar ; begin
logical procedure nextChar ; begin
haveEof := currPos > maxPos;
haveEof := currPos > maxPos;
if not haveEof then begin
if not haveEof then begin
Line 161: Line 161:
length
length
end strlen ;
end strlen ;
boolean procedure error ( string(256) value msg ) ; begin
logical procedure error ( string(256) value msg ) ; begin
if not hadError then begin
if not hadError then begin
% have the first error %
% have the first error %
Line 173: Line 173:
end ewrror ;
end ewrror ;
procedure showText ( string(256) value text; integer value length ) ; for c := 0 until length do writeon( text( c // 1 ) );
procedure showText ( string(256) value text; integer value length ) ; for c := 0 until length do writeon( text( c // 1 ) );
procedure void ( boolean value b ) ; begin end void ;
procedure void ( logical value b ) ; begin end void ;
% parse text and output messages indicating whether it is OK or not %
% parse text and output messages indicating whether it is OK or not %
hadError := false;
hadError := false;
Line 192: Line 192:
stmt( "wombat" );
stmt( "wombat" );
stmt( "wombat or monotreme" );
stmt( "wombat or monotreme" );
stmt( "( wombat and not )" );
stmt( "wombat or not" );
stmt( "a + 1" );
stmt( "a + 1" );
stmt( "a + b < c" );
stmt( "a + b < c" );
Line 200: Line 202:
stmt( "$" );
stmt( "$" );
% test cases from Go %
% test cases from Go %
stmt( "true or false = not true" );
stmt( "not true = false" );
stmt( "3 + not 5" );
stmt( "3 + not 5" );
stmt( "3 + (not 5)" );
stmt( "3 + (not 5)" );
Line 220: Line 224:
stmt( "j & k" );
stmt( "j & k" );
stmt( "l or _m" )
stmt( "l or _m" )
end.</lang>
end.</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
wombat: true
wombat: true
wombat or monotreme: true
wombat or monotreme: true
( wombat and not ): error at: 18 ()): Expecting identifier, literal or "(" false
wombat or not: error at: 13 (<eof>): Expression expected false
a + 1: true
a + 1: true
a + b < c: true
a + b < c: true
Line 231: Line 237:
a = b: true
a = b: true
a or b = c: true
a or b = c: true
$: error at: 1 ($): Expecting identifier, integer or "(" false
$: error at: 1 ($): Expecting identifier, literal or "(" false
3 + not 5: error at: 8 (not): Expecting identifier, integer or "(" false
true or false = not true: error at: 20 (not): Expecting identifier, literal or "(" false
not true = false: true
3 + not 5: error at: 8 (not): Expecting identifier, literal or "(" false
3 + (not 5): true
3 + (not 5): true
(42 + 3: error at: 7 (<eof>): Expected: ) false
(42 + 3: error at: 7 (<eof>): Expected: ) false
not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true: true
not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true: true
and 3 < 2: error at: 5 (and): Expecting identifier, integer or "(" false
and 3 < 2: error at: 5 (and): Expecting identifier, literal or "(" false
not 7 < 2: true
not 7 < 2: true
2 < 3 < 4: error at: 8 (<): Expected EOF after expression false
2 < 3 < 4: error at: 8 (<): Expected EOF after expression false
Line 243: Line 251:
4 * (32 - 16) + 9 = 73: true
4 * (32 - 16) + 9 = 73: true
235 76 + 1: error at: 7 (76): Expected EOF after expression false
235 76 + 1: error at: 7 (76): Expected EOF after expression false
a + b = not c and false: error at: 12 (not): Expecting identifier, integer or "(" false
a + b = not c and false: error at: 12 (not): Expecting identifier, literal or "(" false
a + b = (not c) and false: true
a + b = (not c) and false: true
a + b = (not c and false): true
a + b = (not c and false): true
ab_c / bd2 or < e_f7: error at: 16 (<): Expecting identifier, integer or "(" false
ab_c / bd2 or < e_f7: error at: 16 (<): Expecting identifier, literal or "(" false
g not = h: error at: 6 (not): Expected EOF after expression false
g not = h: error at: 6 (not): Expected EOF after expression false
été = false: error at: 2 (Ã): Expecting identifier, integer or "(" false
été = false: error at: 2 (Ã): Expecting identifier, literal or "(" false
i++: error at: 3 (+): Expecting identifier, integer or "(" false
i++: error at: 3 (+): Expecting identifier, literal or "(" false
j & k: error at: 4 (&): Expected EOF after expression false
j & k: error at: 4 (&): Expected EOF after expression false
l or _m: error at: 7 (_): Expecting identifier, integer or "(" false
l or _m: error at: 7 (_): Expecting identifier, literal or "(" false
</pre>
</pre>


=={{header|C}}==
=={{header|C}}==
<lang perl>// cverifyingsyntaxrosetta.c
<syntaxhighlight lang="c">// cverifyingsyntaxrosetta.c
// http://www.rosettacode.org/wiki/Compiler/_Verifying_Syntax
// http://www.rosettacode.org/wiki/Compiler/_Verifying_Syntax


Line 359: Line 367:
{
{
for( int i = 0; i < sizeof(tests)/sizeof(*tests); i++ ) parse(tests[i]);
for( int i = 0; i < sizeof(tests)/sizeof(*tests); i++ ) parse(tests[i]);
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 401: Line 409:
^ expected a primary
^ expected a primary
</pre>
</pre>



=={{header|Go}}==
=={{header|Go}}==
Line 411: Line 418:


In particular, after substitutions, "= not", "+ not" etc. would be allowed by the Go parser so we need to exclude them. Curiously, the Go parser allows something like "2 < 3 < 4" even though it doesn't compile. We need therefore to exclude that also (see Talk page).
In particular, after substitutions, "= not", "+ not" etc. would be allowed by the Go parser so we need to exclude them. Curiously, the Go parser allows something like "2 < 3 < 4" even though it doesn't compile. We need therefore to exclude that also (see Talk page).
<lang go>package main
<syntaxhighlight lang="go">package main


import (
import (
Line 424: Line 431:
re2 = regexp.MustCompile(`\b_\w*\b`)
re2 = regexp.MustCompile(`\b_\w*\b`)
re3 = regexp.MustCompile(`[=<+*/-]\s*not`)
re3 = regexp.MustCompile(`[=<+*/-]\s*not`)
re4 = regexp.MustCompile(`(=|<)\s*[^=< ]+\s*([=<+*/-])`)
re4 = regexp.MustCompile(`(=|<)\s*[^(=< ]+\s*([=<+*/-])`)
)
)


Line 459: Line 466:
func main() {
func main() {
exprs := []string{
exprs := []string{
"$",
"one",
"either or both",
"a + 1",
"a + b < c",
"a = b",
"a or b = c",
"3 + not 5",
"3 + not 5",
"3 + (not 5)",
"3 + (not 5)",
"(42 + 3",
"(42 + 3",
"(42 + 3)",
" not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true",
" not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true",
" and 3 < 2",
" and 3 < 2",
"not 7 < 2",
"not 7 < 2",
"2 < 3 < 4",
"2 < 3 < 4",
"2 < (3 < 4)",
"2 < foobar - 3 < 4",
"2 < foobar - 3 < 4",
"2 < foobar and 3 < 4",
"2 < foobar and 3 < 4",
Line 471: Line 487:
"235 76 + 1",
"235 76 + 1",
"true or false = not true",
"true or false = not true",
"true or false = (not true)",
"not true or false = false",
"not true = false",
"not true = false",
"a + b = not c and false",
"a + b = not c and false",
Line 481: Line 499:
"j & k",
"j & k",
"l or _m",
"l or _m",
}
}

for _, expr := range exprs {
for _, expr := range exprs {
fmt.Printf("Statement to verify: %q\n", expr)
fmt.Printf("Statement to verify: %q\n", expr)
Line 501: Line 519:
fmt.Println()
fmt.Println()
}
}
}</lang>
}</syntaxhighlight>


{{out}}
{{out}}
<pre>
<pre>
Statement to verify: "$"
"false" -> invalid character '$' found

Statement to verify: "one"
"true"

Statement to verify: "either or both"
"true"

Statement to verify: "a + 1"
"true"

Statement to verify: "a + b < c"
"true"

Statement to verify: "a = b"
"true"

Statement to verify: "a or b = c"
"true"

Statement to verify: "3 + not 5"
Statement to verify: "3 + not 5"
"false" -> expected operand, found 'not'
"false" -> expected operand, found 'not'
Line 513: Line 552:
Statement to verify: "(42 + 3"
Statement to verify: "(42 + 3"
"false" -> expected ')', found newline
"false" -> expected ')', found newline

Statement to verify: "(42 + 3)"
"true"


Statement to verify: " not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true"
Statement to verify: " not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true"
Line 525: Line 567:
Statement to verify: "2 < 3 < 4"
Statement to verify: "2 < 3 < 4"
"false" -> operator '<' is non-associative
"false" -> operator '<' is non-associative

Statement to verify: "2 < (3 < 4)"
"true"


Statement to verify: "2 < foobar - 3 < 4"
Statement to verify: "2 < foobar - 3 < 4"
Line 540: Line 585:
Statement to verify: "true or false = not true"
Statement to verify: "true or false = not true"
"false" -> expected operand, found 'not'
"false" -> expected operand, found 'not'

Statement to verify: "true or false = (not true)"
"true"

Statement to verify: "not true or false = false"
"true"


Statement to verify: "not true = false"
Statement to verify: "not true = false"
Line 571: Line 622:
"false" -> identifier cannot begin with an underscore
"false" -> identifier cannot begin with an underscore
</pre>
</pre>

=={{header|jq}}==
{{works with|jq}}
'''Also works with gojq, the Go implementation of jq'''

This entry uses the PEG (Parsing Expression Grammar) formalism
to transform the given grammar to a jq verification program
by a simple process that amounts to transcription.

For example, in the rule for `primary`, the alternation

Identifier | Integer

becomes the jq expression:

Identifer // Integer

The transcription process is not completely trivial as
jq requires definitions be ordered and perhaps nested
to satisfy a "define-before-use" rule. In the present case,
since `primary` and `expr` are defined circularly,
we define `primary` as an inner function of `expr`.

This PEG-to-jq transcription process is
described in detail at
[https://github.com/stedolan/jq/wiki/Parsing-Expression-Grammars].

The following presentation uses jq's support for regular expressions
for the sake of simplicity, brevity and efficiency.
For example, the grammar rule:

Digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;

becomes the jq program:

def Digit: parse("[0-9]");

where `parse` is a utility function defined in the library
of PEG-oriented functions in the first subsection below.

The jq program presented here works on character strings and textual files, and hence
the use of `ws` (for whitespace) in the program. Since `ws` is defined here
to make whitespace optional, it might be desirable to modify the program
to require whitespace (e.g. using the utility function `_`) in certain places instead.
</syntaxhighlight>
====Generic PEG Library====
The jq module at [[:Category:Jq/peg.jq]] can be included by copying it to a file,
and adding an `include` statement to top of the main program, e.g. as follows:
<syntaxhighlight lang=jq>
include "peg" {search: "."};
</syntaxhighlight>

====The Grammar====
<syntaxhighlight lang=jq>
def expr:
def Digit : parse("[0-9]");
def Letter : parse("[a-zA-Z]");
def Identifier : Letter | star(Letter // Digit // literal("_"));
def Integer : plus(Digit);
def primary : ws
| (Identifier
// Integer
// (literal("(") | expr | literal(")"))
// literal("true")
// literal("false"))
| ws;
def expr_level_6 : primary | star((literal("*") // literal("/")) | primary) ;
def expr_level_5 : expr_level_6 | star((literal("+") // literal("-")) | expr_level_6) ;
def expr_level_4 : ws | optional(literal("not")) | expr_level_5 | optional(parse("[=<]") | expr_level_5) ;
def expr_level_3 : expr_level_4 | star(literal("and") | expr_level_4) ;
def expr_level_2 : expr_level_3 | star(literal("or") | expr_level_3) ;

ws | expr_level_2 | ws;

def stmt:
{remainder: .} | expr | eos;
</syntaxhighlight>
====The Go examples====
<syntaxhighlight lang=jq>
def go: [
"$",
"one",
"either or both",
"a + 1",
"a + b < c",
"a = b",
"a or b = c",
"3 + not 5",
"3 + (not 5)",
"(42 + 3",
"(42 + 3)",
" not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true",
" and 3 < 2",
"not 7 < 2",
"2 < 3 < 4",
"2 < (3 < 4)",
"2 < foobar - 3 < 4",
"2 < foobar and 3 < 4",
"4 * (32 - 16) + 9 = 73",
"235 76 + 1",
"true or false = not true",
"true or false = (not true)",
"not true or false = false",
"not true = false",
"a + b = not c and false",
"a + b = (not c) and false",
"a + b = (not c and false)",
"ab_c / bd2 or < e_f7",
"g not = h",
"été = false",
"i++",
"j & k",
"l or _m"
];

# For ease of comparison with the Go output, simply emit `true` or `false`
go[]
| (stmt | true) // false
</syntaxhighlight>

'''Invocation''': jq -nr -f compiler-verifying-syntax.jq
{{output}}
The same sequence of `true` and `false` values as at [[#Go|Go]].


=={{header|Julia}}==
=={{header|Julia}}==
<lang julia>function substituteinnerparentheses(s, subs)
<syntaxhighlight lang="julia">function substituteinnerparentheses(s, subs)
((i = findlast('(', s)) == nothing) && return (s, false)
((i = findlast('(', s)) == nothing) && return (s, false)
((j = findfirst(')', s[i:end])) == nothing) && return (s, false)
((j = findfirst(')', s[i:end])) == nothing) && return (s, false)
Line 625: Line 804:
println("The compiler parses the statement { $s } and outputs: ", okparse(s))
println("The compiler parses the statement { $s } and outputs: ", okparse(s))
end
end
</lang>{{out}}
</syntaxhighlight>{{out}}
<pre>
<pre>
The compiler parses the statement { not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true } and outputs: true
The compiler parses the statement { not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true } and outputs: true
Line 636: Line 815:
The compiler parses the statement { 2 < 5 < 9 } and outputs: false
The compiler parses the statement { 2 < 5 < 9 } and outputs: false
</pre>
</pre>

=={{header|Nim}}==
<syntaxhighlight lang="nim">import strutils, tables

type

# List of tokens with their textual representation.
Token = enum tkError = "invalid token", tkIdent = "identifier", tkInt = "integer",
tkLPar = "'('", tkRPar = "')'", tkFalse = "'false'", tkTrue = "'true'",
tkLt = "'<'", tkEq = "'='", tkAdd = "'+'", tkSub = "'-'", tkMul = "'*'",
tkDiv = "'/'", tkOr = "'or'", tkAnd = "'and'", tkNot = "'not'", tkEOF = "EOF"

Lexer = object
str: string # String to parse.
len: Natural # String length.
pos: int # Current lexer position.
token: Token # Current token.
error: string # Error message.

const
# Mapping of reserved identifiers to tokens.
IdentTokens = {"false": tkFalse, "true": tkTrue, "or": tkOr, "and": tkAnd, "not": tkNot}.toTable
# Mapping of characters to tokens.
CharTokens = {'(': tkLPar, ')': tkRPar, '<': tkLt, '=': tkEq,
'+': tkAdd, '-': tkSub, '*': tkMul, '/': tkDiv}.toTable


####################################################################################################
# Lexer.

# Forward reference.
func nextToken(lex: var Lexer)


func initLexer(str: string): Lexer =
## Initialize a lexer.
result = Lexer(str: str, len: str.len, pos: 0)
result.nextToken() # Always a token ahead.


func getIdToken(lex: var Lexer) =
## Get the token for an identifier.
var str: string
while lex.pos < lex.len and lex.str[lex.pos] in IdentChars:
str.add lex.str[lex.pos]
inc lex.pos
lex.token = IdentTokens.getOrDefault(str, tkIdent)


func getInt(lex: var Lexer) =
## Get an integer token.
while lex.pos < lex.len and lex.str[lex.pos] in Digits:
inc lex.pos
lex.token = tkInt


func nextToken(lex: var Lexer) =
## Find the next token.

# Skip spaces.
while lex.pos < lex.str.len and lex.str[lex.pos] == ' ':
inc lex.pos

if lex.pos == lex.str.len:
lex.token = tkEOF

else:
let ch = lex.str[lex.pos]
case ch
of 'a'..'z':
lex.getIdToken()
of '0'..'9':
lex.getint()
else:
inc lex.pos
lex.token = CharTokens.getOrDefault(ch, tkError)


####################################################################################################
# Parser.

# Forward reference.
proc checkExpr(lex: var Lexer): bool


proc checkPrimary(lex: var Lexer): bool =
## Check validity of a primary.

if lex.token in {tkIdent, tkInt, tkFalse, tkTrue}:
lex.nextToken()
return true

elif lex.token == tkLPar:
lex.nextToken()
if not lex.checkExpr():
return false
if lex.token != tkRPar:
lex.error = "Encountered $#; expected ')'.".format(lex.token)
return false
else:
lex.nextToken()
return true

else:
lex.error = "Encountered $#; expected identifier, litteral or '('.".format(lex.token)
return false


proc checkExpr6(lex: var Lexer): bool =
## Check validity of an expr6.

if not lex.checkPrimary(): return false
while lex.token in [tkMul, tkDiv]:
lex.nextToken()
if not lex.checkPrimary(): return false
result = true


proc checkExpr5(lex: var Lexer): bool =
## Check validity of an expr5.

if not lex.checkExpr6(): return false
while lex.token in [tkAdd, tkSub]:
lex.nextToken()
if not lex.checkExpr6(): return false
result = true


proc checkExpr4(lex: var Lexer): bool =
## Check validity of an expr4.

if lex.token == tkNot: lex.nextToken()
if not lex.checkExpr5(): return false
if lex.token in [tkLt, tkEq]:
lex.nextToken()
if not lex.checkExpr5(): return false
result = true


proc checkExpr3(lex: var Lexer): bool =
## Check validity of an expr3.

if not lex.checkExpr4(): return false
while lex.token == tkAnd:
lex.nextToken()
if not lex.checkExpr4(): return false
result = true


proc checkExpr2(lex: var Lexer): bool =
## Check validity of an expr2.

if not lex.checkExpr3(): return false
while lex.token == tkOr:
lex.nextToken()
if not lex.checkExpr3(): return false
result = true


proc checkExpr(lex: var Lexer): bool =
## Check validity of an expr.
lex.checkExpr2()


proc checkStmt(lex: var Lexer): bool =
## Check validity of a statement.

result = lex.checkExpr()
if result and lex.pos < lex.len:
lex.error = "Extra characters at end of statement."
result = false

#———————————————————————————————————————————————————————————————————————————————————————————————————

# Using test set from Algol68 version.

const Tests = ["wombat",
"wombat or monotreme",
"( wombat and not )",
"wombat or not",
"a + 1",
"a + b < c",
"a + b - c * d / e < f and not ( g = h )",
"a + b - c * d / e < f and not ( g = h",
"a = b",
"a or b = c",
"$",
"true or false = not true",
"not true = false",
"3 + not 5",
"3 + (not 5)",
"(42 + 3",
" not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true",
" and 3 < 2",
"not 7 < 2",
"2 < 3 < 4",
"2 < foobar - 3 < 4",
"2 < foobar and 3 < 4",
"4 * (32 - 16) + 9 = 73",
"235 76 + 1",
"a + b = not c and false",
"a + b = (not c) and false",
"a + b = (not c and false)",
"ab_c / bd2 or < e_f7",
"g not = h",
"été = false",
"i++",
"j & k",
"l or _m"]

for test in Tests:
var lex = initLexer(test)
let ok = checkStmt(lex)
echo test, " → ", ok
if not ok: echo "*** Error at position $1. $2 ".format(lex.pos, lex.error)</syntaxhighlight>

{{out}}
<pre>wombat → true
wombat or monotreme → true
( wombat and not ) → false
*** Error at position 18. Encountered ')'; expected identifier, litteral or '('.
wombat or not → false
*** Error at position 13. Encountered EOF; expected identifier, litteral or '('.
a + 1 → true
a + b < c → true
a + b - c * d / e < f and not ( g = h ) → true
a + b - c * d / e < f and not ( g = h → false
*** Error at position 37. Encountered EOF; expected ')'.
a = b → true
a or b = c → true
$ → false
*** Error at position 1. Encountered invalid token; expected identifier, litteral or '('.
true or false = not true → false
*** Error at position 19. Encountered 'not'; expected identifier, litteral or '('.
not true = false → true
3 + not 5 → false
*** Error at position 7. Encountered 'not'; expected identifier, litteral or '('.
3 + (not 5) → true
(42 + 3 → false
*** Error at position 7. Encountered EOF; expected ')'.
not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true → true
and 3 < 2 → false
*** Error at position 4. Encountered 'and'; expected identifier, litteral or '('.
not 7 < 2 → true
2 < 3 < 4 → false
*** Error at position 7. Extra characters at end of statement.
2 < foobar - 3 < 4 → false
*** Error at position 16. Extra characters at end of statement.
2 < foobar and 3 < 4 → true
4 * (32 - 16) + 9 = 73 → true
235 76 + 1 → false
*** Error at position 6. Extra characters at end of statement.
a + b = not c and false → false
*** Error at position 11. Encountered 'not'; expected identifier, litteral or '('.
a + b = (not c) and false → true
a + b = (not c and false) → true
ab_c / bd2 or < e_f7 → false
*** Error at position 15. Encountered '<'; expected identifier, litteral or '('.
g not = h → false
*** Error at position 5. Extra characters at end of statement.
été = false → false
*** Error at position 1. Encountered invalid token; expected identifier, litteral or '('.
i++ → false
*** Error at position 3. Encountered '+'; expected identifier, litteral or '('.
j & k → false
*** Error at position 3. Extra characters at end of statement.
l or _m → false
*** Error at position 6. Encountered invalid token; expected identifier, litteral or '('.</pre>


=={{header|Perl}}==
=={{header|Perl}}==
Line 641: Line 1,090:
Added 'not' and non-assoc fixes.
Added 'not' and non-assoc fixes.
Cooler output.
Cooler output.
<lang perl>#!/usr/bin/perl
<syntaxhighlight lang="perl">#!/usr/bin/perl


use strict; # http://www.rosettacode.org/wiki/Compiler/_Verifying_Syntax
use strict; # http://www.rosettacode.org/wiki/Compiler/_Verifying_Syntax
Line 698: Line 1,147:
j & k
j & k
l or _m
l or _m
UPPER_cAsE_aNd_letter_and_12345_test</lang>
UPPER_cAsE_aNd_letter_and_12345_test</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 738: Line 1,187:


=={{header|Phix}}==
=={{header|Phix}}==
<!--<syntaxhighlight lang="phix">(phixonline)-->
<lang Phix>-- demo\rosetta\Compiler\Verify_Syntax.exw
<span style="color: #000080;font-style:italic;">-- demo\rosetta\Compiler\Verify_Syntax.exw</span>
string src
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
integer ch, sdx
<span style="color: #004080;">string</span> <span style="color: #000000;">src</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">sdx</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">skip_spaces</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\t'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\r'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\n'</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">enum</span> <span style="color: #000000;">SYMBOL</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">INTEGER</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">IDENT</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">ERROR</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">EOF</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">toktypes</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"SYMBOL"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"INTEGER"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"IDENT"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"ERROR"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"EOF"</span><span style="color: #0000FF;">}</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tok</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">sprintok</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">fmt</span><span style="color: #0000FF;">)</span>
procedure skip_spaces()
<span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">toktypes</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]]</span>
while 1 do
<span style="color: #008080;">return</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fmt</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">})</span>
if sdx>length(src) then exit end if
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
ch = src[sdx]
if not find(ch," \t\r\n") then exit end if
sdx += 1
end while
end procedure
<span style="color: #008080;">procedure</span> <span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
enum SYMBOL, INTEGER, IDENT, ERROR, EOF
<span style="color: #000080;font-style:italic;">-- yeilds one of:
constant toktypes = {"SYMBOL","INTEGER","IDENT","ERROR","EOF"}
-- {SYMBOL,ch} where ch is one of "()+-/*=&&lt;", or
sequence tok
-- {INTEGER,n}, or

function sprintok(string fmt)
-- {IDENT,string}, or
-- {ERROR,msg}, or
tok[1] = toktypes[tok[1]]
-- {EOF}</span>
return sprintf(fmt,{tok})
<span style="color: #000000;">skip_spaces</span><span style="color: #0000FF;">()</span>
end function
<span style="color: #004080;">integer</span> <span style="color: #000000;">tokstart</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">sdx</span>

<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">ERROR</span> <span style="color: #008080;">then</span>
procedure next_token()
<span style="color: #0000FF;">?{</span><span style="color: #008000;">"erm, tok is"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">}</span> <span style="color: #000080;font-style:italic;">-- looping??</span>
-- yeilds one of:
<span style="color: #008080;">elsif</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
-- {SYMBOL,ch} where ch is one of "()+-/*=&<", or
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">EOF</span><span style="color: #0000FF;">}</span>
-- {INTEGER,n}, or
<span style="color: #008080;">elsif</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"()+-/*=&&lt;"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
-- {IDENT,string}, or
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
-- {ERROR,msg}, or
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">SYMBOL</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">&</span><span style="color: #008000;">""</span><span style="color: #0000FF;">}</span>
-- {EOF}
<span style="color: #008080;">elsif</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'0'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'9'</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
skip_spaces()
<span style="color: #004080;">integer</span> <span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">-</span><span style="color: #008000;">'0'</span>
integer tokstart = sdx
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
if tok[1]=ERROR then
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
?{"erm, tok is",tok} -- looping??
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
elsif sdx>length(src) then
<span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">]</span>
tok = {EOF}
<span style="color: #008080;">if</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'0'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #008000;">'9'</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
elsif find(ch,"()+-/*=&<") then
<span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">*</span><span style="color: #000000;">10</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">-</span><span style="color: #008000;">'0'</span>
sdx += 1
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
tok = {SYMBOL,ch&""}
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">INTEGER</span><span style="color: #0000FF;">,</span><span style="color: #000000;">n</span><span style="color: #0000FF;">}</span>
elsif (ch>='0' and ch<='9') then
<span style="color: #008080;">elsif</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'z'</span><span style="color: #0000FF;">)</span>
integer n = ch-'0'
<span style="color: #008080;">or</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'A'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'Z'</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
while true do
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
sdx += 1
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
if sdx>length(src) then exit end if
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
ch = src[sdx]
<span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">]</span>
if ch<'0' or ch>'9' then exit end if
<span style="color: #008080;">if</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">!=</span><span style="color: #008000;">'_'</span>
n = n*10 + ch-'0'
<span style="color: #008080;">and</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #008000;">'z'</span><span style="color: #0000FF;">)</span>
end while
<span style="color: #008080;">and</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'A'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #008000;">'Z'</span><span style="color: #0000FF;">)</span>
tok = {INTEGER,n}
<span style="color: #008080;">and</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'0'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #008000;">'9'</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
elsif (ch>='a' and ch<='z')
<span style="color: #008080;">exit</span>
or (ch>='A' and ch<='Z') then
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
while true do
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
sdx += 1
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">IDENT</span><span style="color: #0000FF;">,</span><span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tokstart</span><span style="color: #0000FF;">..</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]}</span>
if sdx>length(src) then exit end if
<span style="color: #008080;">elsif</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'_'</span> <span style="color: #008080;">then</span>
ch = src[sdx]
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">ERROR</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"identifiers may not start with _"</span><span style="color: #0000FF;">}</span>
if ch!='_'
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
and (ch<'a' or ch>'z')
<span style="color: #008080;">else</span>
and (ch<'A' or ch>'Z')
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">ERROR</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"illegal char (%c/%d)"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">)}</span>
and (ch<'0' or ch>'9') then
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
exit
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
end while
tok = {IDENT,src[tokstart..sdx-1]}
elsif ch='_' then
tok = {ERROR,"identifiers may not start with _"}
sdx += 1
else
tok = {ERROR,sprintf("illegal char (%c/%d)",ch)}
sdx += 1
end if
end procedure

forward procedure or_expr()

procedure primary()
integer tt = tok[1]
if tt=IDENT
or tt=INTEGER then
next_token()
elsif tok={SYMBOL,"("} then
next_token()
or_expr()
if tok!={SYMBOL,")"} then
tok = {ERROR,") expected"}
else
next_token()
end if
else
tok = {ERROR,sprintok("invalid [%v]")}
end if
end procedure

procedure mul_expr()
while true do
primary()
if not find(tok,{{SYMBOL,"*"},{SYMBOL,"/"}}) then exit end if
next_token()
end while
end procedure

procedure sum_expr()
while true do
mul_expr()
if not find(tok,{{SYMBOL,"+"},{SYMBOL,"-"}}) then exit end if
next_token()
end while
end procedure

procedure cmp_expr()
if tok=={IDENT,"not"} then next_token() end if
sum_expr()
if find(tok,{{SYMBOL,"="},{SYMBOL,"<"}}) then
next_token()
sum_expr()
end if
end procedure

procedure and_expr()
while true do
cmp_expr()
if tok!={IDENT,"and"} then exit end if
next_token()
end while
end procedure

procedure or_expr()
while true do
and_expr()
if tok!={IDENT,"or"} then exit end if
next_token()
end while
end procedure

procedure statement()
or_expr()
end procedure
<span style="color: #008080;">forward</span> <span style="color: #008080;">procedure</span> <span style="color: #000000;">or_expr</span><span style="color: #0000FF;">()</span>
procedure verify_syntax(string source)
src = source
<span style="color: #008080;">procedure</span> <span style="color: #000000;">primary</span><span style="color: #0000FF;">()</span>
sdx = 1
<span style="color: #004080;">integer</span> <span style="color: #000000;">tt</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
tok = {0} -- ("not error"/invalid-ish)
<span style="color: #008080;">if</span> <span style="color: #000000;">tt</span><span style="color: #0000FF;">=</span><span style="color: #000000;">IDENT</span>
next_token()
<span style="color: #008080;">or</span> <span style="color: #000000;">tt</span><span style="color: #0000FF;">=</span><span style="color: #000000;">INTEGER</span> <span style="color: #008080;">then</span>
statement()
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
printf(1,"%30s ==> %s\n",{source,iff(tok[1]=EOF?"true":sprintok("false [tok=%v]"))})
<span style="color: #008080;">elsif</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">={</span><span style="color: #000000;">SYMBOL</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"("</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span>
end procedure
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>

<span style="color: #000000;">or_expr</span><span style="color: #0000FF;">()</span>
constant tests = {
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">!={</span><span style="color: #000000;">SYMBOL</span><span style="color: #0000FF;">,</span><span style="color: #008000;">")"</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span>
"$",
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">ERROR</span><span style="color: #0000FF;">,</span><span style="color: #008000;">") expected"</span><span style="color: #0000FF;">}</span>
"one",
<span style="color: #008080;">else</span>
"either or both",
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
"a + 1",
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
"a + b < c",
<span style="color: #008080;">else</span>
"a = b",
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">ERROR</span><span style="color: #0000FF;">,</span><span style="color: #000000;">sprintok</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"invalid [%v]"</span><span style="color: #0000FF;">)}</span>
"a or b = c",
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
"3 + not 5",
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
"3 + (not 5)",
"(42 + 3",
<span style="color: #008080;">procedure</span> <span style="color: #000000;">mul_expr</span><span style="color: #0000FF;">()</span>
"(42 + 3)",
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
" not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true",
<span style="color: #000000;">primary</span><span style="color: #0000FF;">()</span>
" and 3 < 2",
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">,{{</span><span style="color: #000000;">SYMBOL</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"*"</span><span style="color: #0000FF;">},{</span><span style="color: #000000;">SYMBOL</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"/"</span><span style="color: #0000FF;">}})</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
"not 7 < 2",
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
"2 < 3 < 4",
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
"2 < (3 < 4)",
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
"2 < foobar - 3 < 4",
"2 < foobar and 3 < 4",
<span style="color: #008080;">procedure</span> <span style="color: #000000;">sum_expr</span><span style="color: #0000FF;">()</span>
"4 * (32 - 16) + 9 = 73",
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
"235 76 + 1",
<span style="color: #000000;">mul_expr</span><span style="color: #0000FF;">()</span>
"true or false = not true",
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">,{{</span><span style="color: #000000;">SYMBOL</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"+"</span><span style="color: #0000FF;">},{</span><span style="color: #000000;">SYMBOL</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"-"</span><span style="color: #0000FF;">}})</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
"true or false = (not true)",
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
"not true or false = false",
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
"not true = false",
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
"a + b = not c and false",
"a + b = (not c) and false",
<span style="color: #008080;">procedure</span> <span style="color: #000000;">cmp_expr</span><span style="color: #0000FF;">()</span>
"a + b = (not c and false)",
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">=={</span><span style="color: #000000;">IDENT</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"not"</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span> <span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
"ab_c / bd2 or < e_f7",
<span style="color: #000080;font-style:italic;">-- while true do
"g not = h",
"i++",
-- sum_expr()
-- if not find(tok,{{SYMBOL,"="},{SYMBOL,"&lt;"}}) then exit end if
"j & k",
"l or _m"}
-- next_token()
-- end while</span>

<span style="color: #000000;">sum_expr</span><span style="color: #0000FF;">()</span>
printf(1,"Verify Syntax:\n")
<span style="color: #008080;">if</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">,{{</span><span style="color: #000000;">SYMBOL</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"="</span><span style="color: #0000FF;">},{</span><span style="color: #000000;">SYMBOL</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&lt;"</span><span style="color: #0000FF;">}})</span> <span style="color: #008080;">then</span>
for i=1 to length(tests) do
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
verify_syntax(tests[i])
<span style="color: #000000;">sum_expr</span><span style="color: #0000FF;">()</span>
end for</lang>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">and_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">cmp_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">!={</span><span style="color: #000000;">IDENT</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"and"</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">or_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">and_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">!={</span><span style="color: #000000;">IDENT</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"or"</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">statement</span><span style="color: #0000FF;">()</span>
<span style="color: #000000;">or_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">verify_syntax</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">source</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">src</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">source</span>
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">0</span><span style="color: #0000FF;">}</span> <span style="color: #000080;font-style:italic;">-- ("not error"/invalid-ish)</span>
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
<span style="color: #000000;">statement</span><span style="color: #0000FF;">()</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%30s ==&gt; %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">source</span><span style="color: #0000FF;">,</span><span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">EOF</span><span style="color: #0000FF;">?</span><span style="color: #008000;">"true"</span><span style="color: #0000FF;">:</span><span style="color: #000000;">sprintok</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"false [tok=%v]"</span><span style="color: #0000FF;">))})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">tests</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span>
<span style="color: #008000;">"$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"one"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"either or both"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"a + 1"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"a + b &lt; c"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"a = b"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"a or b = c"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"3 + not 5"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"3 + (not 5)"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"(42 + 3"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"(42 + 3)"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">" not 3 &lt; 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 &lt; 56) and 4 * 3 &lt; 12 or not true"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">" and 3 &lt; 2"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"not 7 &lt; 2"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"2 &lt; 3 &lt; 4"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"2 &lt; (3 &lt; 4)"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"2 &lt; foobar - 3 &lt; 4"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"2 &lt; foobar and 3 &lt; 4"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"4 * (32 - 16) + 9 = 73"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"235 76 + 1"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"true or false = not true"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"true or false = (not true)"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"not true or false = false"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"not true = false"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"a + b = not c and false"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"a + b = (not c) and false"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"a + b = (not c and false)"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"ab_c / bd2 or &lt; e_f7"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"g not = h"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"i++"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"j & k"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"l or _m"</span><span style="color: #0000FF;">}</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Verify Syntax:\n"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">verify_syntax</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">])</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #0000FF;">?</span><span style="color: #008000;">"done"</span>
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">()</span>
<!--</syntaxhighlight>-->
{{out}}
{{out}}
Note that "= not c" fails, whereas "= (not c)" passes, see talk page. (Arguably the task definition should be fixed.)
Note that "= not c" fails, whereas "= (not c)" passes, see talk page. (Arguably the task definition should be fixed.)
Line 957: Line 1,417:
j & k ==> false [tok={"SYMBOL","&"}]
j & k ==> false [tok={"SYMBOL","&"}]
l or _m ==> false [tok={"ERROR",`invalid [{"ERROR","identifiers may not start with _"}]`}]
l or _m ==> false [tok={"ERROR",`invalid [{"ERROR","identifiers may not start with _"}]`}]
</pre>

=={{header|Raku}}==
Format of task grammar is changed from EBNF to ABNF to cater for the Grammar::ABNF module and testing data is taken from the Perl entry.
<syntaxhighlight lang="raku" line># 20200511 Raku programming solution

use Grammar::ABNF;

grammar G is Grammar::ABNF { token CRLF { "\n" } };

my $g = G.generate(Q:to<RULES>
stmt = expr
expr = expr_level_2
expr_level_2 = expr_level_3 *( " or " expr_level_3 )
expr_level_3 = expr_level_4 *( " and " expr_level_4 )
expr_level_4 = ["not "] expr_level_5 [ [ " = " / " < " ] expr_level_5 ]
expr_level_5 = expr_level_6 *( [ " + " / " - " ] expr_level_6 )
expr_level_6 = primary *( [ " * " / " / " ] primary )
Integer = Digit *( Digit )
Identifier = Letter *( Letter / Digit / "_" )
primary = Identifier / Integer / "(" expr ")" / " true " / " false "
Digit = "0" / "1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9"
Letter = "a" / "b" / "c" / "d" / "e" / "f" / "g" / "h" / "i" / "j"
/ "k" / "l" / "m" / "n" / "o" / "p" / "q" / "r" / "s" / "t"
/ "u" / "v" / "w" / "x" / "y" / "z" / "A" / "B" / "C" / "D"
/ "E" / "F" / "G" / "H" / "I" / "J" / "K" / "L" / "M" / "N"
/ "O" / "P" / "Q" / "R" / "S" / "T" / "U" / "V" / "W" / "X"
/ "Y" / "Z"
RULES
);

my \DATA = Q:to<DATA>;
3 + not 5
3 + (not 5)
(42 + 3
(42 + 3 syntax_error
not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true
and 3 < 2
not 7 < 2
2 < 3 < 4
2 < foobar - 3 < 4
2 < foobar and 3 < 4
4 * (32 - 16) + 9 = 73
235 76 + 1
a + b = not c and false
a + b = (not c) and false
a + b = (not c and false)
ab_c / bd2 or < e_f7
g not = h
i++
j & k
l or _m
UPPER_cAsE_aNd_letter_and_12345_test
DATA

say $g.parse($_).Bool, "\t", $_ for DATA.lines</syntaxhighlight>
{{out}}
<pre>False 3 + not 5
True 3 + (not 5)
False (42 + 3
False (42 + 3 syntax_error
True not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true
False and 3 < 2
True not 7 < 2
False 2 < 3 < 4
False 2 < foobar - 3 < 4
True 2 < foobar and 3 < 4
True 4 * (32 - 16) + 9 = 73
False 235 76 + 1
False a + b = not c and false
True a + b = (not c) and false
True a + b = (not c and false)
False ab_c / bd2 or < e_f7
False g not = h
False i++
False j & k
False l or _m
True UPPER_cAsE_aNd_letter_and_12345_test
</pre>

=={{header|Wren}}==
{{trans|Nim}}
{{libheader|Wren-dynamic}}
{{libheader|Wren-str}}
<syntaxhighlight lang="wren">import "./dynamic" for Enum
import "./str" for Char

var Token = Enum.create(
"Token", ["error", "ident", "int", "lpar", "rpar", "False", "True",
"lt", "eq", "add", "sub", "mul", "div", "or", "and", "not", "eof"]
)

var Token2Text = {
Token.error: "invalid token", Token.ident: "identifier", Token.int: "integer",
Token.lpar: "'('", Token.rpar: "')'", Token.False: "'false'", Token.True: "'true'",
Token.lt: "'<'", Token.eq: "'='", Token.add: "'+'", Token.sub: "'-'",
Token.mul: "'*'", Token.div: "'/'", Token.or: "'or'", Token.and: "'and'",
Token.not: "'not'", Token.eof: "EOF"
}

var IdentTokens = {
"false": Token.False, "true": Token.True, "or": Token.or,
"and": Token.and, "not": Token.not
}

var CharTokens = {
"(": Token.lpar, ")": Token.rpar, "<": Token.lt, "=": Token.eq,
"+": Token.add, "-": Token.sub, "*": Token.mul, "/": Token.div
}

var IsIdentChar = Fn.new { |c| Char.isAsciiAlphaNum(c) || c == "_" }

class Lexer {
static init(s) {
var lex = Lexer.new(s, s.count, 0, "", "")
lex.nextToken
return lex
}

construct new(str, len, pos, token, error) {
_str = str // string to parse
_len = len // string length
_pos = pos // current lexer position
_token = token // current token
_error = error // error message
}

// property getters required
pos { _pos }
error { _error }

// get the token for an identifier
getIdToken {
var s = ""
while (_pos < _len && IsIdentChar.call(_str[_pos])) {
s = s + _str[_pos]
_pos = _pos + 1
}
_token = IdentTokens.containsKey(s) ? IdentTokens[s] : Token.ident
}

// get an integer token
getInt {
while (_pos < _len && Char.isDigit(_str[_pos])) _pos = _pos + 1
_token = Token.int
}

// find the next token
nextToken {
// skip spaces
while (_pos < _len && _str[_pos] == " ") _pos = _pos + 1
if (_pos == _len) {
_token = Token.eof
} else {
var ch = _str[_pos]
if (Char.isAsciiLower(ch)) {
getIdToken
} else if (Char.isDigit(ch)) {
getInt
} else {
_pos = _pos + 1
_token = CharTokens.containsKey(ch) ? CharTokens[ch] : Token.error
}
}
}

// check validity of a primary
checkPrimary {
if ([Token.ident, Token.int, Token.False, Token.True].contains(_token)) {
nextToken
return true
} else if (_token == Token.lpar) {
nextToken
if (!checkExpr) return false
if (_token != Token.rpar) {
_error = "Encountered %(Token2Text[_token]); expected ')'"
return false
} else {
nextToken
return true
}
} else {
_error = "Encountered %(Token2Text[_token]); expected identifier, literal or '('"
return false
}
}

// check validity of an expr6
checkExpr6 {
if (!checkPrimary) return false
while ([Token.mul, Token.div].contains(_token)) {
nextToken
if (!checkPrimary) return false
}
return true
}

// check validity of an expr5
checkExpr5 {
if (!checkExpr6) return false
while ([Token.add, Token.sub].contains(_token)) {
nextToken
if (!checkExpr6) return false
}
return true
}

// check validity of an expr4
checkExpr4 {
if (_token == Token.not) nextToken
if (!checkExpr5) return false
if ([Token.lt, Token.eq].contains(_token)) {
nextToken
if (!checkExpr5) return false
}
return true
}

// check validity of an expr3
checkExpr3 {
if (!checkExpr4) return false
while (_token == Token.and) {
nextToken
if (!checkExpr4) return false
}
return true
}

// check validity of an expr2
checkExpr2 {
if (!checkExpr3) return false
while (_token == Token.or) {
nextToken
if (!checkExpr3) return false
}
return true
}

// check validity of an expr
checkExpr { checkExpr2 }

// check validity of a statement
checkStmt {
var result = checkExpr
if (result && _pos < _len) {
_error = "Extra characters at end of statement."
result = false
}
return result
}
}

// using test set from Algol68 version

var tests = [
"wombat",
"wombat or monotreme",
"( wombat and not )",
"wombat or not",
"a + 1",
"a + b < c",
"a + b - c * d / e < f and not ( g = h )",
"a + b - c * d / e < f and not ( g = h",
"a = b",
"a or b = c",
"$",
"true or false = not true",
"not true = false",
"3 + not 5",
"3 + (not 5)",
"(42 + 3",
" not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true",
" and 3 < 2",
"not 7 < 2",
"2 < 3 < 4",
"2 < foobar - 3 < 4",
"2 < foobar and 3 < 4",
"4 * (32 - 16) + 9 = 73",
"235 76 + 1",
"a + b = not c and false",
"a + b = (not c) and false",
"a + b = (not c and false)",
"ab_c / bd2 or < e_f7",
"g not = h",
"été = false",
"i++",
"j & k",
"l or _m"
]

for (test in tests) {
var lex = Lexer.init(test)
var ok = lex.checkStmt
System.print("%(test) -> %(ok)")
if (!ok) {
System.print("*** Error at position %(lex.pos). %(lex.error)\n")
}
}</syntaxhighlight>

{{out}}
<pre>
wombat -> true
wombat or monotreme -> true
( wombat and not ) -> false
*** Error at position 18. Encountered ')'; expected identifier, literal or '('

wombat or not -> false
*** Error at position 13. Encountered EOF; expected identifier, literal or '('

a + 1 -> true
a + b < c -> true
a + b - c * d / e < f and not ( g = h ) -> true
a + b - c * d / e < f and not ( g = h -> false
*** Error at position 37. Encountered EOF; expected ')'

a = b -> true
a or b = c -> true
$ -> false
*** Error at position 1. Encountered invalid token; expected identifier, literal or '('

true or false = not true -> false
*** Error at position 19. Encountered 'not'; expected identifier, literal or '('

not true = false -> true
3 + not 5 -> false
*** Error at position 7. Encountered 'not'; expected identifier, literal or '('

3 + (not 5) -> true
(42 + 3 -> false
*** Error at position 7. Encountered EOF; expected ')'

not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true -> true
and 3 < 2 -> false
*** Error at position 4. Encountered 'and'; expected identifier, literal or '('

not 7 < 2 -> true
2 < 3 < 4 -> false
*** Error at position 7. Extra characters at end of statement.

2 < foobar - 3 < 4 -> false
*** Error at position 16. Extra characters at end of statement.

2 < foobar and 3 < 4 -> true
4 * (32 - 16) + 9 = 73 -> true
235 76 + 1 -> false
*** Error at position 6. Extra characters at end of statement.

a + b = not c and false -> false
*** Error at position 11. Encountered 'not'; expected identifier, literal or '('

a + b = (not c) and false -> true
a + b = (not c and false) -> true
ab_c / bd2 or < e_f7 -> false
*** Error at position 15. Encountered '<'; expected identifier, literal or '('

g not = h -> false
*** Error at position 5. Extra characters at end of statement.

été = false -> false
*** Error at position 1. Encountered invalid token; expected identifier, literal or '('

i++ -> false
*** Error at position 3. Encountered '+'; expected identifier, literal or '('

j & k -> false
*** Error at position 3. Extra characters at end of statement.

l or _m -> false
*** Error at position 6. Encountered invalid token; expected identifier, literal or '('
</pre>
</pre>

Latest revision as of 16:18, 20 November 2023

Compiler/Verifying syntax is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Verifying Syntax
A Syntax Analyzer that verifies a token stream,
outputs a string "true" if the token stream matches the grammar requirement,
outputs a string "false" if the token stream does not match the grammar.

Task
The program reads input from a file of token stream,
reads it and outputs a string "true" if the token stream matches the grammar,
outputs a string "false" and error messages if the token stream does not match the grammar,
based on the grammar below. The grammar is written in Extended Backus-Naur Form (EBNF).

Grammar

stmt         =         expr ; 

expr         =         expr_level_2; 
expr_level_2 =         expr_level_3 {"or" expr_level_3} ; 
expr_level_3 =         expr_level_4 {"and" expr_level_4} ; 
expr_level_4 = ["not"] expr_level_5 [('=' | '<') expr_level_5] ; 
expr_level_5 =         expr_level_6 {('+' | '-') expr_level_6} ; 
expr_level_6 =         primary      {('*' | '/') primary} ; 

primary      =         Identifier
                     | Integer
                     | '(' expr ')'
                     | "true"
                     | "false"
                     ;
Integer      =         Digit {Digit};

Identifier   =         Letter {Letter | Digit | '_'};

Digit        =         "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;

Letter       =         "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" 
                     | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" 
                     | "u" | "v" | "w" | "x" | "y" | "z" | "A" | "B" | "C" | "D" 
                     | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" 
                     | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" 
                     | "Y" | "Z" ;


ALGOL W

Includes the test cases from the Go sample. Note, strings are limited to 256 characters in Algol W.

begin
    % verify expressions match expected syntax                                %
    procedure stmt ( string(256) value text ) ; begin
        % the parsing procedures return true if the expression matches the    %
        % required element, false otherwise - a parse tree is not built       %
        logical     haveInteger, haveIdentifier, haveEof, hadError;
        integer     currPos, maxPos, syPos;
        string(1)   currCh;
        string(256) srcText;
        string(256) sy;
        logical procedure expr ; expr_level_2;
        logical procedure expr_level_2 ; begin
            logical ok;
            ok := expr_level_3;
            while ok and have( "or" ) do ok := next and expr_level_3;
            ok
        end expr_level_2 ;
        logical procedure expr_level_3 ; begin
            logical ok;
            ok := expr_level_4;
            while have( "and" ) and ok do ok := next and expr_level_4;
            ok
        end expr_level_3 ;
        logical procedure expr_level_4 ; begin
            logical ok;
            ok := true;
            if have( "not" ) then ok := next;
            if ok then ok := expr_level_5;
            if ok and ( have( "=" ) or have( "<" ) ) then ok := next and expr_level_5;
            ok
        end expr_level_4 ;
        logical procedure expr_level_5 ; begin
            logical ok;
            ok := expr_level_6;
            while ok and ( have( "+" ) or have( "-" ) ) do ok := next and expr_level_6;
            ok
        end expr_level_5 ;
        logical procedure expr_level_6 ; begin
            logical ok;
            ok := primary;
            while ok and ( have( "*" ) or have( "/" ) ) do ok := next and primary;
            ok
        end expr_level_6 ;
        logical procedure primary ;
            if haveIdentifier or haveInteger or have( "true" ) or have( "false" ) then begin
                void( next );
                true
                end
            else if have( "(" ) then next and expr and mustBe( ")" )
            else error( "Expecting identifier, literal or ""(""" ) ;
        logical procedure addAndNextChar ; begin
            if syPos = 255 then void( error( "Symbol too long" ) )
            else if syPos < 255 then begin
                sy( syPos // 1 ) := currCh;
                syPos := syPos + 1
            end if_syPos_eq_255__lt_255 ;
            nextChar
        end addAndNextChar ;
        logical procedure next ; begin
            logical ok;
            haveInteger := haveIdentifier := false;
            ok          := skipSpaces;
            sy          := "";
            syPos       := 0;
            if not ok then sy := "<eof>"
            else begin
                if haveDigit then begin
                    haveInteger := true;
                    while addAndNextChar and haveDigit do begin end
                    end
                else if haveLetter then begin
                    while addAndNextChar and ( haveLetter or haveDigit or currCh = "_" ) do begin end;
                    haveIdentifier := sy not = "and" and sy not = "or" and sy not = "not" and sy not = "true" and sy not = "false"
                    end
                else void( addAndNextChar );
            end if_not_ok__ ;
            ok
        end next ;
        logical procedure skipSpaces ; begin
            logical ok;
            ok := not haveEof;
            while ok and currCh = " " do ok := nextChar;
            ok
        end skipSpaces ;
        logical procedure haveLetter ; not haveEof and (  ( currCh >= "a" and currCh <= "z" )
                                                       or ( currCh >= "A" and currCh <= "Z" ) );
        logical procedure haveDigit  ; not haveEof and (    currCh >= "0" and currCh <= "9" );
        logical procedure have   ( string(12) value text ) ; text = sy;
        logical procedure mustBe ( string(12) value text ) ; begin
            logical ok;
            ok := have( text );
            if ok then void( next )
            else begin
                string(256) msg;
                msg := "Expected:";
                msg( strlen( msg ) + 2 // 12 ) := text;
                void( error( msg ) )
            end if_ok;
            ok
        end mustBe ;
        logical procedure nextChar ; begin
            haveEof := currPos > maxPos;
            if not haveEof then begin
                currCh  := srcText( currPos // 1 ); 
                currPos := currPos + 1
            end if_not_haveEof ;
            not haveEof
        end nextChar ;
        integer procedure strlen ( string(256) value text ) ; begin
            integer length;
            length := 255;
            while length >= 0 and text( length // 1 ) = " " do length := length - 1;
            length
        end strlen ;
        logical procedure error ( string(256) value msg ) ; begin
            if not hadError then begin
                % have the first error %
                writeon( " error at: ", I_W := 1, currPos, "(" );
                showText( sy, strlen( sy ) );
                writeon( "): " );
                showText( msg, strlen( msg ) );
                hadError := true
            end if_not_hadError ;
            false
        end ewrror ;
        procedure showText ( string(256) value text; integer value length ) ; for c := 0 until length do writeon( text( c // 1 ) );
        procedure void ( logical value b ) ; begin end void ;
        % parse text and output messages indicating whether it is OK or not %
        hadError := false;
        sy       := "<bof>";
        srcText  := text;
        currPos  := 0;
        maxPos   := strlen( srcText );
        write();
        showText( srcText, maxPos );
        writeon( ": " );
        if not nextChar               then void( error( "Blank source text"             ) )
        else if not ( next and expr ) then void( error( "Expression expected"           ) )
        else if not haveEof           then void( error( "Expected EOF after expression" ) );
        if hadError then writeon( " false" )
        else             writeon( " true"  )
    end stmt ;
    % test cases %
    stmt( "wombat" );
    stmt( "wombat or monotreme" );
    stmt( "( wombat and not )" );
    stmt( "wombat or not" );
    stmt( "a + 1" );
    stmt( "a + b < c" );
    stmt( "a + b - c * d / e < f and not ( g = h )" );
    stmt( "a + b - c * d / e < f and not ( g = h" );
    stmt( "a = b" );
    stmt( "a or b = c" );
    stmt( "$" );
    % test cases from Go %
    stmt( "true or false = not true" );
    stmt( "not true = false" );
    stmt( "3 + not 5" );
    stmt( "3 + (not 5)" );
    stmt( "(42 + 3" );
    stmt( " not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true" );
    stmt( " and 3 < 2" );
    stmt( "not 7 < 2" );
    stmt( "2 < 3 < 4" );
    stmt( "2 < foobar - 3 < 4" );
    stmt( "2 < foobar and 3 < 4" );
    stmt( "4 * (32 - 16) + 9 = 73" );
    stmt( "235 76 + 1" );
    stmt( "a + b = not c and false" );
    stmt( "a + b = (not c) and false" );
    stmt( "a + b = (not c and false)" );
    stmt( "ab_c / bd2 or < e_f7" );
    stmt( "g not = h" );
    stmt( "été = false" );
    stmt( "i++" );
    stmt( "j & k" );
    stmt( "l or _m" )
end.
Output:
wombat:  true
wombat or monotreme:  true
( wombat and not ):  error at: 18  ()): Expecting identifier, literal or "(" false
wombat or not:  error at: 13  (<eof>): Expression expected false
a + 1:  true
a + b < c:  true
a + b - c * d / e < f and not ( g = h ):  true
a + b - c * d / e < f and not ( g = h:  error at: 37  (<eof>): Expected: ) false
a = b:  true
a or b = c:  true
$:  error at: 1  ($): Expecting identifier, literal or "(" false
true or false = not true:  error at: 20  (not): Expecting identifier, literal or "(" false
not true = false:  true
3 + not 5:  error at: 8  (not): Expecting identifier, literal or "(" false
3 + (not 5):  true
(42 + 3:  error at: 7  (<eof>): Expected: ) false
 not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true:  true
 and 3 < 2:  error at: 5  (and): Expecting identifier, literal or "(" false
not 7 < 2:  true
2 < 3 < 4:  error at: 8  (<): Expected EOF after expression false
2 < foobar - 3 < 4:  error at: 17  (<): Expected EOF after expression false
2 < foobar and 3 < 4:  true
4 * (32 - 16) + 9 = 73:  true
235 76 + 1:  error at: 7  (76): Expected EOF after expression false
a + b = not c and false:  error at: 12  (not): Expecting identifier, literal or "(" false
a + b = (not c) and false:  true
a + b = (not c and false):  true
ab_c / bd2 or < e_f7:  error at: 16  (<): Expecting identifier, literal or "(" false
g not = h:  error at: 6  (not): Expected EOF after expression false
été = false:  error at: 2  (Ã): Expecting identifier, literal or "(" false
i++:  error at: 3  (+): Expecting identifier, literal or "(" false
j & k:  error at: 4  (&): Expected EOF after expression false
l or _m:  error at: 7  (_): Expecting identifier, literal or "(" false

C

// cverifyingsyntaxrosetta.c
// http://www.rosettacode.org/wiki/Compiler/_Verifying_Syntax

/*
# Makefile
CFLAGS = -O3 -Wall -Wfatal-errors
all: cverifyingsyntaxrosetta
*/

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <setjmp.h>

#define AT(CHAR) ( *pos == CHAR && ++pos )
#define TEST(STR) ( strncmp( pos, STR, strlen(STR) ) == 0 \
  && ! isalnum(pos[strlen(STR)]) && pos[strlen(STR)] != '_' )
#define IS(STR) ( TEST(STR) && (pos += strlen(STR)) )

static char *pos;                                 // current position in source
static char *startpos;                            // start of source
static jmp_buf jmpenv;

static int
error(char *message)
  {
  printf("false  %s\n%*s^ %s\n", startpos, pos - startpos + 7, "", message);
  longjmp( jmpenv, 1 );
  }

static int
expr(int level)
  {
  while( isspace(*pos) ) ++pos;                     // skip white space
  if( AT('(') )                                     // find a primary (operand)
    {
    if( expr(0) && ! AT(')') ) error("missing close paren");
    }
  else if( level <= 4 && IS("not") && expr(6) ) { }
  else if( TEST("or") || TEST("and") || TEST("not") )
    {
    error("expected a primary, found an operator");
    }
  else if( isdigit(*pos) ) pos += strspn( pos, "0123456789" );
  else if( isalpha(*pos) ) pos += strspn( pos, "0123456789_"
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" );
  else error("expected a primary");

  do                    // then look for zero or more valid following operators
    {
    while( isspace(*pos) ) ++pos;
    }
  while(
    level <= 2 && IS("or") ? expr(3) :
    level <= 3 && IS("and") ? expr(4) :
    level <= 4 && (AT('=') || AT('<')) ? expr(5) :
    level == 5 && (*pos == '=' || *pos == '<') ? error("non-associative") :
    level <= 6 && (AT('+') || AT('-')) ? expr(7) :
    level <= 7 && (AT('*') || AT('/')) ? expr(8) :
    0 );
  return 1;
  }

static void
parse(char *source)
  {
  startpos = pos = source;
  if( setjmp(jmpenv) ) return; // for catching errors during recursion
  expr(0);
  if( *pos ) error("unexpected character following valid parse");
  printf(" true  %s\n", source);
  }

static char *tests[] = {
  "3 + not 5",
  "3 + (not 5)",
  "(42 + 3",
  "(42 + 3 some_other_syntax_error",
  "not 3 < 4 or (true or 3/4+8*5-5*2 < 56) and 4*3 < 12 or not true",
  "and 3 < 2",
  "not 7 < 2",
  "2 < 3 < 4",
  "2 < foobar - 3 < 4",
  "2 < foobar and 3 < 4",
  "4 * (32 - 16) + 9 = 73",
  "235 76 + 1",
  "a + b = not c and false",
  "a + b = (not c) and false",
  "a + b = (not c and false)",
  "ab_c / bd2 or < e_f7",
  "g not = h",
  "i++",
  "j & k",
  "l or _m",
  "wombat",
  "WOMBAT or monotreme",
  "a + b - c * d / e < f and not ( g = h )",
  "$",
  };

int
main(int argc, char *argv[])
  {
  for( int i = 0; i < sizeof(tests)/sizeof(*tests); i++ ) parse(tests[i]);
  }
Output:
false  3 + not 5
           ^ expected a primary, found an operator
 true  3 + (not 5)
false  (42 + 3
              ^ missing close paren
false  (42 + 3 some_other_syntax_error
               ^ missing close paren
 true  not 3 < 4 or (true or 3/4+8*5-5*2 < 56) and 4*3 < 12 or not true
false  and 3 < 2
       ^ expected a primary, found an operator
 true  not 7 < 2
false  2 < 3 < 4
             ^ non-associative
false  2 < foobar - 3 < 4
                      ^ non-associative
 true  2 < foobar and 3 < 4
 true  4 * (32 - 16) + 9 = 73
false  235 76 + 1
           ^ unexpected character following valid parse
false  a + b = not c and false
               ^ expected a primary, found an operator
 true  a + b = (not c) and false
 true  a + b = (not c and false)
false  ab_c / bd2 or < e_f7
                     ^ expected a primary
false  g not = h
         ^ unexpected character following valid parse
false  i++
         ^ expected a primary
false  j & k
         ^ unexpected character following valid parse
false  l or _m
            ^ expected a primary
 true  wombat
 true  WOMBAT or monotreme
 true  a + b - c * d / e < f and not ( g = h )
false  $
       ^ expected a primary

Go

If "and", "or", "not" and "=" are replaced by the corresponding symbols: "&&", "||", "!" and "==", then the task grammar is a subset of Go's own grammar - operator precedence and identifier construction are the same.

After making the appropriate substitutions, we can therefore use the parser in Go's standard library to verify whether the statements are valid or not. As expressions cannot be statements in Go, we simply parse the latter as expressions.

However, before applying the parser, we first need to ensure that the expressions don't include any characters (including non-ASCII) or usages thereof which Go would otherwise permit. Note that it's not necessary to specifically check for "++" and "--" as these are statements in Go and can't appear in expressions anyway.

In particular, after substitutions, "= not", "+ not" etc. would be allowed by the Go parser so we need to exclude them. Curiously, the Go parser allows something like "2 < 3 < 4" even though it doesn't compile. We need therefore to exclude that also (see Talk page).

package main

import (
    "fmt"
    "go/parser"
    "regexp"
    "strings"
)

var (
    re1 = regexp.MustCompile(`[^_a-zA-Z0-9\+\-\*/=<\(\)\s]`)
    re2 = regexp.MustCompile(`\b_\w*\b`)
    re3 = regexp.MustCompile(`[=<+*/-]\s*not`)
    re4 = regexp.MustCompile(`(=|<)\s*[^(=< ]+\s*([=<+*/-])`)
)

var subs = [][2]string{
    {"=", "=="}, {" not ", " ! "}, {"(not ", "(! "}, {" or ", " || "}, {" and ", " && "},
}

func possiblyValid(expr string) error {
    matches := re1.FindStringSubmatch(expr)
    if matches != nil {
        return fmt.Errorf("invalid character %q found", []rune(matches[0])[0])
    }
    if re2.MatchString(expr) {
        return fmt.Errorf("identifier cannot begin with an underscore")
    }
    if re3.MatchString(expr) {
        return fmt.Errorf("expected operand, found 'not'")
    }
    matches = re4.FindStringSubmatch(expr)
    if matches != nil {
        return fmt.Errorf("operator %q is non-associative", []rune(matches[1])[0])
    }
    return nil
}

func modify(err error) string {
    e := err.Error()
    for _, sub := range subs {
        e = strings.ReplaceAll(e, strings.TrimSpace(sub[1]), strings.TrimSpace(sub[0]))
    }
    return strings.Split(e, ":")[2][1:] // remove location info as may be inaccurate
}

func main() {
    exprs := []string{
        "$",
        "one",
        "either or both",
        "a + 1",
        "a + b < c",
        "a = b",
        "a or b = c",
        "3 + not 5",
        "3 + (not 5)",
        "(42 + 3",
        "(42 + 3)",
        " not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true",
        " and 3 < 2",
        "not 7 < 2",
        "2 < 3 < 4",
        "2 < (3 < 4)",
        "2 < foobar - 3 < 4",
        "2 < foobar and 3 < 4",
        "4 * (32 - 16) + 9 = 73",
        "235 76 + 1",
        "true or false = not true",
        "true or false = (not true)",
        "not true or false = false",
        "not true = false",
        "a + b = not c and false",
        "a + b = (not c) and false",
        "a + b = (not c and false)",
        "ab_c / bd2 or < e_f7",
        "g not = h",
        "été = false",
        "i++",
        "j & k",
        "l or _m",
    } 
   
    for _, expr := range exprs {
        fmt.Printf("Statement to verify: %q\n", expr)
        if err := possiblyValid(expr); err != nil {
            fmt.Printf("\"false\" -> %s\n\n", err.Error())
            continue
        }
        expr = fmt.Sprintf(" %s ", expr) // make sure there are spaces at both ends
        for _, sub := range subs {
            expr = strings.ReplaceAll(expr, sub[0], sub[1])
        }
        _, err := parser.ParseExpr(expr)
        if err != nil {
            fmt.Println(`"false" ->`, modify(err))
        } else {
            fmt.Println(`"true"`)
        }
        fmt.Println()
    }
}
Output:
Statement to verify: "$"
"false" -> invalid character '$' found

Statement to verify: "one"
"true"

Statement to verify: "either or both"
"true"

Statement to verify: "a + 1"
"true"

Statement to verify: "a + b < c"
"true"

Statement to verify: "a = b"
"true"

Statement to verify: "a or b = c"
"true"

Statement to verify: "3 + not 5"
"false" -> expected operand, found 'not'

Statement to verify: "3 + (not 5)"
"true"

Statement to verify: "(42 + 3"
"false" -> expected ')', found newline

Statement to verify: "(42 + 3)"
"true"

Statement to verify: " not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true"
"true"

Statement to verify: " and 3 < 2"
"false" -> expected operand, found 'and'

Statement to verify: "not 7 < 2"
"true"

Statement to verify: "2 < 3 < 4"
"false" -> operator '<' is non-associative

Statement to verify: "2 < (3 < 4)"
"true"

Statement to verify: "2 < foobar - 3 < 4"
"false" -> operator '<' is non-associative

Statement to verify: "2 < foobar and 3 < 4"
"true"

Statement to verify: "4 * (32 - 16) + 9 = 73"
"true"

Statement to verify: "235 76 + 1"
"false" -> expected 'EOF', found 76

Statement to verify: "true or false = not true"
"false" -> expected operand, found 'not'

Statement to verify: "true or false = (not true)"
"true"

Statement to verify: "not true or false = false"
"true"

Statement to verify: "not true = false"
"true"

Statement to verify: "a + b = not c and false"
"false" -> expected operand, found 'not'

Statement to verify: "a + b = (not c) and false"
"true"

Statement to verify: "a + b = (not c and false)"
"true"

Statement to verify: "ab_c / bd2 or < e_f7"
"false" -> expected operand, found '<'

Statement to verify: "g not = h"
"false" -> expected 'EOF', found 'not'

Statement to verify: "été = false"
"false" -> invalid character 'é' found

Statement to verify: "i++"
"false" -> expected 'EOF', found '++'

Statement to verify: "j & k"
"false" -> invalid character '&' found

Statement to verify: "l or _m"
"false" -> identifier cannot begin with an underscore

jq

Works with: jq

Also works with gojq, the Go implementation of jq

This entry uses the PEG (Parsing Expression Grammar) formalism to transform the given grammar to a jq verification program by a simple process that amounts to transcription.

For example, in the rule for `primary`, the alternation

   Identifier | Integer

becomes the jq expression:

   Identifer // Integer

The transcription process is not completely trivial as jq requires definitions be ordered and perhaps nested to satisfy a "define-before-use" rule. In the present case, since `primary` and `expr` are defined circularly, we define `primary` as an inner function of `expr`.

This PEG-to-jq transcription process is described in detail at [1].

The following presentation uses jq's support for regular expressions for the sake of simplicity, brevity and efficiency. For example, the grammar rule:

   Digit        =         "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;

becomes the jq program:

  def Digit:  parse("[0-9]");

where `parse` is a utility function defined in the library of PEG-oriented functions in the first subsection below.

The jq program presented here works on character strings and textual files, and hence the use of `ws` (for whitespace) in the program. Since `ws` is defined here to make whitespace optional, it might be desirable to modify the program to require whitespace (e.g. using the utility function `_`) in certain places instead. </syntaxhighlight>

Generic PEG Library

The jq module at Category:Jq/peg.jq can be included by copying it to a file, and adding an `include` statement to top of the main program, e.g. as follows:

include "peg" {search: "."};

The Grammar

def expr:
  def Digit        :  parse("[0-9]");
  
  def Letter       :  parse("[a-zA-Z]");
  
  def Identifier   :  Letter | star(Letter // Digit // literal("_"));
  
  def Integer      :  plus(Digit);
  
  def primary      :  ws
                      | (Identifier
                        // Integer
                        // (literal("(") | expr | literal(")"))
                        // literal("true")
                        // literal("false"))
		      | ws;
  
  def expr_level_6 :  primary | star((literal("*") // literal("/")) | primary) ; 
  def expr_level_5 :  expr_level_6 | star((literal("+") // literal("-")) | expr_level_6) ; 
  def expr_level_4 :  ws | optional(literal("not")) | expr_level_5 | optional(parse("[=<]") | expr_level_5) ;
  def expr_level_3 :  expr_level_4 | star(literal("and") | expr_level_4) ; 
  def expr_level_2 :  expr_level_3 | star(literal("or")  | expr_level_3) ; 

  ws | expr_level_2 | ws; 

def stmt:
  {remainder: .} | expr | eos;

The Go examples

def go: [
        "$",
        "one",
        "either or both",
        "a + 1",
        "a + b < c",
        "a = b",
        "a or b = c",
        "3 + not 5",
        "3 + (not 5)",
        "(42 + 3",
        "(42 + 3)",
        " not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true",
        " and 3 < 2",
        "not 7 < 2",
        "2 < 3 < 4",
        "2 < (3 < 4)",
        "2 < foobar - 3 < 4",
        "2 < foobar and 3 < 4",
        "4 * (32 - 16) + 9 = 73",
        "235 76 + 1",
        "true or false = not true",
        "true or false = (not true)",
        "not true or false = false",
        "not true = false",
        "a + b = not c and false",
        "a + b = (not c) and false",
        "a + b = (not c and false)",
        "ab_c / bd2 or < e_f7",
        "g not = h",
        "été = false",
        "i++",
        "j & k",
        "l or _m"
    ];

# For ease of comparison with the Go output, simply emit `true` or `false`
go[]
| (stmt | true) // false

Invocation: jq -nr -f compiler-verifying-syntax.jq

Output:

The same sequence of `true` and `false` values as at Go.

Julia

function substituteinnerparentheses(s, subs)
    ((i = findlast('(', s)) == nothing) && return (s, false)
    ((j = findfirst(')', s[i:end])) == nothing) && return (s, false)
    okparse(s[i+1:j+i-2]) || return (s, false)
    return s[1:i-1] * " " * subs * " " * s[j+i:end], true
end
 
function okparse(s)
    while findfirst('(', s) != nothing
        s, okinparentheses = substituteinnerparentheses(s, "true")
        okinparentheses || return false
    end
    s = strip(s)
    # Julia allows expressions like 2 + + + 3, or like true = not false, but these are not allowed here
    # = or < can be used only once within parentheses
    if occursin(r"(and|or|[\=\<\+\-\*\/])\s*(and|or|[\=\<\+\-\*\/])", s) ||
        occursin(r"(^(and|^or|^[\=\<\+\-\*\/]))|((and|or|[\=\<\+\-\*\/])$)", s) ||
        occursin(r"(\=|\<)\s*not\s", s) || count(c -> c == '=' || c == '<', s) > 1
        return false
    end
    # Julia allows ., ,, ; and operators like % but these are not allowed here
    # permitted: -+*/ true false and or not, ascii identifiers, and integers
    for item in split(s, r"\s+")
        !occursin(
            r"^[a-zA-Z][a-zA-Z_0-9]*$|^\d+$|^true$|^false$|^or$|^and$|^not$|^\=$|^\<$|^\+$|^-$|^\*$|^\/$",
            item) && return false
    end
    # change and, or, and not to the corresponding Julia operators
    s = replace(replace(replace(s, "and" => "&&"), "or" => "||"), "not" => "!")
    try 
        # Use Julia's parser, which will throw exception if it parses an error
        Meta.parse(s)    
    catch
        return false
    end
    return true
end    
 
teststatements = [
" not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true",
" and 3 < 2",
"not 7 < 2",
"4 * (32 - 16) + 9 = 73",
"235 76 + 1",
"true or false = not true",
"not true = false",
"2 < 5 < 9"
]
 
for s in teststatements
    println("The compiler parses the statement { $s } and outputs: ", okparse(s))
end
Output:
The compiler parses the statement {  not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true } and outputs: true
The compiler parses the statement {  and 3 < 2 } and outputs: false
The compiler parses the statement { not 7 < 2 } and outputs: true
The compiler parses the statement { 4 * (32 - 16) + 9 = 73 } and outputs: true
The compiler parses the statement { 235 76 + 1 } and outputs: false
The compiler parses the statement { true or false = not true } and outputs: false
The compiler parses the statement { not true = false } and outputs: true
The compiler parses the statement { 2 < 5 < 9 } and outputs: false

Nim

import strutils, tables

type

  # List of tokens with their textual representation.
  Token = enum tkError = "invalid token", tkIdent = "identifier", tkInt = "integer",
               tkLPar = "'('", tkRPar = "')'", tkFalse = "'false'", tkTrue = "'true'",
               tkLt = "'<'", tkEq = "'='", tkAdd = "'+'", tkSub = "'-'", tkMul = "'*'",
               tkDiv = "'/'", tkOr = "'or'", tkAnd = "'and'", tkNot = "'not'", tkEOF = "EOF"

  Lexer = object
    str: string     # String to parse.
    len: Natural    # String length.
    pos: int        # Current lexer position.
    token: Token    # Current token.
    error: string   # Error message.

const
  
  # Mapping of reserved identifiers to tokens.
  IdentTokens = {"false": tkFalse, "true": tkTrue, "or": tkOr, "and": tkAnd, "not": tkNot}.toTable
  
  # Mapping of characters to tokens.
  CharTokens = {'(': tkLPar, ')': tkRPar, '<': tkLt, '=': tkEq,
                '+': tkAdd, '-': tkSub, '*': tkMul, '/': tkDiv}.toTable


####################################################################################################
# Lexer.

# Forward reference.
func nextToken(lex: var Lexer)


func initLexer(str: string): Lexer =
  ## Initialize a lexer.
  result = Lexer(str: str, len: str.len, pos: 0)
  result.nextToken()  # Always a token ahead.


func getIdToken(lex: var Lexer) =
  ## Get the token for an identifier.
  var str: string
  while lex.pos < lex.len and lex.str[lex.pos] in IdentChars:
    str.add lex.str[lex.pos]
    inc lex.pos
  lex.token = IdentTokens.getOrDefault(str, tkIdent)


func getInt(lex: var Lexer) =
  ## Get an integer token.
  while lex.pos < lex.len and lex.str[lex.pos] in Digits:
    inc lex.pos
  lex.token = tkInt


func nextToken(lex: var Lexer) =
  ## Find the next token.

  # Skip spaces.
  while lex.pos < lex.str.len and lex.str[lex.pos] == ' ':
    inc lex.pos

  if lex.pos == lex.str.len:
    lex.token = tkEOF

  else:
    let ch = lex.str[lex.pos]
    case ch
    of 'a'..'z':
      lex.getIdToken()
    of '0'..'9':
      lex.getint()
    else:
      inc lex.pos
      lex.token = CharTokens.getOrDefault(ch, tkError)


####################################################################################################
# Parser.

# Forward reference.
proc checkExpr(lex: var Lexer): bool


proc checkPrimary(lex: var Lexer): bool =
  ## Check validity of a primary.

  if lex.token in {tkIdent, tkInt, tkFalse, tkTrue}:
    lex.nextToken()
    return true

  elif lex.token == tkLPar:
    lex.nextToken()
    if not lex.checkExpr():
      return false
    if lex.token != tkRPar:
      lex.error = "Encountered $#; expected ')'.".format(lex.token)
      return false
    else:
      lex.nextToken()
      return true

  else:
    lex.error = "Encountered $#; expected identifier, litteral or '('.".format(lex.token)
    return false


proc checkExpr6(lex: var Lexer): bool =
  ## Check validity of an expr6.

  if not lex.checkPrimary(): return false
  while lex.token in [tkMul, tkDiv]:
    lex.nextToken()
    if not lex.checkPrimary(): return false
  result = true


proc checkExpr5(lex: var Lexer): bool =
  ## Check validity of an expr5.

  if not lex.checkExpr6(): return false
  while lex.token in [tkAdd, tkSub]:
    lex.nextToken()
    if not lex.checkExpr6(): return false
  result = true


proc checkExpr4(lex: var Lexer): bool =
  ## Check validity of an expr4.

  if lex.token == tkNot: lex.nextToken()
  if not lex.checkExpr5(): return false
  if lex.token in [tkLt, tkEq]:
    lex.nextToken()
    if not lex.checkExpr5(): return false
  result = true


proc checkExpr3(lex: var Lexer): bool =
  ## Check validity of an expr3.

  if not lex.checkExpr4(): return false
  while lex.token == tkAnd:
    lex.nextToken()
    if not lex.checkExpr4(): return false
  result = true


proc checkExpr2(lex: var Lexer): bool =
  ## Check validity of an expr2.

  if not lex.checkExpr3(): return false
  while lex.token == tkOr:
    lex.nextToken()
    if not lex.checkExpr3(): return false
  result = true


proc checkExpr(lex: var Lexer): bool =
  ## Check validity of an expr.
  lex.checkExpr2()


proc checkStmt(lex: var Lexer): bool =
  ## Check validity of a statement.

  result = lex.checkExpr()
  if result and lex.pos < lex.len:
    lex.error = "Extra characters at end of statement."
    result = false

#———————————————————————————————————————————————————————————————————————————————————————————————————

# Using test set from Algol68 version.

const Tests = ["wombat",
               "wombat or monotreme",
               "( wombat and not )",
               "wombat or not",
               "a + 1",
               "a + b < c",
               "a + b - c * d / e < f and not ( g = h )",
               "a + b - c * d / e < f and not ( g = h",
               "a = b",
               "a or b = c",
               "$",
               "true or false = not true",
               "not true = false",
               "3 + not 5",
               "3 + (not 5)",
               "(42 + 3",
               " not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true",
               " and 3 < 2",
               "not 7 < 2",
               "2 < 3 < 4",
               "2 < foobar - 3 < 4",
               "2 < foobar and 3 < 4",
               "4 * (32 - 16) + 9 = 73",
               "235 76 + 1",
               "a + b = not c and false",
               "a + b = (not c) and false",
               "a + b = (not c and false)",
               "ab_c / bd2 or < e_f7",
               "g not = h",
               "été = false",
               "i++",
               "j & k",
               "l or _m"]

for test in Tests:
  var lex = initLexer(test)
  let ok = checkStmt(lex)
  echo test, " → ", ok
  if not ok: echo "*** Error at position $1. $2 ".format(lex.pos, lex.error)
Output:
wombat → true
wombat or monotreme → true
( wombat and not ) → false
*** Error at position 18. Encountered ')'; expected identifier, litteral or '('. 
wombat or not → false
*** Error at position 13. Encountered EOF; expected identifier, litteral or '('. 
a + 1 → true
a + b < c → true
a + b - c * d / e < f and not ( g = h ) → true
a + b - c * d / e < f and not ( g = h → false
*** Error at position 37. Encountered EOF; expected ')'. 
a = b → true
a or b = c → true
$ → false
*** Error at position 1. Encountered invalid token; expected identifier, litteral or '('. 
true or false = not true → false
*** Error at position 19. Encountered 'not'; expected identifier, litteral or '('. 
not true = false → true
3 + not 5 → false
*** Error at position 7. Encountered 'not'; expected identifier, litteral or '('. 
3 + (not 5) → true
(42 + 3 → false
*** Error at position 7. Encountered EOF; expected ')'. 
 not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true → true
 and 3 < 2 → false
*** Error at position 4. Encountered 'and'; expected identifier, litteral or '('. 
not 7 < 2 → true
2 < 3 < 4 → false
*** Error at position 7. Extra characters at end of statement. 
2 < foobar - 3 < 4 → false
*** Error at position 16. Extra characters at end of statement. 
2 < foobar and 3 < 4 → true
4 * (32 - 16) + 9 = 73 → true
235 76 + 1 → false
*** Error at position 6. Extra characters at end of statement. 
a + b = not c and false → false
*** Error at position 11. Encountered 'not'; expected identifier, litteral or '('. 
a + b = (not c) and false → true
a + b = (not c and false) → true
ab_c / bd2 or < e_f7 → false
*** Error at position 15. Encountered '<'; expected identifier, litteral or '('. 
g not = h → false
*** Error at position 5. Extra characters at end of statement. 
été = false → false
*** Error at position 1. Encountered invalid token; expected identifier, litteral or '('. 
i++ → false
*** Error at position 3. Encountered '+'; expected identifier, litteral or '('. 
j & k → false
*** Error at position 3. Extra characters at end of statement. 
l or _m → false
*** Error at position 6. Encountered invalid token; expected identifier, litteral or '('.

Perl

Made fix for 'not' see Discussion. Added 'not' and non-assoc fixes. Cooler output.

#!/usr/bin/perl

use strict; # http://www.rosettacode.org/wiki/Compiler/_Verifying_Syntax
use warnings;

sub error
  {
  my $pos = pos($_) // 0;
  die $_, ' ' x ($pos + 7), "^ $_[0]\n";
  }
sub want { /\G\Q$_[0]/gc or error $_[1] }

sub expr
  {
  my $level = shift;
  /\G\h+/gc;
  /\G\(/gc && want ')', "expected a closing paren", expr(0) or
    $level <= 4 && /\Gnot\b/gc && expr(5) or
    /\G (?!(?:and|or|not)\b) (?:\d+|[a-zA-Z]\w*)/gcx or
    error "expected a primary";
  /\G\h+/gc ? 1 :
    $level <= 2   && /\Gor\b/gc     ? expr(3) :
    $level <= 3   && /\Gand\b/gc    ? expr(4) :
    $level <= 4   && /\G[=<]/gc     ? expr(4.5) :
    $level == 4.5 && /\G(?=[=<])/gc ? error "non-associative operator" :
    $level <= 5   && /\G[+-]/gc     ? expr(6) :
    $level <= 6   && /\G[*\/]/gc    ? expr(7) :
    return 1 while 1;
  }

while( <DATA> )
  {
  print eval { want "\n", "expected end of input", expr(0) } ?
    " true  $_" : "false  $@";
  }

__DATA__
3 + not 5
3 + (not 5)
(42 + 3
(42 + 3 syntax_error
not 3 < 4 or (true or 3/4+8*5-5*2 < 56) and 4*3 < 12 or not true
and 3 < 2
not 7 < 2
2 < 3 < 4
2 < foobar - 3 < 4
2 < foobar and 3 < 4
4 * (32 - 16) + 9 = 73
235 76 + 1
a + b = not c and false
a + b = (not c) and false
a + b = (not c and false)
ab_c / bd2 or < e_f7
g not = h
i++
j & k
l or _m
UPPER_cAsE_aNd_letter_and_12345_test
Output:
false  3 + not 5
           ^ expected a primary
 true  3 + (not 5)
false  (42 + 3
              ^ expected a closing paren
false  (42 + 3 syntax_error
               ^ expected a closing paren
 true  not 3 < 4 or (true or 3/4+8*5-5*2 < 56) and 4*3 < 12 or not true
false  and 3 < 2
       ^ expected a primary
 true  not 7 < 2
false  2 < 3 < 4
             ^ non-associative operator
false  2 < foobar - 3 < 4
                      ^ non-associative operator
 true  2 < foobar and 3 < 4
 true  4 * (32 - 16) + 9 = 73
false  235 76 + 1
           ^ expected end of input
false  a + b = not c and false
               ^ expected a primary
 true  a + b = (not c) and false
 true  a + b = (not c and false)
false  ab_c / bd2 or < e_f7
                     ^ expected a primary
false  g not = h
         ^ expected end of input
false  i++
         ^ expected a primary
false  j & k
         ^ expected end of input
false  l or _m
            ^ expected a primary
 true  UPPER_cAsE_aNd_letter_and_12345_test

Phix

-- demo\rosetta\Compiler\Verify_Syntax.exw
with javascript_semantics
string src
integer ch, sdx
 
procedure skip_spaces()
    while 1 do
        if sdx>length(src) then exit end if
        ch = src[sdx]
        if not find(ch,{' ','\t','\r','\n'}) then exit end if
        sdx += 1
    end while
end procedure
 
enum SYMBOL, INTEGER, IDENT, ERROR, EOF
constant toktypes = {"SYMBOL","INTEGER","IDENT","ERROR","EOF"}
sequence tok

function sprintok(string fmt)
    tok[1] = toktypes[tok[1]]
    return sprintf(fmt,{tok})
end function

procedure next_token()
-- yeilds one of:
--  {SYMBOL,ch} where ch is one of "()+-/*=&<", or
--  {INTEGER,n}, or
--  {IDENT,string}, or
--  {ERROR,msg}, or
--  {EOF}
    skip_spaces()
    integer tokstart = sdx
    if tok[1]=ERROR then
        ?{"erm, tok is",tok} -- looping??
    elsif sdx>length(src) then
        tok = {EOF}
    elsif find(ch,"()+-/*=&<") then
        sdx += 1
        tok = {SYMBOL,ch&""}
    elsif (ch>='0' and ch<='9') then
        integer n = ch-'0'
        while true do
            sdx += 1
            if sdx>length(src) then exit end if
            ch = src[sdx]
            if ch<'0' or ch>'9' then exit end if
            n = n*10 + ch-'0'
        end while
        tok = {INTEGER,n}       
    elsif (ch>='a' and ch<='z')
       or (ch>='A' and ch<='Z') then
        while true do
            sdx += 1
            if sdx>length(src) then exit end if
            ch = src[sdx]
            if ch!='_' 
            and (ch<'a' or ch>'z')
            and (ch<'A' or ch>'Z')
            and (ch<'0' or ch>'9') then
                exit
            end if
        end while
        tok = {IDENT,src[tokstart..sdx-1]}
    elsif ch='_' then
        tok = {ERROR,"identifiers may not start with _"}
        sdx += 1
    else
        tok = {ERROR,sprintf("illegal char (%c/%d)",ch)}
        sdx += 1
    end if
end procedure

forward procedure or_expr()

procedure primary()
    integer tt = tok[1]
    if tt=IDENT
    or tt=INTEGER then
        next_token()
    elsif tok={SYMBOL,"("} then
        next_token()
        or_expr()
        if tok!={SYMBOL,")"} then
            tok = {ERROR,") expected"}
        else
            next_token()
        end if
    else
        tok = {ERROR,sprintok("invalid [%v]")}
    end if
end procedure

procedure mul_expr()
    while true do
        primary()
        if not find(tok,{{SYMBOL,"*"},{SYMBOL,"/"}}) then exit end if
        next_token()
    end while
end procedure

procedure sum_expr()
    while true do
        mul_expr()
        if not find(tok,{{SYMBOL,"+"},{SYMBOL,"-"}}) then exit end if
        next_token()
    end while
end procedure

procedure cmp_expr()
    if tok=={IDENT,"not"} then next_token() end if
--  while true do
--      sum_expr()
--      if not find(tok,{{SYMBOL,"="},{SYMBOL,"<"}}) then exit end if
--      next_token()
--  end while
    sum_expr()
    if find(tok,{{SYMBOL,"="},{SYMBOL,"<"}}) then
        next_token()
        sum_expr()
    end if
end procedure

procedure and_expr()
    while true do
        cmp_expr()
        if tok!={IDENT,"and"} then exit end if
        next_token()
    end while
end procedure

procedure or_expr()
    while true do
        and_expr()
        if tok!={IDENT,"or"} then exit end if
        next_token()
    end while
end procedure

procedure statement()
    or_expr()
end procedure
 
procedure verify_syntax(string source)
    src = source
    sdx = 1
    tok = {0} -- ("not error"/invalid-ish)
    next_token()
    statement()
    printf(1,"%30s  ==>  %s\n",{source,iff(tok[1]=EOF?"true":sprintok("false [tok=%v]"))})
end procedure

constant tests = {
        "$",
        "one",
        "either or both",
        "a + 1",
        "a + b < c",
        "a = b",
        "a or b = c",
        "3 + not 5",
        "3 + (not 5)",
        "(42 + 3",
        "(42 + 3)",
        " not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true",
        " and 3 < 2",
        "not 7 < 2",
        "2 < 3 < 4",
        "2 < (3 < 4)",
        "2 < foobar - 3 < 4",
        "2 < foobar and 3 < 4",
        "4 * (32 - 16) + 9 = 73",
        "235 76 + 1",
        "true or false = not true",
        "true or false = (not true)",
        "not true or false = false",
        "not true = false",
        "a + b = not c and false",
        "a + b = (not c) and false",
        "a + b = (not c and false)",
        "ab_c / bd2 or < e_f7",
        "g not = h",
        "i++",
        "j & k",
        "l or _m"}

printf(1,"Verify Syntax:\n")
for i=1 to length(tests) do
    verify_syntax(tests[i])
end for

?"done"
{} = wait_key()
Output:

Note that "= not c" fails, whereas "= (not c)" passes, see talk page. (Arguably the task definition should be fixed.)

Verify Syntax:
                             $  ==>  false [tok={"ERROR",`invalid [{"ERROR","illegal char ($/36)"}]`}]
                           one  ==>  true
                either or both  ==>  true
                         a + 1  ==>  true
                     a + b < c  ==>  true
                         a = b  ==>  true
                    a or b = c  ==>  true
                     3 + not 5  ==>  false [tok={"INTEGER",5}]
                   3 + (not 5)  ==>  true
                       (42 + 3  ==>  false [tok={"ERROR",") expected"}]
                      (42 + 3)  ==>  true
 not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true  ==>  true
                     and 3 < 2  ==>  false [tok={"INTEGER",3}]
                     not 7 < 2  ==>  true
                     2 < 3 < 4  ==>  false [tok={"SYMBOL","<"}]
                   2 < (3 < 4)  ==>  true
            2 < foobar - 3 < 4  ==>  false [tok={"SYMBOL","<"}]
          2 < foobar and 3 < 4  ==>  true
        4 * (32 - 16) + 9 = 73  ==>  true
                    235 76 + 1  ==>  false [tok={"INTEGER",76}]
      true or false = not true  ==>  false [tok={"IDENT","true"}]
    true or false = (not true)  ==>  true
     not true or false = false  ==>  true
              not true = false  ==>  true
       a + b = not c and false  ==>  false [tok={"IDENT","c"}]
     a + b = (not c) and false  ==>  true
     a + b = (not c and false)  ==>  true
          ab_c / bd2 or < e_f7  ==>  false [tok={"ERROR",`invalid [{"SYMBOL","<"}]`}]
                     g not = h  ==>  false [tok={"IDENT","not"}]
                           i++  ==>  false [tok={"ERROR",`invalid [{"SYMBOL","+"}]`}]
                         j & k  ==>  false [tok={"SYMBOL","&"}]
                       l or _m  ==>  false [tok={"ERROR",`invalid [{"ERROR","identifiers may not start with _"}]`}]

Raku

Format of task grammar is changed from EBNF to ABNF to cater for the Grammar::ABNF module and testing data is taken from the Perl entry.

# 20200511 Raku programming solution

use Grammar::ABNF;

grammar G is Grammar::ABNF { token CRLF { "\n" } };

my $g = G.generate(Q:to<RULES>
stmt         =          expr
expr         =          expr_level_2
expr_level_2 =          expr_level_3 *(      " or "       expr_level_3 )
expr_level_3 =          expr_level_4 *(      " and "      expr_level_4 )
expr_level_4 = ["not "] expr_level_5  [ [ " = " / " < " ] expr_level_5 ]
expr_level_5 =          expr_level_6 *( [ " + " / " - " ] expr_level_6 )
expr_level_6 =          primary      *( [ " * " / " / " ] primary )
Integer      =  Digit  *( Digit )
Identifier   =  Letter *( Letter / Digit / "_" )
primary      =  Identifier / Integer / "(" expr ")" / " true " / " false "
Digit        =  "0" / "1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9"
Letter       =  "a" / "b" / "c" / "d" / "e" / "f" / "g" / "h" / "i" / "j" 
               / "k" / "l" / "m" / "n" / "o" / "p" / "q" / "r" / "s" / "t"
               / "u" / "v" / "w" / "x" / "y" / "z" / "A" / "B" / "C" / "D"
               / "E" / "F" / "G" / "H" / "I" / "J" / "K" / "L" / "M" / "N"
               / "O" / "P" / "Q" / "R" / "S" / "T" / "U" / "V" / "W" / "X"   
               / "Y" / "Z"
RULES
);

my \DATA = Q:to<DATA>;
3 + not 5
3 + (not 5)
(42 + 3
(42 + 3 syntax_error
not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true
and 3 < 2
not 7 < 2
2 < 3 < 4
2 < foobar - 3 < 4
2 < foobar and 3 < 4
4 * (32 - 16) + 9 = 73
235 76 + 1
a + b = not c and false
a + b = (not c) and false
a + b = (not c and false)
ab_c / bd2 or < e_f7
g not = h
i++
j & k
l or _m
UPPER_cAsE_aNd_letter_and_12345_test
DATA

say $g.parse($_).Bool, "\t", $_ for DATA.lines
Output:
False   3 + not 5
True    3 + (not 5)
False   (42 + 3
False   (42 + 3 syntax_error
True    not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true
False   and 3 < 2
True    not 7 < 2
False   2 < 3 < 4
False   2 < foobar - 3 < 4
True    2 < foobar and 3 < 4
True    4 * (32 - 16) + 9 = 73
False   235 76 + 1
False   a + b = not c and false
True    a + b = (not c) and false
True    a + b = (not c and false)
False   ab_c / bd2 or < e_f7
False   g not = h
False   i++
False   j & k
False   l or _m
True    UPPER_cAsE_aNd_letter_and_12345_test

Wren

Translation of: Nim
Library: Wren-dynamic
Library: Wren-str
import "./dynamic" for Enum
import "./str" for Char

var Token = Enum.create(
    "Token", ["error", "ident", "int", "lpar", "rpar", "False", "True",
    "lt", "eq", "add", "sub", "mul", "div", "or", "and", "not", "eof"]
)

var Token2Text = {
    Token.error: "invalid token", Token.ident: "identifier", Token.int: "integer",
    Token.lpar: "'('", Token.rpar: "')'", Token.False: "'false'", Token.True: "'true'",
    Token.lt: "'<'", Token.eq: "'='", Token.add: "'+'", Token.sub: "'-'",
    Token.mul: "'*'", Token.div: "'/'", Token.or: "'or'", Token.and: "'and'",
    Token.not: "'not'", Token.eof: "EOF"
}

var IdentTokens = {
    "false": Token.False, "true": Token.True, "or": Token.or,
    "and": Token.and, "not": Token.not
}

var CharTokens = {
    "(": Token.lpar, ")": Token.rpar, "<": Token.lt, "=": Token.eq,
    "+": Token.add, "-": Token.sub, "*": Token.mul, "/": Token.div
}

var IsIdentChar = Fn.new { |c| Char.isAsciiAlphaNum(c) || c == "_" }

class Lexer {
    static init(s) {
        var lex = Lexer.new(s, s.count, 0, "", "")
        lex.nextToken
        return lex
    }

    construct new(str, len, pos, token, error) {
        _str   = str     // string to parse
        _len   = len     // string length
        _pos   = pos     // current lexer position
        _token = token   // current token
        _error = error   // error message
    }

    // property getters required
    pos   { _pos }
    error { _error }

    // get the token for an identifier
    getIdToken {
        var s = ""
        while (_pos < _len && IsIdentChar.call(_str[_pos])) {
          s = s + _str[_pos]
          _pos = _pos + 1
        }
        _token = IdentTokens.containsKey(s) ? IdentTokens[s] : Token.ident
    }

    // get an integer token
    getInt {
        while (_pos < _len && Char.isDigit(_str[_pos])) _pos = _pos + 1
        _token = Token.int
    }

    // find the next token
    nextToken {
        // skip spaces
        while (_pos < _len && _str[_pos] == " ") _pos = _pos + 1
        if (_pos == _len) {
            _token = Token.eof
        } else {
            var ch = _str[_pos]
            if (Char.isAsciiLower(ch)) {
                getIdToken
            } else if (Char.isDigit(ch)) {
                getInt
            } else {
                _pos = _pos + 1
                _token = CharTokens.containsKey(ch) ? CharTokens[ch] : Token.error
            }
        }
    }

    // check validity of a primary
    checkPrimary {
        if ([Token.ident, Token.int, Token.False, Token.True].contains(_token)) {
            nextToken
            return true
        } else if (_token == Token.lpar) {
            nextToken
            if (!checkExpr) return false
            if (_token != Token.rpar) {
                _error = "Encountered %(Token2Text[_token]); expected ')'"
                return false
            } else {
                nextToken
                return true
            }
        } else {
            _error = "Encountered %(Token2Text[_token]); expected identifier, literal or '('"
            return false
        }
    }

    // check validity of an expr6
    checkExpr6 {
        if (!checkPrimary) return false
        while ([Token.mul, Token.div].contains(_token)) {
            nextToken
            if (!checkPrimary) return false
        }
        return true
    }

    // check validity of an expr5
    checkExpr5 {
        if (!checkExpr6) return false
        while ([Token.add, Token.sub].contains(_token)) {
            nextToken
            if (!checkExpr6) return false
        }
        return true
    }

    // check validity of an expr4
    checkExpr4 {
        if (_token == Token.not) nextToken
        if (!checkExpr5) return false
        if ([Token.lt, Token.eq].contains(_token)) {
            nextToken
            if (!checkExpr5) return false
        }
        return true
    }

    // check validity of an expr3
    checkExpr3 {
        if (!checkExpr4) return false
        while (_token == Token.and) {
            nextToken
            if (!checkExpr4) return false
        }
        return true
    }

    // check validity of an expr2
    checkExpr2 {
        if (!checkExpr3) return false
        while (_token == Token.or) {
            nextToken
            if (!checkExpr3) return false
        }
        return true
    }

    // check validity of an expr
    checkExpr { checkExpr2 }

    // check validity of a statement
    checkStmt {
        var result = checkExpr
        if (result && _pos < _len) {
            _error = "Extra characters at end of statement."
            result = false
        }
        return result
    }
}

// using test set from Algol68 version

var tests = [
    "wombat",
    "wombat or monotreme",
    "( wombat and not )",
    "wombat or not",
    "a + 1",
    "a + b < c",
    "a + b - c * d / e < f and not ( g = h )",
    "a + b - c * d / e < f and not ( g = h",
    "a = b",
    "a or b = c",
    "$",
    "true or false = not true",
    "not true = false",
    "3 + not 5",
    "3 + (not 5)",
    "(42 + 3",
    " not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true",
    " and 3 < 2",
    "not 7 < 2",
    "2 < 3 < 4",
    "2 < foobar - 3 < 4",
    "2 < foobar and 3 < 4",
    "4 * (32 - 16) + 9 = 73",
    "235 76 + 1",
    "a + b = not c and false",
    "a + b = (not c) and false",
    "a + b = (not c and false)",
    "ab_c / bd2 or < e_f7",
    "g not = h",
    "été = false",
    "i++",
    "j & k",
    "l or _m"
]

for (test in tests) {
    var lex = Lexer.init(test)
    var ok = lex.checkStmt
    System.print("%(test) -> %(ok)")
    if (!ok) {
        System.print("*** Error at position %(lex.pos). %(lex.error)\n")
    }
}
Output:
wombat -> true
wombat or monotreme -> true
( wombat and not ) -> false
*** Error at position 18. Encountered ')'; expected identifier, literal or '('

wombat or not -> false
*** Error at position 13. Encountered EOF; expected identifier, literal or '('

a + 1 -> true
a + b < c -> true
a + b - c * d / e < f and not ( g = h ) -> true
a + b - c * d / e < f and not ( g = h -> false
*** Error at position 37. Encountered EOF; expected ')'

a = b -> true
a or b = c -> true
$ -> false
*** Error at position 1. Encountered invalid token; expected identifier, literal or '('

true or false = not true -> false
*** Error at position 19. Encountered 'not'; expected identifier, literal or '('

not true = false -> true
3 + not 5 -> false
*** Error at position 7. Encountered 'not'; expected identifier, literal or '('

3 + (not 5) -> true
(42 + 3 -> false
*** Error at position 7. Encountered EOF; expected ')'

 not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true -> true
 and 3 < 2 -> false
*** Error at position 4. Encountered 'and'; expected identifier, literal or '('

not 7 < 2 -> true
2 < 3 < 4 -> false
*** Error at position 7. Extra characters at end of statement.

2 < foobar - 3 < 4 -> false
*** Error at position 16. Extra characters at end of statement.

2 < foobar and 3 < 4 -> true
4 * (32 - 16) + 9 = 73 -> true
235 76 + 1 -> false
*** Error at position 6. Extra characters at end of statement.

a + b = not c and false -> false
*** Error at position 11. Encountered 'not'; expected identifier, literal or '('

a + b = (not c) and false -> true
a + b = (not c and false) -> true
ab_c / bd2 or < e_f7 -> false
*** Error at position 15. Encountered '<'; expected identifier, literal or '('

g not = h -> false
*** Error at position 5. Extra characters at end of statement.

été = false -> false
*** Error at position 1. Encountered invalid token; expected identifier, literal or '('

i++ -> false
*** Error at position 3. Encountered '+'; expected identifier, literal or '('

j & k -> false
*** Error at position 3. Extra characters at end of statement.

l or _m -> false
*** Error at position 6. Encountered invalid token; expected identifier, literal or '('