Compiler/Verifying syntax: Difference between revisions

From Rosetta Code
Content added Content deleted
(→‎{{header|Go}}: Another RE tweak and examples aligned with those of Phix entry.)
(→‎{{header|ALGOL W}}: Slight tweeks, couple more tests, use "logical" instead of non-standard "boolean")
Line 50: Line 50:
% verify expressions match expected syntax %
% verify expressions match expected syntax %
procedure stmt ( string(256) value text ) ; begin
procedure stmt ( string(256) value text ) ; begin
% the parsing procedures return truee if the expression matches the %
% the parsing procedures return true if the expression matches the %
% the required element; false otherwise - a parse tree is not built %
% required element, false otherwise - a parse tree is not built %
boolean haveInteger, haveIdentifier, haveEof, hadError;
logical haveInteger, haveIdentifier, haveEof, hadError;
integer currPos, maxPos, syPos;
integer currPos, maxPos, syPos;
string(1) currCh;
string(1) currCh;
string(256) srcText;
string(256) srcText;
string(256) sy;
string(256) sy;
boolean procedure expr ; expr_level_2;
logical procedure expr ; expr_level_2;
boolean procedure expr_level_2 ; begin
logical procedure expr_level_2 ; begin
boolean ok;
logical ok;
ok := expr_level_3;
ok := expr_level_3;
while ok and have( "or" ) do ok := next and expr_level_3;
while ok and have( "or" ) do ok := next and expr_level_3;
ok
ok
end expr_level_2 ;
end expr_level_2 ;
boolean procedure expr_level_3 ; begin
logical procedure expr_level_3 ; begin
boolean ok;
logical ok;
ok := expr_level_4;
ok := expr_level_4;
while have( "and" ) and ok do ok := next and expr_level_4;
while have( "and" ) and ok do ok := next and expr_level_4;
ok
ok
end expr_level_3 ;
end expr_level_3 ;
boolean procedure expr_level_4 ; begin
logical procedure expr_level_4 ; begin
boolean ok;
logical ok;
ok := true;
ok := true;
if have( "not" ) then ok := next;
if have( "not" ) then ok := next;
Line 78: Line 78:
ok
ok
end expr_level_4 ;
end expr_level_4 ;
boolean procedure expr_level_5 ; begin
logical procedure expr_level_5 ; begin
boolean ok;
logical ok;
ok := expr_level_6;
ok := expr_level_6;
while ok and ( have( "+" ) or have( "-" ) ) do ok := next and expr_level_6;
while ok and ( have( "+" ) or have( "-" ) ) do ok := next and expr_level_6;
ok
ok
end expr_level_5 ;
end expr_level_5 ;
boolean procedure expr_level_6 ; begin
logical procedure expr_level_6 ; begin
boolean ok;
logical ok;
ok := primary;
ok := primary;
while ok and ( have( "*" ) or have( "/" ) ) do ok := next and primary;
while ok and ( have( "*" ) or have( "/" ) ) do ok := next and primary;
ok
ok
end expr_level_6 ;
end expr_level_6 ;
boolean procedure primary ;
logical procedure primary ;
if haveIdentifier or haveInteger or have( "true" ) or have( "false" ) then begin
if haveIdentifier or haveInteger or have( "true" ) or have( "false" ) then begin
void( next );
void( next );
Line 96: Line 96:
end
end
else if have( "(" ) then next and expr and mustBe( ")" )
else if have( "(" ) then next and expr and mustBe( ")" )
else error( "Expecting identifier, integer or ""(""" ) ;
else error( "Expecting identifier, literal or ""(""" ) ;
logical procedure addAndNextChar ; begin
logical procedure addAndNextChar ; begin
if syPos = 255 then void( error( "Symbol too long" ) )
if syPos = 255 then void( error( "Symbol too long" ) )
Line 105: Line 105:
nextChar
nextChar
end addAndNextChar ;
end addAndNextChar ;
boolean procedure next ; begin
logical procedure next ; begin
logical ok;
logical ok;
haveInteger := haveIdentifier := false;
haveInteger := haveIdentifier := false;
Line 125: Line 125:
ok
ok
end next ;
end next ;
boolean procedure skipSpaces ; begin
logical procedure skipSpaces ; begin
boolean ok;
logical ok;
ok := not haveEof;
ok := not haveEof;
while ok and currCh = " " do ok := nextChar;
while ok and currCh = " " do ok := nextChar;
ok
ok
end skipSpaces ;
end skipSpaces ;
boolean procedure haveLetter ; not haveEof and ( ( currCh >= "a" and currCh <= "z" )
logical procedure haveLetter ; not haveEof and ( ( currCh >= "a" and currCh <= "z" )
or ( currCh >= "A" and currCh <= "Z" ) );
or ( currCh >= "A" and currCh <= "Z" ) );
boolean procedure haveDigit ; not haveEof and ( currCh >= "0" and currCh <= "9" );
logical procedure haveDigit ; not haveEof and ( currCh >= "0" and currCh <= "9" );
boolean procedure have ( string(12) value text ) ; text = sy;
logical procedure have ( string(12) value text ) ; text = sy;
boolean procedure mustBe ( string(12) value text ) ; begin
logical procedure mustBe ( string(12) value text ) ; begin
boolean ok, haveSy;
logical ok;
ok := have( text );
ok := have( text );
if ok then haveSy := next
if ok then void( next )
else begin
else begin
string(256) msg;
string(256) msg;
Line 147: Line 147:
ok
ok
end mustBe ;
end mustBe ;
boolean procedure nextChar ; begin
logical procedure nextChar ; begin
haveEof := currPos > maxPos;
haveEof := currPos > maxPos;
if not haveEof then begin
if not haveEof then begin
Line 161: Line 161:
length
length
end strlen ;
end strlen ;
boolean procedure error ( string(256) value msg ) ; begin
logical procedure error ( string(256) value msg ) ; begin
if not hadError then begin
if not hadError then begin
% have the first error %
% have the first error %
Line 173: Line 173:
end ewrror ;
end ewrror ;
procedure showText ( string(256) value text; integer value length ) ; for c := 0 until length do writeon( text( c // 1 ) );
procedure showText ( string(256) value text; integer value length ) ; for c := 0 until length do writeon( text( c // 1 ) );
procedure void ( boolean value b ) ; begin end void ;
procedure void ( logical value b ) ; begin end void ;
% parse text and output messages indicating whether it is OK or not %
% parse text and output messages indicating whether it is OK or not %
hadError := false;
hadError := false;
Line 192: Line 192:
stmt( "wombat" );
stmt( "wombat" );
stmt( "wombat or monotreme" );
stmt( "wombat or monotreme" );
stmt( "( wombat and not )" );
stmt( "wombat or not" );
stmt( "a + 1" );
stmt( "a + 1" );
stmt( "a + b < c" );
stmt( "a + b < c" );
Line 200: Line 202:
stmt( "$" );
stmt( "$" );
% test cases from Go %
% test cases from Go %
stmt( "true or false = not true" );
stmt( "not true = false" );
stmt( "3 + not 5" );
stmt( "3 + not 5" );
stmt( "3 + (not 5)" );
stmt( "3 + (not 5)" );
Line 225: Line 229:
wombat: true
wombat: true
wombat or monotreme: true
wombat or monotreme: true
( wombat and not ): error at: 18 ()): Expecting identifier, literal or "(" false
wombat or not: error at: 13 (<eof>): Expression expected false
a + 1: true
a + 1: true
a + b < c: true
a + b < c: true
Line 231: Line 237:
a = b: true
a = b: true
a or b = c: true
a or b = c: true
$: error at: 1 ($): Expecting identifier, integer or "(" false
$: error at: 1 ($): Expecting identifier, literal or "(" false
3 + not 5: error at: 8 (not): Expecting identifier, integer or "(" false
true or false = not true: error at: 20 (not): Expecting identifier, literal or "(" false
not true = false: true
3 + not 5: error at: 8 (not): Expecting identifier, literal or "(" false
3 + (not 5): true
3 + (not 5): true
(42 + 3: error at: 7 (<eof>): Expected: ) false
(42 + 3: error at: 7 (<eof>): Expected: ) false
not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true: true
not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true: true
and 3 < 2: error at: 5 (and): Expecting identifier, integer or "(" false
and 3 < 2: error at: 5 (and): Expecting identifier, literal or "(" false
not 7 < 2: true
not 7 < 2: true
2 < 3 < 4: error at: 8 (<): Expected EOF after expression false
2 < 3 < 4: error at: 8 (<): Expected EOF after expression false
Line 243: Line 251:
4 * (32 - 16) + 9 = 73: true
4 * (32 - 16) + 9 = 73: true
235 76 + 1: error at: 7 (76): Expected EOF after expression false
235 76 + 1: error at: 7 (76): Expected EOF after expression false
a + b = not c and false: error at: 12 (not): Expecting identifier, integer or "(" false
a + b = not c and false: error at: 12 (not): Expecting identifier, literal or "(" false
a + b = (not c) and false: true
a + b = (not c) and false: true
a + b = (not c and false): true
a + b = (not c and false): true
ab_c / bd2 or < e_f7: error at: 16 (<): Expecting identifier, integer or "(" false
ab_c / bd2 or < e_f7: error at: 16 (<): Expecting identifier, literal or "(" false
g not = h: error at: 6 (not): Expected EOF after expression false
g not = h: error at: 6 (not): Expected EOF after expression false
été = false: error at: 2 (Ã): Expecting identifier, integer or "(" false
été = false: error at: 2 (Ã): Expecting identifier, literal or "(" false
i++: error at: 3 (+): Expecting identifier, integer or "(" false
i++: error at: 3 (+): Expecting identifier, literal or "(" false
j & k: error at: 4 (&): Expected EOF after expression false
j & k: error at: 4 (&): Expected EOF after expression false
l or _m: error at: 7 (_): Expecting identifier, integer or "(" false
l or _m: error at: 7 (_): Expecting identifier, literal or "(" false
</pre>
</pre>



Revision as of 20:19, 7 January 2020

Compiler/Verifying syntax is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Verifying Syntax
A Syntax Analyzer that verifies a token stream,
outputs a string "true" if the token stream matches the grammar requirement,
outputs a string "false" if the token stream does not match the grammar.

Task
The program reads a token stream from an input file and
outputs the string "true" if the token stream matches the grammar, or
outputs the string "false" together with error messages if it does not.
The grammar is given below, written in Extended Backus-Naur Form (EBNF).

Grammar

stmt         =         expr ; 

expr         =         expr_level_2; 
expr_level_2 =         expr_level_3 {"or" expr_level_3} ; 
expr_level_3 =         expr_level_4 {"and" expr_level_4} ; 
expr_level_4 = ["not"] expr_level_5 [('=' | '<') expr_level_5] ; 
expr_level_5 =         expr_level_6 {('+' | '-') expr_level_6} ; 
expr_level_6 =         primary      {('*' | '/') primary} ; 

primary      =         Identifier
                     | Integer
                     | '(' expr ')'
                     | "true"
                     | "false"
                     ;
Integer      =         Digit {Digit};

Identifier   =         Letter {Letter | Digit | '_'};

Digit        =         "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;

Letter       =         "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" 
                     | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" 
                     | "u" | "v" | "w" | "x" | "y" | "z" | "A" | "B" | "C" | "D" 
                     | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" 
                     | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" 
                     | "Y" | "Z" ;


ALGOL W

Includes the test cases from the Go sample. Note, strings are limited to 256 characters in Algol W. <lang algolw>begin

   % verify expressions match expected syntax                                %
   procedure stmt ( string(256) value text ) ; begin
       % checks text against the expression grammar and writes the text      %
       % followed by true, or by the first error found and false             %
       % the parsing procedures return true if the expression matches the    %
       % required element, false otherwise - a parse tree is not built       %
       % haveInteger and haveIdentifier are set by next to classify sy       %
       % haveEof is set by nextChar once currPos has passed maxPos           %
       % hadError is set by error when the first error has been reported     %
       logical     haveInteger, haveIdentifier, haveEof, hadError;
       % currPos is the position in srcText of the next character to read    %
       % maxPos is the position of the last non-blank character of srcText   %
       % syPos is the position in sy where the next character will be stored %
       integer     currPos, maxPos, syPos;
       % the most recently read source character                             %
       string(1)   currCh;
       % the text being parsed                                               %
       string(256) srcText;
       % the current symbol                                                  %
       string(256) sy;
       % expr = expr_level_2                                                 %
       logical procedure expr ; expr_level_2;
       % expr_level_2 = expr_level_3 followed by zero or more: or expr_level_3 %
       logical procedure expr_level_2 ; begin
           logical ok;
           ok := expr_level_3;
           while ok and have( "or" ) do ok := next and expr_level_3;
           ok
       end expr_level_2 ;
       % expr_level_3 = expr_level_4 followed by zero or more: and expr_level_4 %
       logical procedure expr_level_3 ; begin
           logical ok;
           ok := expr_level_4;
           while have( "and" ) and ok do ok := next and expr_level_4;
           ok
       end expr_level_3 ;
       % expr_level_4 = optional not, expr_level_5, then optionally one      %
       % comparison operator and a second expr_level_5 - it is an if, not a  %
       % while, so a second comparison operator is left unconsumed           %
       logical procedure expr_level_4 ; begin
           logical ok;
           ok := true;
           if have( "not" ) then ok := next;
           if ok then ok := expr_level_5;
           if ok and ( have( "=" ) or have( "<" ) ) then ok := next and expr_level_5;
           ok
       end expr_level_4 ;
       % expr_level_5 = expr_level_6 followed by zero or more: + or - expr_level_6 %
       logical procedure expr_level_5 ; begin
           logical ok;
           ok := expr_level_6;
           while ok and ( have( "+" ) or have( "-" ) ) do ok := next and expr_level_6;
           ok
       end expr_level_5 ;
       % expr_level_6 = primary followed by zero or more: * or / primary     %
       logical procedure expr_level_6 ; begin
           logical ok;
           ok := primary;
           while ok and ( have( "*" ) or have( "/" ) ) do ok := next and primary;
           ok
       end expr_level_6 ;
       % primary = identifier, integer, true, false or a bracketed expr      %
       % the result of next is discarded after an operand, running out of    %
       % source there is not an error - anything else is reported via error  %
       logical procedure primary ;
           if haveIdentifier or haveInteger or have( "true" ) or have( "false" ) then begin
               void( next );
               true
               end
           else if have( "(" ) then next and expr and mustBe( ")" )
           else error( "Expecting identifier, literal or ""(""" ) ;
       % appends currCh to sy, reporting an error when syPos has reached 255 %
       % then reads the next character - returns the result of nextChar      %
       logical procedure addAndNextChar ; begin
           if syPos = 255 then void( error( "Symbol too long" ) )
           else if syPos < 255 then begin
               sy( syPos // 1 ) := currCh;
               syPos := syPos + 1
           end if_syPos_eq_255__lt_255 ;
           nextChar
       end addAndNextChar ;
       % reads the next symbol into sy and sets haveInteger / haveIdentifier %
       % sy is set to <eof> and false is returned if skipSpaces finds no     %
       % more source text                                                    %
       logical procedure next ; begin
           logical ok;
           haveInteger := haveIdentifier := false;
           ok          := skipSpaces;
           sy          := "";
           syPos       := 0;
           if not ok then sy := "<eof>"
           else begin
               if haveDigit then begin
                   haveInteger := true;
                   while addAndNextChar and haveDigit do begin end
                   end
               else if haveLetter then begin
                   while addAndNextChar and ( haveLetter or haveDigit or currCh = "_" ) do begin end;
                   % the reserved words are not identifiers                  %
                   haveIdentifier := sy not = "and" and sy not = "or" and sy not = "not" and sy not = "true" and sy not = "false"
                   end
               else void( addAndNextChar );
           end if_not_ok__ ;
           ok
       end next ;
       % advances past spaces - returns false if the end of the source text  %
       % has been reached                                                    %
       logical procedure skipSpaces ; begin
           logical ok;
           ok := not haveEof;
           while ok and currCh = " " do ok := nextChar;
           ok
       end skipSpaces ;
       % true if not at the end of the source and currCh is in a-z or A-Z    %
       logical procedure haveLetter ; not haveEof and (  ( currCh >= "a" and currCh <= "z" )
                                                      or ( currCh >= "A" and currCh <= "Z" ) );
       % true if not at the end of the source and currCh is in 0-9           %
       logical procedure haveDigit  ; not haveEof and (    currCh >= "0" and currCh <= "9" );
       % true if the current symbol is text                                  %
       logical procedure have   ( string(12) value text ) ; text = sy;
       % if the current symbol is text, moves on to the next symbol,         %
       % otherwise reports that text was expected                            %
       % returns true if the symbol was text, false otherwise                %
       logical procedure mustBe ( string(12) value text ) ; begin
           logical ok;
           ok := have( text );
           if ok then void( next )
           else begin
               string(256) msg;
               msg := "Expected:";
               % place text after the message, leaving one space between     %
               msg( strlen( msg ) + 2 // 12 ) := text;
               void( error( msg ) )
           end if_ok;
           ok
       end mustBe ;
       % loads the character at currPos into currCh and advances currPos     %
       % returns false, with haveEof set, when currPos is beyond maxPos      %
       logical procedure nextChar ; begin
           haveEof := currPos > maxPos;
           if not haveEof then begin
               currCh  := srcText( currPos // 1 ); 
               currPos := currPos + 1
           end if_not_haveEof ;
           not haveEof
       end nextChar ;
       % returns the position of the last non-blank character of text,       %
       % this is -1 if text is entirely blank                                %
       integer procedure strlen ( string(256) value text ) ; begin
           integer length;
           length := 255;
           while length >= 0 and text( length // 1 ) = " " do length := length - 1;
           length
       end strlen ;
       % writes the position, current symbol and msg for the first error     %
       % only, later errors are not shown - always returns false             %
       logical procedure error ( string(256) value msg ) ; begin
           if not hadError then begin
               % have the first error %
               writeon( " error at: ", I_W := 1, currPos, "(" );
               showText( sy, strlen( sy ) );
               writeon( "): " );
               showText( msg, strlen( msg ) );
               hadError := true
           end if_not_hadError ;
           false
       end error ;
       % writes the characters at positions 0 to length of text              %
       procedure showText ( string(256) value text; integer value length ) ; for c := 0 until length do writeon( text( c // 1 ) );
       % discards b - allows a logical procedure to be called as a statement %
       procedure void ( logical value b ) ; begin end void ;
       % parse text and output messages indicating whether it is OK or not %
       hadError := false;
       sy       := "<bof>";
       srcText  := text;
       currPos  := 0;
       maxPos   := strlen( srcText );
       write();
       showText( srcText, maxPos );
       writeon( ": " );
       % read the first character and symbol, parse one expression and then  %
       % require that the whole of the source text was used                  %
       if not nextChar               then void( error( "Blank source text"             ) )
       else if not ( next and expr ) then void( error( "Expression expected"           ) )
       else if not haveEof           then void( error( "Expected EOF after expression" ) );
       if hadError then writeon( " false" )
       else             writeon( " true"  )
   end stmt ;
   % test cases - each call writes the source text followed by true, or by %
   % the first error found and false                                        %
   stmt( "wombat" );
   stmt( "wombat or monotreme" );
   stmt( "( wombat and not )" );
   stmt( "wombat or not" );
   stmt( "a + 1" );
   stmt( "a + b < c" );
   stmt( "a + b - c * d / e < f and not ( g = h )" );
   stmt( "a + b - c * d / e < f and not ( g = h" );
   stmt( "a = b" );
   stmt( "a or b = c" );
   stmt( "$" );
   % test cases from Go %
   stmt( "true or false = not true" );
   stmt( "not true = false" );
   stmt( "3 + not 5" );
   stmt( "3 + (not 5)" );
   stmt( "(42 + 3" );
   stmt( " not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true" );
   stmt( " and 3 < 2" );
   stmt( "not 7 < 2" );
   stmt( "2 < 3 < 4" );
   stmt( "2 < foobar - 3 < 4" );
   stmt( "2 < foobar and 3 < 4" );
   stmt( "4 * (32 - 16) + 9 = 73" );
   stmt( "235 76 + 1" );
   stmt( "a + b = not c and false" );
   stmt( "a + b = (not c) and false" );
   stmt( "a + b = (not c and false)" );
   stmt( "ab_c / bd2 or < e_f7" );
   stmt( "g not = h" );
   stmt( "été = false" );
   stmt( "i++" );
   stmt( "j & k" );
   stmt( "l or _m" )

end.</lang>

Output:
wombat:  true
wombat or monotreme:  true
( wombat and not ):  error at: 18  ()): Expecting identifier, literal or "(" false
wombat or not:  error at: 13  (<eof>): Expression expected false
a + 1:  true
a + b < c:  true
a + b - c * d / e < f and not ( g = h ):  true
a + b - c * d / e < f and not ( g = h:  error at: 37  (<eof>): Expected: ) false
a = b:  true
a or b = c:  true
$:  error at: 1  ($): Expecting identifier, literal or "(" false
true or false = not true:  error at: 20  (not): Expecting identifier, literal or "(" false
not true = false:  true
3 + not 5:  error at: 8  (not): Expecting identifier, literal or "(" false
3 + (not 5):  true
(42 + 3:  error at: 7  (<eof>): Expected: ) false
 not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true:  true
 and 3 < 2:  error at: 5  (and): Expecting identifier, literal or "(" false
not 7 < 2:  true
2 < 3 < 4:  error at: 8  (<): Expected EOF after expression false
2 < foobar - 3 < 4:  error at: 17  (<): Expected EOF after expression false
2 < foobar and 3 < 4:  true
4 * (32 - 16) + 9 = 73:  true
235 76 + 1:  error at: 7  (76): Expected EOF after expression false
a + b = not c and false:  error at: 12  (not): Expecting identifier, literal or "(" false
a + b = (not c) and false:  true
a + b = (not c and false):  true
ab_c / bd2 or < e_f7:  error at: 16  (<): Expecting identifier, literal or "(" false
g not = h:  error at: 6  (not): Expected EOF after expression false
été = false:  error at: 2  (Ã): Expecting identifier, literal or "(" false
i++:  error at: 3  (+): Expecting identifier, literal or "(" false
j & k:  error at: 4  (&): Expected EOF after expression false
l or _m:  error at: 7  (_): Expecting identifier, literal or "(" false

C

<lang C>// cverifyingsyntaxrosetta.c
// http://www.rosettacode.org/wiki/Compiler/_Verifying_Syntax

/*
# Makefile
CFLAGS = -O3 -Wall -Wfatal-errors
all: cverifyingsyntaxrosetta
*/

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <setjmp.h>

#define AT(CHAR) ( *pos == CHAR && ++pos )
#define TEST(STR) ( strncmp( pos, STR, strlen(STR) ) == 0 \
  && ! isalnum(pos[strlen(STR)]) && pos[strlen(STR)] != '_' )
#define IS(STR) ( TEST(STR) && (pos += strlen(STR)) )

static char *pos;                                 // current position in source
static char *startpos;                            // start of source
static jmp_buf jmpenv;

/*
 * Report a syntax error and abandon the current parse.
 *
 * Prints "false", the source text, and on the next line a caret placed under
 * the current parse position followed by `message`, then longjmp()s back to
 * the setjmp() in parse().  It therefore never returns; the int return type
 * only exists so a call can appear as an operand of ?: and && in expr().
 */
static int error(char *message)
 {
 /* "%*s" takes its field width as an int, but a pointer difference is a
    ptrdiff_t, so convert explicitly.  The 7 is the width of the "false  "
    prefix printed before the source text. */
 int column = (int)(pos - startpos) + 7;

 printf("false  %s\n%*s^ %s\n", startpos, column, "", message);
 longjmp( jmpenv, 1 );
 }

/*
 * Parse one operand followed by every operator that is allowed at `level`.
 *
 * `level` is the binding level of the context that called us: an operator is
 * consumed only when `level` does not exceed that operator's own limit, and
 * its right-hand operand is parsed by a recursive call at a higher level.
 * Errors are reported through error(), which does not return.
 *
 * Always returns 1 so that a call can be used directly as a loop condition.
 */
static int expr(int level)
 {
 /* The <ctype.h> classifiers require a value representable as unsigned char
    (or EOF); a plain char may be negative for bytes >= 0x80, so cast. */
 while( isspace((unsigned char)*pos) ) ++pos;      // skip white space
 if( AT('(') )                                     // find a primary (operand)
   {
   if( expr(0) && ! AT(')') ) error("missing close paren");
   }
 else if( level <= 4 && IS("not") && expr(6) ) { } // "not" and its operand
 else if( TEST("or") || TEST("and") || TEST("not") )
   {
   error("expected a primary, found an operator");
   }
 else if( isdigit((unsigned char)*pos) ) pos += strspn( pos, "0123456789" );
 else if( isalpha((unsigned char)*pos) ) pos += strspn( pos, "0123456789_"
   "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" );
 else error("expected a primary");
 do                    // then look for zero or more valid following operators
   {
   while( isspace((unsigned char)*pos) ) ++pos;
   }
 while(
   level <= 2 && IS("or") ? expr(3) :
   level <= 3 && IS("and") ? expr(4) :
   level <= 4 && (AT('=') || AT('<')) ? expr(5) :
   // a second comparison directly after the operand of the first one
   level == 5 && (*pos == '=' || *pos == '<') ? error("non-associative") :
   level <= 6 && (AT('+') || AT('-')) ? expr(7) :
   level <= 7 && (AT('*') || AT('/')) ? expr(8) :
   0 );                // no operator applies at this level: stop looping
 return 1;
 }

static void parse(char *source)

 {
 startpos = pos = source;
 if( setjmp(jmpenv) ) return; // for catching errors during recursion
 expr(0);
 if( *pos ) error("unexpected character following valid parse");
 printf(" true  %s\n", source);
 }

/* Sample statements, valid and invalid; main() passes each one to parse(). */
static char *tests[] = {

 "3 + not 5",
 "3 + (not 5)",
 "(42 + 3",
 "(42 + 3 some_other_syntax_error",
 "not 3 < 4 or (true or 3/4+8*5-5*2 < 56) and 4*3 < 12 or not true",
 "and 3 < 2",
 "not 7 < 2",
 "2 < 3 < 4",
 "2 < foobar - 3 < 4",
 "2 < foobar and 3 < 4",
 "4 * (32 - 16) + 9 = 73",
 "235 76 + 1",
 "a + b = not c and false",
 "a + b = (not c) and false",
 "a + b = (not c and false)",
 "ab_c / bd2 or < e_f7",
 "g not = h",
 "i++",
 "j & k",
 "l or _m",
 "wombat",
 "WOMBAT or monotreme",
 "a + b - c * d / e < f and not ( g = h )",
 "$",
 };

/* Run the verifier over every entry of the tests table, in order. */
int main(int argc, char *argv[])
 {
 char **last = tests + sizeof(tests)/sizeof(*tests);   // one past the final entry
 char **current;

 for( current = tests; current != last; ++current )
   {
   parse(*current);
   }
 return 0;
 }</lang>
Output:
false  3 + not 5
           ^ expected a primary, found an operator
 true  3 + (not 5)
false  (42 + 3
              ^ missing close paren
false  (42 + 3 some_other_syntax_error
               ^ missing close paren
 true  not 3 < 4 or (true or 3/4+8*5-5*2 < 56) and 4*3 < 12 or not true
false  and 3 < 2
       ^ expected a primary, found an operator
 true  not 7 < 2
false  2 < 3 < 4
             ^ non-associative
false  2 < foobar - 3 < 4
                      ^ non-associative
 true  2 < foobar and 3 < 4
 true  4 * (32 - 16) + 9 = 73
false  235 76 + 1
           ^ unexpected character following valid parse
false  a + b = not c and false
               ^ expected a primary, found an operator
 true  a + b = (not c) and false
 true  a + b = (not c and false)
false  ab_c / bd2 or < e_f7
                     ^ expected a primary
false  g not = h
         ^ unexpected character following valid parse
false  i++
         ^ expected a primary
false  j & k
         ^ unexpected character following valid parse
false  l or _m
            ^ expected a primary
 true  wombat
 true  WOMBAT or monotreme
 true  a + b - c * d / e < f and not ( g = h )
false  $
       ^ expected a primary

Go

If "and", "or", "not" and "=" are replaced by the corresponding symbols: "&&", "||", "!" and "==", then the task grammar is a subset of Go's own grammar - operator precedence and identifier construction are the same.

After making the appropriate substitutions, we can therefore use the parser in Go's standard library to verify whether the statements are valid or not. As expressions cannot be statements in Go, we simply parse the latter as expressions.

However, before applying the parser, we first need to ensure that the expressions don't include any characters (including non-ASCII) or usages thereof which Go would otherwise permit. Note that it's not necessary to specifically check for "++" and "--" as these are statements in Go and can't appear in expressions anyway.

In particular, after substitutions, "= not", "+ not" etc. would be allowed by the Go parser so we need to exclude them. Curiously, the Go parser allows something like "2 < 3 < 4" even though it doesn't compile. We need therefore to exclude that also (see Talk page). <lang go>package main

import (

   "fmt"
   "go/parser"
   "regexp"
   "strings"

)

// Patterns for input that the task grammar forbids but that Go's own
// expression parser would otherwise accept (or report differently).
var (
	// re1 matches any single character other than '_', an ASCII letter or
	// digit, one of + - * / = < ( ), or white space.
	re1 = regexp.MustCompile(`[^_a-zA-Z0-9\+\-\*/=<\(\)\s]`)
	// re2 matches a word that begins with an underscore.
	re2 = regexp.MustCompile(`\b_\w*\b`)
	// re3 matches the keyword "not" directly after a binary operator.
	// The trailing \b keeps identifiers that merely start with "not"
	// (e.g. "a = note") from being mistaken for the keyword.
	re3 = regexp.MustCompile(`[=<+*/-]\s*not\b`)
	// re4 matches a comparison operator, one operand containing no '(', '=',
	// '<' or space, and then any further operator.
	// NOTE(review): the final class also contains + * / and -, so this
	// matches "a = b + c", which the task grammar appears to allow - confirm.
	re4 = regexp.MustCompile(`(=|<)\s*[^(=< ]+\s*([=<+*/-])`)
)

// subs pairs each piece of task-grammar text (first element) with the Go text
// that replaces it (second element). main applies the pairs in this order to
// a statement padded with a space at each end; modify applies them in reverse
// (Go text back to task text, with surrounding spaces trimmed) to parser
// error messages.
var subs = [][2]string{

   {"=", "=="}, {" not ", " ! "}, {"(not ", "(! "}, {" or ", " || "}, {" and ", " && "},

}

func possiblyValid(expr string) error {

   matches := re1.FindStringSubmatch(expr)
   if matches != nil {
       return fmt.Errorf("invalid character %q found", []rune(matches[0])[0])
   }
   if re2.MatchString(expr) {
       return fmt.Errorf("identifier cannot begin with an underscore")
   }
   if re3.MatchString(expr) {
       return fmt.Errorf("expected operand, found 'not'")
   }
   matches = re4.FindStringSubmatch(expr)
   if matches != nil {
       return fmt.Errorf("operator %q is non-associative", []rune(matches[1])[0])
   }
   return nil

}

func modify(err error) string {

   e := err.Error()
   for _, sub := range subs {
       e = strings.ReplaceAll(e, strings.TrimSpace(sub[1]), strings.TrimSpace(sub[0]))
   }
   return strings.Split(e, ":")[2][1:] // remove location info as may be inaccurate

}

func main() {

   exprs := []string{
       "$",
       "one",
       "either or both",
       "a + 1",
       "a + b < c",
       "a = b",
       "a or b = c",
       "3 + not 5",
       "3 + (not 5)",
       "(42 + 3",
       "(42 + 3)",
       " not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true",
       " and 3 < 2",
       "not 7 < 2",
       "2 < 3 < 4",
       "2 < (3 < 4)",
       "2 < foobar - 3 < 4",
       "2 < foobar and 3 < 4",
       "4 * (32 - 16) + 9 = 73",
       "235 76 + 1",
       "true or false = not true",
       "true or false = (not true)",
       "not true or false = false",
       "not true = false",
       "a + b = not c and false",
       "a + b = (not c) and false",
       "a + b = (not c and false)",
       "ab_c / bd2 or < e_f7",
       "g not = h",
       "été = false",
       "i++",
       "j & k",
       "l or _m",
   } 
  
   for _, expr := range exprs {
       fmt.Printf("Statement to verify: %q\n", expr)
       if err := possiblyValid(expr); err != nil {
           fmt.Printf("\"false\" -> %s\n\n", err.Error())
           continue
       }
       expr = fmt.Sprintf(" %s ", expr) // make sure there are spaces at both ends
       for _, sub := range subs {
           expr = strings.ReplaceAll(expr, sub[0], sub[1])
       }
       _, err := parser.ParseExpr(expr)
       if err != nil {
           fmt.Println(`"false" ->`, modify(err))
       } else {
           fmt.Println(`"true"`)
       }
       fmt.Println()
   }

}</lang>

Output:
Statement to verify: "$"
"false" -> invalid character '$' found

Statement to verify: "one"
"true"

Statement to verify: "either or both"
"true"

Statement to verify: "a + 1"
"true"

Statement to verify: "a + b < c"
"true"

Statement to verify: "a = b"
"true"

Statement to verify: "a or b = c"
"true"

Statement to verify: "3 + not 5"
"false" -> expected operand, found 'not'

Statement to verify: "3 + (not 5)"
"true"

Statement to verify: "(42 + 3"
"false" -> expected ')', found newline

Statement to verify: "(42 + 3)"
"true"

Statement to verify: " not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true"
"true"

Statement to verify: " and 3 < 2"
"false" -> expected operand, found 'and'

Statement to verify: "not 7 < 2"
"true"

Statement to verify: "2 < 3 < 4"
"false" -> operator '<' is non-associative

Statement to verify: "2 < (3 < 4)"
"true"

Statement to verify: "2 < foobar - 3 < 4"
"false" -> operator '<' is non-associative

Statement to verify: "2 < foobar and 3 < 4"
"true"

Statement to verify: "4 * (32 - 16) + 9 = 73"
"true"

Statement to verify: "235 76 + 1"
"false" -> expected 'EOF', found 76

Statement to verify: "true or false = not true"
"false" -> expected operand, found 'not'

Statement to verify: "true or false = (not true)"
"true"

Statement to verify: "not true or false = false"
"true"

Statement to verify: "not true = false"
"true"

Statement to verify: "a + b = not c and false"
"false" -> expected operand, found 'not'

Statement to verify: "a + b = (not c) and false"
"true"

Statement to verify: "a + b = (not c and false)"
"true"

Statement to verify: "ab_c / bd2 or < e_f7"
"false" -> expected operand, found '<'

Statement to verify: "g not = h"
"false" -> expected 'EOF', found 'not'

Statement to verify: "été = false"
"false" -> invalid character 'é' found

Statement to verify: "i++"
"false" -> expected 'EOF', found '++'

Statement to verify: "j & k"
"false" -> invalid character '&' found

Statement to verify: "l or _m"
"false" -> identifier cannot begin with an underscore

Julia

<lang julia>"""
    substituteinnerparentheses(s, subs)

Find the last `(` in `s` and the first `)` after it, which delimit an
innermost parenthesised group. If the text between them passes `okparse`,
return `(t, true)` where `t` is `s` with the whole group replaced by `subs`
padded with a space on each side. Return `(s, false)` if there is no `(`, no
matching `)`, or the inner text is rejected.

Neighbouring positions are found with `nextind`/`prevind` rather than `i + 1`
or `j - 1`, so a multi-byte character next to a parenthesis cannot raise a
`StringIndexError`.
"""
function substituteinnerparentheses(s, subs)
    open = findlast('(', s)
    open === nothing && return (s, false)
    close = findnext(')', s, open)
    close === nothing && return (s, false)
    okparse(s[nextind(s, open):prevind(s, close)]) || return (s, false)
    return s[1:prevind(s, open)] * " " * subs * " " * s[nextind(s, close):end], true
end

"""
    okparse(s)

Return `true` if the statement `s` is accepted, `false` otherwise.

Parenthesised groups are checked innermost first and replaced by `true`.
The remaining flat statement is screened for constructs that Julia's parser
would accept but that are not allowed here, then `and`, `or` and `not` are
rewritten as `&&`, `||` and `!` and the result is passed to `Meta.parse`.

The keywords are matched as whole words only, so identifiers that merely
contain them (`sand`, `floor`, `note`) are neither rejected nor rewritten.
"""
function okparse(s)
    while findfirst('(', s) !== nothing
        s, okinparentheses = substituteinnerparentheses(s, "true")
        okinparentheses || return false
    end
    s = strip(s)
    # Julia allows expressions like 2 + + + 3, or like true = not false, but these are not allowed here:
    #  - two binary operators in a row
    #  - a binary operator at the very start or end
    #  - "not" directly after a binary operator
    #  - more than one of = or < between neighbouring and/or keywords
    if occursin(r"(?:\b(?:and|or)\b|[=<+\-*/])\s*(?:\b(?:and|or)\b|[=<+\-*/])", s) ||
        occursin(r"^(?:\b(?:and|or)\b|[=<+\-*/])|(?:\b(?:and|or)\b|[=<+\-*/])$", s) ||
        occursin(r"[=<+\-*/]\s*not\b", s) ||
        any(part -> count(c -> c == '=' || c == '<', part) > 1, split(s, r"\b(?:and|or)\b"))
        return false
    end
    # Julia allows ., ,, ; and operators like % but these are not allowed here
    # permitted: -+*/ true false and or not, ascii identifiers, and integers
    for item in split(s, r"\s+")
        !occursin(
            r"^[a-zA-Z][a-zA-Z_0-9]*$|^\d+$|^true$|^false$|^or$|^and$|^not$|^\=$|^\<$|^\+$|^-$|^\*$|^\/$",
            item) && return false
    end
    # change and, or, and not (whole words only) to the corresponding Julia operators
    s = replace(replace(replace(s, r"\band\b" => "&&"), r"\bor\b" => "||"), r"\bnot\b" => "!")
    try
        # Use Julia's parser, which will throw exception if it parses an error
        Meta.parse(s)
    catch
        return false
    end
    return true
end

# Sample statements, valid and invalid, used to exercise okparse
teststatements = [ " not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true", " and 3 < 2", "not 7 < 2", "4 * (32 - 16) + 9 = 73", "235 76 + 1", "true or false = not true", "not true = false", "2 < 5 < 9" ]

# Print each statement together with the verdict returned by okparse
for s in teststatements

   println("The compiler parses the statement { $s } and outputs: ", okparse(s))

end

</lang>

Output:
The compiler parses the statement {  not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true } and outputs: true
The compiler parses the statement {  and 3 < 2 } and outputs: false
The compiler parses the statement { not 7 < 2 } and outputs: true
The compiler parses the statement { 4 * (32 - 16) + 9 = 73 } and outputs: true
The compiler parses the statement { 235 76 + 1 } and outputs: false
The compiler parses the statement { true or false = not true } and outputs: false
The compiler parses the statement { not true = false } and outputs: true
The compiler parses the statement { 2 < 5 < 9 } and outputs: false

Perl

Made fix for 'not' see Discussion. Added 'not' and non-assoc fixes. Cooler output. <lang perl>#!/usr/bin/perl

use strict; # http://www.rosettacode.org/wiki/Compiler/_Verifying_Syntax
use warnings; # must sit on its own line, otherwise it is part of the comment above

# Abort the current parse by dying with a two-line diagnostic: the input held
# in $_ followed by a caret line carrying the message passed as first argument.
# The caret is indented by the current match position in $_ (0 if there is
# none) plus 7 columns.
sub error
  {
  my ($message) = @_;
  my $column = defined pos($_) ? pos($_) : 0;
  my $indent = ' ' x ($column + 7);
  die "$_$indent^ $message\n";
  }

# Require the literal text given as first argument at the current match
# position of $_. On success the position advances past it and a true value
# is returned; otherwise error() is raised with the message given as second
# argument. Any further arguments are ignored (callers use them only to force
# evaluation of a sub-expression before this check runs).
sub want
  {
  my ($literal, $message) = @_;
  return 1 if /\G\Q$literal\E/gc;
  error($message);
  }

# Recursive expression matcher working on $_ from its current match position.
# The single argument is the binding level of the caller: only operators whose
# level test below succeeds are consumed here, the rest is left for the caller.
# Returns 1 on success; every failure path goes through error(), which dies.
sub expr

 {
 my $level = shift;
 # skip leading horizontal whitespace
 /\G\h+/gc;
 # operand: a parenthesised expression (expr(0) is evaluated as an argument of
 # want, i.e. before want looks for the ')'), or 'not' followed by an operand
 # (only when $level <= 4), or an integer / identifier that is not one of the
 # words and, or, not; anything else is reported as "expected a primary"
 /\G\(/gc && want ')', "expected a closing paren", expr(0) or
   $level <= 4 && /\Gnot\b/gc && expr(5) or
   /\G (?!(?:and|or|not)\b) (?:\d+|[a-zA-Z]\w*)/gcx or
   error "expected a primary";
 # operator loop, repeated by the trailing 'while 1': whitespace is skipped,
 # then each operator permitted at this level is tried in turn and its right
 # operand parsed with a higher level. Level 4.5 is what the right operand of
 # '=' / '<' is parsed with; seeing another '=' / '<' there is reported as a
 # non-associative operator. When nothing matches, the sub returns 1.
 /\G\h+/gc ? 1 :
   $level <= 2   && /\Gor\b/gc     ? expr(3) :
   $level <= 3   && /\Gand\b/gc    ? expr(4) :
   $level <= 4   && /\G[=<]/gc     ? expr(4.5) :
   $level == 4.5 && /\G(?=[=<])/gc ? error "non-associative operator" :
   $level <= 5   && /\G[+-]/gc     ? expr(6) :
   $level <= 6   && /\G[*\/]/gc    ? expr(7) :
   return 1 while 1;
 }

# Driver: read one statement per line from the DATA section into $_, parse it
# with expr(0), require the trailing newline (i.e. end of input), and print
# either " true" with the statement or "false" with the diagnostic left in $@.
while( <DATA> )
  {
  print eval { want "\n", "expected end of input", expr(0) } ?
    " true  $_" : "false  $@";
  }

__DATA__
3 + not 5
3 + (not 5)
(42 + 3
(42 + 3 syntax_error
not 3 < 4 or (true or 3/4+8*5-5*2 < 56) and 4*3 < 12 or not true
and 3 < 2
not 7 < 2
2 < 3 < 4
2 < foobar - 3 < 4
2 < foobar and 3 < 4
4 * (32 - 16) + 9 = 73
235 76 + 1
a + b = not c and false
a + b = (not c) and false
a + b = (not c and false)
ab_c / bd2 or < e_f7
g not = h
i++
j & k
l or _m
UPPER_cAsE_aNd_letter_and_12345_test</lang>

Output:
false  3 + not 5
           ^ expected a primary
 true  3 + (not 5)
false  (42 + 3
              ^ expected a closing paren
false  (42 + 3 syntax_error
               ^ expected a closing paren
 true  not 3 < 4 or (true or 3/4+8*5-5*2 < 56) and 4*3 < 12 or not true
false  and 3 < 2
       ^ expected a primary
 true  not 7 < 2
false  2 < 3 < 4
             ^ non-associative operator
false  2 < foobar - 3 < 4
                      ^ non-associative operator
 true  2 < foobar and 3 < 4
 true  4 * (32 - 16) + 9 = 73
false  235 76 + 1
           ^ expected end of input
false  a + b = not c and false
               ^ expected a primary
 true  a + b = (not c) and false
 true  a + b = (not c and false)
false  ab_c / bd2 or < e_f7
                     ^ expected a primary
false  g not = h
         ^ expected end of input
false  i++
         ^ expected a primary
false  j & k
         ^ expected end of input
false  l or _m
            ^ expected a primary
 true  UPPER_cAsE_aNd_letter_and_12345_test

Phix

<lang Phix>-- demo\rosetta\Compiler\Verify_Syntax.exw
string src          -- the statement currently being verified
integer ch, sdx     -- the character last read from src, and the scan index into src

procedure skip_spaces()
-- Advance sdx past any blanks, tabs, carriage returns and line feeds in src.
-- On exit ch holds the first non-blank character, unless the end of src was
-- reached, in which case sdx>length(src) and ch holds the last character read.
    while sdx<=length(src) do
        ch = src[sdx]
        if not find(ch," \t\r\n") then
            exit
        end if
        sdx += 1
    end while
end procedure

-- token kinds, stored in tok[1]
enum SYMBOL, INTEGER, IDENT, ERROR, EOF
-- printable names of the token kinds, indexed by the enum above
constant toktypes = {"SYMBOL","INTEGER","IDENT","ERROR","EOF"}
-- the current token
sequence tok

function sprintok(string fmt)
-- Format the current token for display using fmt.
-- Side effect: tok[1] is overwritten with the printable name of its kind,
-- so tok[1] must still be one of the enum values on entry.
    tok[1] = toktypes[tok[1]]
    string shown = sprintf(fmt,{tok})
    return shown
end function

procedure next_token()
-- Scan the next token from src at sdx and store it in tok.
-- yields one of:
--   {SYMBOL,ch} where ch is one of "()+-/*=&<", or
--   {INTEGER,n}, or
--   {IDENT,string}, or
--   {ERROR,msg}, or
--   {EOF}
-- Note the words and/or/not/true/false are all returned as IDENT tokens.
    skip_spaces()
    integer tokstart = sdx
    if tok[1]=ERROR then
        ?{"erm, tok is",tok} -- looping??
    elsif sdx>length(src) then
        tok = {EOF}
    elsif find(ch,"()+-/*=&<") then
        sdx += 1
        tok = {SYMBOL,ch&""}
    elsif (ch>='0' and ch<='9') then
        -- accumulate in an atom: a long run of digits would otherwise exceed
        -- the integer range and abort the program with a type check failure
        atom n = ch-'0'
        while true do
            sdx += 1
            if sdx>length(src) then exit end if
            ch = src[sdx]
            if ch<'0' or ch>'9' then exit end if
            n = n*10 + ch-'0'
        end while
        tok = {INTEGER,n}
    elsif (ch>='a' and ch<='z')
       or (ch>='A' and ch<='Z') then
        -- identifier: a letter followed by any number of letters, digits or '_'
        while true do
            sdx += 1
            if sdx>length(src) then exit end if
            ch = src[sdx]
            if ch!='_'
            and (ch<'a' or ch>'z')
            and (ch<'A' or ch>'Z')
            and (ch<'0' or ch>'9') then
                exit
            end if
        end while
        tok = {IDENT,src[tokstart..sdx-1]}
    elsif ch='_' then
        tok = {ERROR,"identifiers may not start with _"}
        sdx += 1
    else
        tok = {ERROR,sprintf("illegal char (%c/%d)",ch)}
        sdx += 1
    end if

end procedure

forward procedure or_expr()

procedure primary()
-- primary = Identifier | Integer | '(' expr ')'
-- On success the operand is consumed; otherwise tok is set to an ERROR token.
    integer tt = tok[1]
    integer operand = (tt=INTEGER)
    if tt=IDENT then
        -- the tokeniser returns the operator words as IDENT tokens; they must
        -- not be accepted as operands, else eg "a and and" verifies as true
        operand = not find(tok[2],{"and","or","not"})
    end if
    if operand then
        next_token()
    elsif tok={SYMBOL,"("} then
        next_token()
        or_expr()
        if tok!={SYMBOL,")"} then
            tok = {ERROR,") expected"}
        else
            next_token()
        end if
    else
        tok = {ERROR,sprintok("invalid [%v]")}
    end if

end procedure

procedure mul_expr()
-- mul_expr = primary {('*' | '/') primary}
    primary()
    while find(tok,{{SYMBOL,"*"},{SYMBOL,"/"}}) do
        next_token()
        primary()
    end while
end procedure

procedure sum_expr()
-- sum_expr = mul_expr {('+' | '-') mul_expr}
    mul_expr()
    while find(tok,{{SYMBOL,"+"},{SYMBOL,"-"}}) do
        next_token()
        mul_expr()
    end while
end procedure

procedure cmp_expr()
-- cmp_expr = ["not"] sum_expr [('=' | '<') sum_expr]
-- At most one comparison operator is consumed, so a second one is left in
-- tok for the caller to trip over.
    constant comparators = {{SYMBOL,"="},{SYMBOL,"<"}}
    if tok={IDENT,"not"} then
        next_token()
    end if
    sum_expr()
    if find(tok,comparators) then
        next_token()
        sum_expr()
    end if
end procedure

procedure and_expr()
-- and_expr = cmp_expr {"and" cmp_expr}
    cmp_expr()
    while tok={IDENT,"and"} do
        next_token()
        cmp_expr()
    end while
end procedure

procedure or_expr()
-- or_expr = and_expr {"or" and_expr}
    and_expr()
    while tok={IDENT,"or"} do
        next_token()
        and_expr()
    end while
end procedure

procedure statement()
-- statement = or_expr (the outermost grammar rule)
    or_expr()
end procedure

procedure verify_syntax(string source)
-- Parse source as a single statement and print one line with the verdict:
-- "true" when parsing stopped on the EOF token, otherwise "false" together
-- with the token it stopped on.
    src = source
    sdx = 1
    tok = {0} -- ("not error"/invalid-ish)
    next_token()
    statement()
    string verdict
    if tok[1]=EOF then
        verdict = "true"
    else
        verdict = sprintok("false [tok=%v]")
    end if
    printf(1,"%30s  ==>  %s\n",{source,verdict})
end procedure

-- statements to verify, in the order they appear in the output
constant tests = {"$",
                  "one",
                  "either or both",
                  "a + 1",
                  "a + b < c",
                  "a = b",
                  "a or b = c",
                  "3 + not 5",
                  "3 + (not 5)",
                  "(42 + 3",
                  "(42 + 3)",
                  " not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true",
                  " and 3 < 2",
                  "not 7 < 2",
                  "2 < 3 < 4",
                  "2 < (3 < 4)",
                  "2 < foobar - 3 < 4",
                  "2 < foobar and 3 < 4",
                  "4 * (32 - 16) + 9 = 73",
                  "235 76 + 1",
                  "true or false = not true",
                  "true or false = (not true)",
                  "not true or false = false",
                  "not true = false",
                  "a + b = not c and false",
                  "a + b = (not c) and false",
                  "a + b = (not c and false)",
                  "ab_c / bd2 or < e_f7",
                  "g not = h",
                  "i++",
                  "j & k",
                  "l or _m"}

-- print a heading, then one verdict line per test statement
puts(1,"Verify Syntax:\n")
for idx=1 to length(tests) do
    verify_syntax(tests[idx])
end for</lang>

Output:

Note that "= not c" fails, whereas "= (not c)" passes, see talk page. (Arguably the task definition should be fixed.)

Verify Syntax:
                             $  ==>  false [tok={"ERROR",`invalid [{"ERROR","illegal char ($/36)"}]`}]
                           one  ==>  true
                either or both  ==>  true
                         a + 1  ==>  true
                     a + b < c  ==>  true
                         a = b  ==>  true
                    a or b = c  ==>  true
                     3 + not 5  ==>  false [tok={"INTEGER",5}]
                   3 + (not 5)  ==>  true
                       (42 + 3  ==>  false [tok={"ERROR",") expected"}]
                      (42 + 3)  ==>  true
 not 3 < 4 or (true or 3 /  4 + 8 *  5 - 5 * 2 < 56) and 4 * 3  < 12 or not true  ==>  true
                     and 3 < 2  ==>  false [tok={"INTEGER",3}]
                     not 7 < 2  ==>  true
                     2 < 3 < 4  ==>  false [tok={"SYMBOL","<"}]
                   2 < (3 < 4)  ==>  true
            2 < foobar - 3 < 4  ==>  false [tok={"SYMBOL","<"}]
          2 < foobar and 3 < 4  ==>  true
        4 * (32 - 16) + 9 = 73  ==>  true
                    235 76 + 1  ==>  false [tok={"INTEGER",76}]
      true or false = not true  ==>  false [tok={"IDENT","true"}]
    true or false = (not true)  ==>  true
     not true or false = false  ==>  true
              not true = false  ==>  true
       a + b = not c and false  ==>  false [tok={"IDENT","c"}]
     a + b = (not c) and false  ==>  true
     a + b = (not c and false)  ==>  true
          ab_c / bd2 or < e_f7  ==>  false [tok={"ERROR",`invalid [{"SYMBOL","<"}]`}]
                     g not = h  ==>  false [tok={"IDENT","not"}]
                           i++  ==>  false [tok={"ERROR",`invalid [{"SYMBOL","+"}]`}]
                         j & k  ==>  false [tok={"SYMBOL","&"}]
                       l or _m  ==>  false [tok={"ERROR",`invalid [{"ERROR","identifiers may not start with _"}]`}]