Compiler/Verifying syntax: Difference between revisions
(→{{header|Go}}: Amended regular expression to disallow single character identifier '_') |
m (→{{header|Go}}: Corrected typo.) |
||
Line 76: | Line 76: | ||
} |
} |
||
if re2.MatchString(expr) { |
if re2.MatchString(expr) { |
||
return fmt.Errorf("identifier |
return fmt.Errorf("identifier cannot begin with an underscore") |
||
} |
} |
||
return nil |
return nil |
||
Line 161: | Line 161: | ||
Statement to verify: "l or _m" |
Statement to verify: "l or _m" |
||
"false" -> identifier |
"false" -> identifier cannot begin with an underscore |
||
</pre> |
</pre> |
||
Revision as of 10:54, 23 December 2019
Verifying Syntax
A Syntax Analyzer that verifies a token stream,
outputs a string "true" if the token stream matches the grammar requirement,
outputs a string "false" if the token stream does not match the grammar.
Task
The program reads input from a file of token stream,
reads it and outputs a string "true" if the token stream matches the grammar,
outputs a string "false" and error messages if the token stream does not match the grammar,
based on the grammar below. The grammar is written in Extended Backus-Naur Form (EBNF).
Grammar
stmt = expr ; expr = expr_level_2; expr_level_2 = expr_level_3 {"or" expr_level_3} ; expr_level_3 = expr_level_4 {"and" expr_level_4} ; expr_level_4 = ["not"] expr_level_5 [('=' | '<') expr_level_5] ; expr_level_5 = expr_level_6 {('+' | '-') expr_level_6} ; expr_level_6 = primary {('*' | '/') primary} ; primary = Identifier | Integer | '(' expr ')' | "true" | "false" ; Integer = Digit {Digit}; Identifier = Letter {Letter | Digit | '_'}; Digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; Letter = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" | "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" ;
Go
If "and", "or", "not" and "=" are replaced by the corresponding symbols: "&&", "||", "!" and "==", then the task grammar is a subset of Go's own grammar - operator precedence and identifier construction are the same.
After making the appropriate substitutions, we can therefore use the parser in Go's standard library to verify whether the statements are valid or not. As expressions cannot be statements in Go, we simply parse the latter as expressions.
However, before applying the parser, we first need to ensure that the expressions don't include any characters (including non-ASCII) or usages thereof which Go would otherwise permit. Note that it's not necessary to specifically check for '++' and '--' as these are statements in Go and can't appear in expressions anyway. <lang go>package main
import (
"fmt" "go/parser" "regexp" "strings"
)
var (
re1 = regexp.MustCompile(`[^_a-zA-Z0-9\+\-\*/=<\(\) ]`) re2 = regexp.MustCompile(`\b_\w*\b`)
)
var subs = [][2]string{
{"=", "=="}, {" not ==", " !="}, {" not <", " >="}, {" not ", " ! "}, {" or ", " || "}, {" and ", " && "},
}
func possiblyValid(expr string) error {
matches := re1.FindStringSubmatch(expr) if matches != nil { return fmt.Errorf("invalid character %q found", []rune(matches[0])[0]) } if re2.MatchString(expr) { return fmt.Errorf("identifier cannot begin with an underscore") } return nil
}
func modify(err error) string {
e := err.Error() for _, sub := range subs { e = strings.ReplaceAll(e, strings.TrimSpace(sub[1]), strings.TrimSpace(sub[0])) } return strings.Split(e, ":")[2][1:] // remove location info as may be inaccurate
}
func main() {
exprs := []string{ " not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true", " and 3 < 2", "not 7 < 2", "4 * (32 - 16) + 9 = 73", "235 76 + 1", "a + b = not c and false", "ab_c / bd2 or < e_f7", "g not = h", "été = false", "i++", "j & k", "l or _m", }
for _, expr := range exprs { fmt.Printf("Statement to verify: %q\n", expr) if err := possiblyValid(expr); err != nil { fmt.Printf("\"false\" -> %s\n\n", err.Error()) continue } expr = fmt.Sprintf(" %s ", expr) // make sure there are spaces at both ends for _, sub := range subs { expr = strings.ReplaceAll(expr, sub[0], sub[1]) } _, err := parser.ParseExpr(expr) if err != nil { fmt.Println(`"false" ->`, modify(err)) } else { fmt.Println(`"true"`) } fmt.Println() }
}</lang>
- Output:
Statement to verify: " not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true" "true" Statement to verify: " and 3 < 2" "false" -> expected operand, found 'and' Statement to verify: "not 7 < 2" "true" Statement to verify: "4 * (32 - 16) + 9 = 73" "true" Statement to verify: "235 76 + 1" "false" -> expected 'EOF', found 76 Statement to verify: "a + b = not c and false" "true" Statement to verify: "ab_c / bd2 or < e_f7" "false" -> expected operand, found '<' Statement to verify: "g not = h" "true" Statement to verify: "été = false" "false" -> invalid character 'é' found Statement to verify: "i++" "false" -> expected 'EOF', found '++' Statement to verify: "j & k" "false" -> invalid character '&' found Statement to verify: "l or _m" "false" -> identifier cannot begin with an underscore
Julia
<lang julia>function substituteinnerparentheses(s, subs)
((i = findlast('(', s)) == nothing) && return (s, false) ((j = findfirst(')', s[i:end])) == nothing) && return (s, false) okparse(s[i+1:j+i-2]) || return (s, false) return s[1:i-1] * " " * subs * " " * s[j+i:end], true
end
function okparse(s)
while findfirst('(', s) != nothing s, okinparentheses = substituteinnerparentheses(s, "true") okinparentheses || return false end s = strip(s) # Julia allows expressions like 2 + + + 3 but these are not allowed here if occursin(r"(and|or|[\=\<\+\-\*\/])\s*(and|or|[\=\<\+\-\*\/])", s) || occursin(r"(^(and|^or|^[\=\<\+\-\*\/]))|((and|or|[\=\<\+\-\*\/])$)", s) return false end # Julia allows ., ,, ; and operators like % but these are not allowed here # permitted: -+*/ true false and or not, ascii identifiers, and integers for item in split(s, r"\s+") !occursin( r"^[a-zA-Z][a-zA-Z_0-9]*$|^\d+$|^true$|^false$|^or$|^and$|^not$|^\=$|^\<$|^\+$|^-$|^\*$|^\/$", item) && return false end # change and, or, and not to the corresponding Julia operators s = replace(replace(replace(s, "and" => "&&"), "or" => "||"), "not" => "!") try # Use Julia's parser, which will throw exception if it parses an error Meta.parse(s) catch return false end return true
end
teststatements = [ " not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true", " and 3 < 2", "not 7 < 2", "4 * (32 - 16) + 9 = 73", "235 76 + 1" ]
for s in teststatements
println("The compiler parses the statement { $s } and outputs: ", okparse(s))
end
</lang>
- Output:
The compiler parses the statement { not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true } and outputs: true The compiler parses the statement { and 3 < 2 } and outputs: false The compiler parses the statement { not 7 < 2 } and outputs: true The compiler parses the statement { 4 * (32 - 16) + 9 = 73 } and outputs: true The compiler parses the statement { 235 76 + 1 } and outputs: false