Parse EBNF: Difference between revisions

Added Algol 68
m (→‎{{header|Phix}}: syntax coloured)
(Added Algol 68)
 
(6 intermediate revisions by 4 users not shown)
Line 11:
See [[Parse EBNF/Tests|the tests]].
<br><br>
 
=={{header|ALGOL 68}}==
 
The source of the Algol 68 sample is somewhat largish, so is on a separate page on Rosetta Code here: [[Parse EBNF/ALGOL 68]].
{{out}}
<pre>
Valid EBNF: a/z: 'a' { a = 'a1' ( 'a2' | 'a3' ) { 'a4' } [ 'a5' ] 'a6' ; } 'z'
a: seq
literal "a1"
oneof
seq
literal "a2"
seq
literal "a3"
list
literal "a4"
opt
literal "a5"
literal "a6"
 
 
"a1a3a4a4a5a6" is valid according to a
 
"a1 a2a6" is valid according to a
 
"a1 a3 a4 a6" is valid according to a
 
**** Syntax error near "a1 a4 a5 a6"
"a1 a4 a5 a6" is not valid according to a
 
**** Syntax error near "a5 a5 a6"
"a1 a2 a4 a5 a5 a6" is not valid according to a
 
**** Unexpected text: "a7" at the end of source
"a1 a2 a4 a5 a6 a7" is not valid according to a
 
**** Syntax error near "your ad here"
"your ad here" is not valid according to a
 
 
Valid EBNF: Arithmetic expressions:
'Arithmetic expressions'
{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .
plus = '+' | '-' .
times = '*' | '/' .
number = digit { digit } .
digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' .
}
expr: seq
rule term
list
rule plus
rule term
term: seq
rule factor
list
rule times
rule factor
factor: oneof
seq
rule number
seq
literal "("
rule expr
literal ")"
plus: oneof
seq
literal "+"
seq
literal "-"
times: oneof
seq
literal "*"
seq
literal "/"
number: seq
rule digit
list
rule digit
digit: oneof
seq
literal "0"
seq
literal "1"
seq
literal "2"
seq
literal "3"
seq
literal "4"
seq
literal "5"
seq
literal "6"
seq
literal "7"
seq
literal "8"
seq
literal "9"
 
 
"2" is valid according to Arithmetic expressions
 
"2*3 + 4/23 - 7" is valid according to Arithmetic expressions
 
"(3 + 4) * 6-2+(4*(4))" is valid according to Arithmetic expressions
 
**** Syntax error near "-2"
"-2" is not valid according to Arithmetic expressions
 
**** Unexpected text: "+" at the end of source
"3 +" is not valid according to Arithmetic expressions
 
**** Syntax error near " 3"
"(4 + 3" is not valid according to Arithmetic expressions
 
 
**** Expected "{", not "a"
Invalid EBNF: a = '1';
 
**** Expected "}", not "(eof)"
Invalid EBNF: { a = '1' ;
 
**** Expected "=", not "world"
Invalid EBNF: { hello world = '1'; }
 
**** Rule "bar" not defined
Invalid EBNF: { foo = bar . }
</pre>
 
=={{header|Go}}==
Line 16 ⟶ 149:
<br>
A more or less faithful translation except that indices are 0-based rather than 1-based and so 1 less than in the Phix results.
<langsyntaxhighlight lang="go">package main
 
import (
Line 420 ⟶ 553:
fmt.Println()
}
}</langsyntaxhighlight>
 
{{out}}
Line 503 ⟶ 636:
We use Parsec to generate Parsec.
 
<langsyntaxhighlight lang="haskell">import Control.Applicative
import Control.Monad
import Data.Maybe
Line 675 ⟶ 808:
lc c = char c <* ws
 
ws = many $ oneOf " \n\t"</langsyntaxhighlight>
 
=={{header|Julia}}==
 
Creates a Grammar struct from an EBNF grammar, which has a matching Regex for each production of the EBNF grammar.
 
Tested with Julia v1.7.2
 
<syntaxhighlight lang="julia">
struct Grammar
regex::Regex
rules::Dict{Symbol,Regex}
root::Regex
function Grammar(regex::Regex)
names = keys(match(regex, ""))
new(regex,
Dict{Symbol,Regex}(
Symbol(name) => Regex(
"$(regex.pattern)(?&$(name))",
regex.compile_options,
regex.match_options
)
for name in names if name isa String),
Regex(
"$(regex.pattern)^\\s*(?&$(first(names)))\\s*\$",
regex.compile_options,
regex.match_options))
end
end
 
Base.getindex(grammar::Grammar, key) = grammar.rules[key]
Base.occursin(grammar::Grammar, str::AbstractString) = occursin(grammar.root, str)
Base.show(io::IO, grammar::Grammar) = print(io, "Grammar(", grammar.regex, ")")
 
const ebnfgrammar = Grammar(r"""
(?(DEFINE) # EBNF syntax
(?<syntax>
(?&title)? \s*+ { \s*+ ( (?&production) \s*+ )* } \s*+ (?&comment)? )
(?<production>
(?&identifier) \s*+ = \s*+ (?&expression) \s*+ [.;] )
(?<expression>
(?&term) ( \s*+ \| \s*+ (?&term) )* )
(?<term>
(?&factor) ( \s*+ (?&factor) )* )
(?<factor>
(?&identifier)
| (?&literal)
| \[ \s*+ (?&expression) \s*+ \]
| \( \s*+ (?&expression) \s*+ \)
| { \s*+ (?&expression) \s*+ } )
(?<identifier>
[^'"\s=|(){}[\].;]+ )
(?<title>
(?&literal) )
(?<comment>
(?&literal) )
(?<literal>
' [^']+ '
| " [^"]+ " )
)"""x)
 
function syntax(grammar::Grammar, str::AbstractString)
if occursin(grammar[:syntax], str)
"""(?(DEFINE)$(join(production(grammar, eachmatch(grammar[:production], str)))))"""
else
throw(ErrorException("Invalid EBNF syntax."))
end
end
 
production(grammar::Grammar, iter) = (production(grammar, m.match) for m in iter)
function production(grammar::Grammar, str::AbstractString)
rstrip(c -> isspace(c) || c == '.' || c == ';', str)
id, expr = split(str, r"\s*=\s*", limit=2)
"(?<$(id)>" * join(
expression(grammar, eachmatch(grammar[:expression], expr)),
"\\s*+|\\s*+") * "\\s*+)"
end
 
expression(grammar::Grammar, iter) = (expression(grammar, m.match) for m in iter)
function expression(grammar::Grammar, str::AbstractString)
join(term(grammar, eachmatch(grammar[:term], str)), "\\s*+|\\s*+")
end
 
term(grammar::Grammar, iter) = (term(grammar, m.match) for m in iter)
function term(grammar::Grammar, str::AbstractString)
join(factor(grammar, eachmatch(grammar[:factor], str)), "\\s*+")
end
 
factor(grammar::Grammar, iter) = (factor(grammar, m.match) for m in iter)
function factor(grammar::Grammar, str::AbstractString)
str = strip(str)
if startswith(str, r"['\"]")
literal(grammar, str)
elseif startswith(str, r"[[({]")
"(\\s*+" * expression(grammar, str[begin+1:end-1]) * (
if startswith(str, '[')
"\\s*+)?"
elseif startswith(str, '{')
"\\s*+)*"
else
"\\s*+)"
end
)
else
"(?&$(str))"
end
end
 
literal(::Grammar, str::AbstractString) = "\\Q$(str[begin+1:end-1])\\E"
 
grammar(ebnf::AbstractString) = Grammar(Regex(syntax(ebnfgrammar, ebnf)))
 
using Test
 
oneliner = grammar("""
"a" {
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"
""")
 
@testset "One liner" begin
@test occursin(oneliner, "a1a3a4a4a5a6")
@test occursin(oneliner, "a1 a2a6")
@test occursin(oneliner, "a1 a3 a4 a6")
@test !occursin(oneliner, "a1 a4 a5 a6")
@test !occursin(oneliner, "a1 a2 a4 a5 a5 a6")
@test !occursin(oneliner, "a1 a2 a4 a5 a6 a7")
@test !occursin(oneliner, "your ad here")
end
 
arithmetic = grammar("""
{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .
 
plus = "+" | "-" .
times = "*" | "/" .
 
number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}
""")
 
@testset "Arithmetic expressions" begin
@test occursin(arithmetic, "2")
@test occursin(arithmetic, "2*3 + 4/23 - 7")
@test occursin(arithmetic, "(3 + 4) * 6-2+(4*(4))")
@test !occursin(arithmetic, "-2")
@test !occursin(arithmetic, "3 +")
@test !occursin(arithmetic, "(4 + 3")
end
 
@testset "Invalid EBNF" begin
@test_throws ErrorException grammar("""a = "1";""")
@test_throws ErrorException grammar("""{ a = "1";""")
@test_throws ErrorException grammar("""{ hello world = "1"; }""")
@test_throws ErrorException grammar("{ foo = bar . }")
end
 
</syntaxhighlight>
 
 
=={{header|Modula-2}}==
<langsyntaxhighlight lang="modula2">MODULE EBNF;
 
FROM ASCII IMPORT EOL;
Line 766 ⟶ 1,060:
Tabulate (T0);
Tabulate (T1);
END EBNF.</langsyntaxhighlight>
And the source for the EBNF scanner. I hope you like nested procedures.
<langsyntaxhighlight lang="modula2">IMPLEMENTATION MODULE EBNFScanner;
 
FROM ASCII IMPORT LF;
Line 935 ⟶ 1,229:
Ino := 0;
ch := ' '
END EBNFScanner.</langsyntaxhighlight>
 
=={{header|Perl}}==
<langsyntaxhighlight lang="perl">#!/usr/bin/perl
 
use strict; # http://www.rosettacode.org/wiki/Parse_EBNF
Line 1,112 ⟶ 1,406:
----------------------------------------------------------------------
{ foo = bar . } "undefined production check"
----------------------------------------------------------------------</langsyntaxhighlight>
{{out}}
<pre>
Line 1,226 ⟶ 1,520:
 
=={{header|Phix}}==
<!--<langsyntaxhighlight Phixlang="phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">src</span>
Line 1,543 ⟶ 1,837:
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</langsyntaxhighlight>-->
In real use, I would be tempted to use numeric literals rather than string tags in such structures, but the latter certainly make things ten times easier to debug, plus I got an instantly legible syntax tree dump (the bit just after "===>" below) practically for free.
{{out}}
Line 1,610 ⟶ 1,904:
 
=={{header|PicoLisp}}==
<langsyntaxhighlight PicoLisplang="picolisp">(de EBNF
"expr : term ( ( PLUS | MINUS ) term )* ;"
"term : factor ( ( MULT | DIV ) factor )* ;"
Line 1,619 ⟶ 1,913:
(unless (and (match '(@S : @E ;) (str E)) (not (cdr @S)))
(quit "Invalid EBNF" E) )
(put (car @S) 'ebnf @E) ) )</langsyntaxhighlight>
<langsyntaxhighlight PicoLisplang="picolisp">(de matchEbnf (Pat)
(cond
((asoq Pat '((PLUS . +) (MINUS . -) (MULT . *) (DIV . /)))
Line 1,662 ⟶ 1,956:
(let *Lst (str Str "")
(catch NIL
(parseLst (get 'expr 'ebnf)) ) ) )</langsyntaxhighlight>
Output:
<pre>: (parseEbnf "1 + 2 * -3 / 7 - 3 * 4")
Line 1,674 ⟶ 1,968:
It is implemented and exercised using the flavor of EBNF and test cases specified on the [[Parse EBNF/Tests|test page]].
 
<syntaxhighlight lang="raku" perl6line># A Raku grammar to parse EBNF
grammar EBNF {
rule TOP { ^ <title>? '{' [ <production> ]+ '}' <comment>? $ }
rule production { <name> '=' <expression> <[.;]> }
rule expression { <term> +% "|" }
rule term { <factor>+ }
rule factor { <group> | <repeat> | <optional> | <identifier> | <literal> }
rule group { '(' <expression> ')' }
rule repeat { '{' <expression> '}' }
rule optional { '[' <expression> ']' }
token identifier { <-[\|\(\)\{\}\[\]\.\;\"\'\s]>+ } #"
token literal { ["'" <-[']>+ "'" | '"' <-["]>+ '"'] } #"
token title { <literal> }
token comment { <literal> }
token name { <identifier> <?before \h* '='> }
}
 
Line 1,699 ⟶ 1,993:
">]+\$\}\n " ~ $<production>>>.ast ~ "\}"
}
method production($/) {
make 'token ' ~ $<name> ~ ' {' ~
$<expression>.ast ~ "}\n"
}
method expression($/) { make join '|', $<term>>>.ast }
method term($/) { make join '\h*', $<factor>>>.ast }
method factor($/) {
make $<literal> ?? $<literal> !!
$<group> ?? '[' ~ $<group>.ast ~ ']' !!
Line 1,712 ⟶ 2,006:
'<' ~ $<identifier> ~ '>'
}
method repeat($/) { make $<expression>.ast }
method optional($/) { make $<expression>.ast }
method group($/) { make $<expression>.ast }
}
 
Line 1,742 ⟶ 2,036:
term = factor { times factor } .
factor = number | '(' expr ')' .
 
plus = "+" | "-" .
times = "*" | "/" .
 
number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
Line 1,810 ⟶ 2,104:
say '*' x 79, "\n";
unlink $fn;
}</langsyntaxhighlight>
 
Output:
Line 2,081 ⟶ 2,375:
{{in progress|lang=Ruby|day=12|month=May|year=2011}}
{{incomplete|Ruby|The tokenizer is here, but the parser is very incomplete.}}
<langsyntaxhighlight lang="ruby">#--
# The tokenizer splits the input into Tokens like "identifier",
# ":", ")*" and so on. This design uses a StringScanner on each line of
Line 2,219 ⟶ 2,513:
parse
end
</syntaxhighlight>
</lang>
 
=={{header|Tcl}}==
Line 2,225 ⟶ 2,519:
 
Demonstration lexer and parser. Note that this parser supports parenthesized expressions, making the grammar recursive.
<langsyntaxhighlight lang="tcl">package require Tcl 8.6
 
# Utilities to make the coroutine easier to use
Line 2,359 ⟶ 2,653:
}
throw SYNTAX "\"$payload\" at position $index"
}</langsyntaxhighlight>
 
<langsyntaxhighlight lang="tcl"># Demonstration code
puts [parse "1 - 2 - -3 * 4 + 5"]
puts [parse "1 - 2 - -3 * (4 + 5)"]</langsyntaxhighlight>
Output:
<pre>
Line 2,373 ⟶ 2,667:
{{trans|Phix}}
{{libheader|Wren-fmt}}
{{libheader|Wren-traititerate}}
{{libheader|Wren-str}}
{{libheader|Wren-seq}}
Translated via the Go entry.
<langsyntaxhighlight ecmascriptlang="wren">import "./fmt" for Conv, Fmt
import "./traititerate" for Stepped
import "./str" for Char
import "./seq" for Lst
 
var src = ""
Line 2,744 ⟶ 3,038:
System.print()
i = i + 1
}</langsyntaxhighlight>
 
{{out}}
3,028

edits