Parse EBNF: Difference between revisions

From Rosetta Code
Content added Content deleted
m (moved EBNF Parser to EBNF parser: capitalization policy)
(→‎Tcl: Added implementation)
Line 10: Line 10:


factor : NUMBER ;</pre>
factor : NUMBER ;</pre>

=={{header|Tcl}}==
Demonstration lexer and parser:
<lang tcl>package require Tcl 8.6

# Utilities to make the coroutine easier to use
proc provide args {while {![yield $args]} {yield}}
proc next lexer {$lexer 1}
proc pushback lexer {$lexer 0}

# Lexical analyzer coroutine core
proc lexer {str} {
yield [info coroutine]
set symbols {+ PLUS - MINUS * MULT / DIV ( LPAR ) RPAR}
set idx 0
while 1 {
switch -regexp -matchvar m -- $str {
{^\s+} {
# No special action for whitespace
}
{^([-+*/()])} {
provide [dict get $symbols [lindex $m 1]] [lindex $m 1] $idx
}
{^(\d+)} {
provide NUMBER [lindex $m 1] $idx
}
{^$} {
provide EOT "EOT" $idx
return
}
. {
provide PARSE_ERROR [lindex $m 0] $idx
}
}
# Trim the matched string
set str [string range $str [string length [lindex $m 0]] end]
incr idx [string length [lindex $m 0]]
}
}

# Utility functions to help with making an LL(1) parser; ParseLoop handles
# EBNF looping constructs, ParseSeq handles sequence constructs.
proc ParseLoop {lexer def} {
upvar 1 token token payload payload index index
foreach {a b} $def {
if {$b ne "-"} {set b [list set c $b]}
lappend m $a $b
}
lappend m default {pushback $lexer; break}
while 1 {
lassign [next $lexer] token payload index
switch -- $token {*}$m
if {[set c [catch {uplevel 1 $c} res opt]]} {
dict set opt -level [expr {[dict get $opt -level]+1}]
return -options $opt $res
}
}
}
proc ParseSeq {lexer def} {
upvar 1 token token payload payload index index
foreach {t s} $def {
lassign [next $lexer] token payload index
switch -- $token $t {
if {[set c [catch {uplevel 1 $s} res opt]]} {
dict set opt -level [expr {[dict get $opt -level]+1}]
return -options $opt $res
}
} EOT {
throw SYNTAX "end of text at position $index"
} default {
throw SYNTAX "\"$payload\" at position $index"
}
}
}

# Main parser driver; contains "master" grammar that ensures that the whole
# text is matched and not just a prefix substring. Note also that the parser
# runs the lexer as a coroutine (with a fixed name in this basic demonstration
# code).
proc parse {str} {
set lexer [coroutine l lexer $str]
try {
set parsed [parse.expr $lexer]
ParseLoop $lexer {
EOT {
return $parsed
}
}
throw SYNTAX "\"$payload\" at position $index"
} trap SYNTAX msg {
return -code error "syntax error: $msg"
} finally {
catch {rename $lexer ""}
}
}

# Now the descriptions of how to match each production in the grammar...
proc parse.expr {lexer} {
set expr [parse.term $lexer]
ParseLoop $lexer {
PLUS - MINUS {
set expr [list $token $expr [parse.term $lexer]]
}
}
return $expr
}
proc parse.term {lexer} {
set term [parse.factor $lexer]
ParseLoop $lexer {
MULT - DIV {
set term [list $token $term [parse.factor $lexer]]
}
}
return $term
}
proc parse.factor {lexer} {
ParseLoop $lexer {
NUMBER {
return $payload
}
MINUS {
ParseSeq $lexer {
NUMBER {return -$payload}
}
}
LPAR {
set result [parse.expr $lexer]
ParseSeq $lexer {
RPAR {return $result}
}
break
}
EOT {
throw SYNTAX "end of text at position $index"
}
}
throw SYNTAX "\"$payload\" at position $index"
}</lang>

<lang tcl># Demonstration code
puts [parse "1 - 2 - -3 * 4 + 5"]
puts [parse "1 - 2 - -3 * (4 + 5)"]</lang>
Output:
<pre>
PLUS {MINUS {MINUS 1 2} {MULT -3 4}} 5
MINUS {MINUS 1 2} {MULT -3 {PLUS 4 5}}
</pre>

Revision as of 22:06, 5 December 2010

Parse EBNF is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Create a simple parser for EBNF grammars. Here is an ebnf grammar in itself and a parser for it in php.

Here are simple parser rules for a calculator taken from the antlr tutorial

expr	: term ( ( PLUS | MINUS )  term )* ;

term	: factor ( ( MULT | DIV ) factor )* ;

factor	: NUMBER ;

Tcl

Demonstration lexer and parser: <lang tcl>package require Tcl 8.6

  1. Utilities to make the coroutine easier to use

proc provide args {while {![yield $args]} {yield}} proc next lexer {$lexer 1} proc pushback lexer {$lexer 0}

  1. Lexical analyzer coroutine core

proc lexer {str} {

   yield [info coroutine]
   set symbols {+ PLUS - MINUS * MULT / DIV ( LPAR ) RPAR}
   set idx 0
   while 1 {

switch -regexp -matchvar m -- $str { {^\s+} { # No special action for whitespace } {^([-+*/()])} { provide [dict get $symbols [lindex $m 1]] [lindex $m 1] $idx } {^(\d+)} { provide NUMBER [lindex $m 1] $idx } {^$} { provide EOT "EOT" $idx return } . { provide PARSE_ERROR [lindex $m 0] $idx } } # Trim the matched string set str [string range $str [string length [lindex $m 0]] end] incr idx [string length [lindex $m 0]]

   }

}

  1. Utility functions to help with making an LL(1) parser; ParseLoop handles
  2. EBNF looping constructs, ParseSeq handles sequence constructs.

proc ParseLoop {lexer def} {

   upvar 1 token token payload payload index index
   foreach {a b} $def {

if {$b ne "-"} {set b [list set c $b]} lappend m $a $b

   }
   lappend m default {pushback $lexer; break}
   while 1 {

lassign [next $lexer] token payload index switch -- $token {*}$m if {[set c [catch {uplevel 1 $c} res opt]]} { dict set opt -level [expr {[dict get $opt -level]+1}] return -options $opt $res }

   }

} proc ParseSeq {lexer def} {

   upvar 1 token token payload payload index index
   foreach {t s} $def {

lassign [next $lexer] token payload index switch -- $token $t { if {[set c [catch {uplevel 1 $s} res opt]]} { dict set opt -level [expr {[dict get $opt -level]+1}] return -options $opt $res } } EOT { throw SYNTAX "end of text at position $index" } default { throw SYNTAX "\"$payload\" at position $index" }

   }

}

  1. Main parser driver; contains "master" grammar that ensures that the whole
  2. text is matched and not just a prefix substring. Note also that the parser
  3. runs the lexer as a coroutine (with a fixed name in this basic demonstration
  4. code).

proc parse {str} {

   set lexer [coroutine l lexer $str]
   try {

set parsed [parse.expr $lexer] ParseLoop $lexer { EOT { return $parsed } } throw SYNTAX "\"$payload\" at position $index"

   } trap SYNTAX msg {

return -code error "syntax error: $msg"

   } finally {

catch {rename $lexer ""}

   }

}

  1. Now the descriptions of how to match each production in the grammar...

proc parse.expr {lexer} {

   set expr [parse.term $lexer]
   ParseLoop $lexer {

PLUS - MINUS { set expr [list $token $expr [parse.term $lexer]] }

   }
   return $expr

} proc parse.term {lexer} {

   set term [parse.factor $lexer]
   ParseLoop $lexer {

MULT - DIV { set term [list $token $term [parse.factor $lexer]] }

   }
   return $term

} proc parse.factor {lexer} {

   ParseLoop $lexer {

NUMBER { return $payload } MINUS { ParseSeq $lexer { NUMBER {return -$payload} } } LPAR { set result [parse.expr $lexer] ParseSeq $lexer { RPAR {return $result} } break } EOT { throw SYNTAX "end of text at position $index" }

   }
   throw SYNTAX "\"$payload\" at position $index"

}</lang>

<lang tcl># Demonstration code puts [parse "1 - 2 - -3 * 4 + 5"] puts [parse "1 - 2 - -3 * (4 + 5)"]</lang> Output:

PLUS {MINUS {MINUS 1 2} {MULT -3 4}} 5
MINUS {MINUS 1 2} {MULT -3 {PLUS 4 5}}