Recursive descent parser generator: Difference between revisions
Recursive descent parser generator (view source)
Revision as of 10:53, 2 February 2024
, 3 months ago→{{header|Wren}}: Changed to Wren S/H
Thundergnat (talk | contribs) (Rename Perl 6 -> Raku, alphabetize, minor clean-up) |
m (→{{header|Wren}}: Changed to Wren S/H) |
||
(6 intermediate revisions by 6 users not shown) | |||
Line 24:
It can't handle newlines as part of the grammar, the error checking is fairly limited and the error reporting is basically non-existent, but the parser it generates (not shown below) is human readable.
<
#include <fstream>
#include <
#include <map>
#include <regex>
#include <set>
#include <
#include <string>
using namespace std;
Line 40:
int main(int argc, char **argv) {
<< endl;
<< endl
<< "void advanceToken() {" << endl
<< " static smatch results;" << endl
<< endl
<< " prevToken = nextToken;" << endl
<< " prevTokenValue = nextTokenValue;" << endl
<< endl;
for (auto i = terminals.begin(); i != terminals.end(); ++i) {
string name = i->first + "_pattern";
string pattern = i->second;
<< " if (regex_search(input, results, " << name << ", regex_constants::match_continuous)) {" << endl
<< " nextToken = \"" << i->first << "\";" << endl
<< " nextTokenValue = results[1];" << endl
<< " input = regex_replace(input, " << name << ", \"\");" << endl
<< " return;" << endl
<< " }" << endl
<< endl;
}
outFile << " static regex eof(R\"(\\s*)\");" << endl
<< " if (regex_match(input, results, eof, regex_constants::match_continuous)) {" << endl
<< " nextToken = \"\";" << endl
<< " nextTokenValue = \"\";" << endl
<< " return;" << endl
<< " }" << endl
<< endl
<< " throw \"Unknown token\";" << endl
<< "}" << endl
<< endl
<< "bool same(string symbol) {" << endl
<< " if (symbol == nextToken) {" << endl
<< " advanceToken();" << endl
<< " return true;" << endl
<< " }" << endl
<< " return false;" << endl
<< "}" << endl
<< endl;
// Copy the header code to the output
while (true) {
getline(inFile, line);
// Copy lines until we reach the first rule
break;
}
// Build the nonterminal table
while (true) {
// results already contains the last matched rule
string name = results[1];
stringstream ss(results[2]);
string tempString;
vector<string> tempVector;
while (ss >> tempString)
tempVector.push_back(tempString);
nonterminalRules[name].push_back(tempVector);
// Read code until another rule is found
string code = "";
while (true) {
getline(inFile, line);
if (!inFile || regex_match(line, results, rulePattern))
break;
// Replace $1 with results[1], etc.
line = regex_replace(line, argPattern, "results[$1]");
code += line + "\n";
}
nonterminalCode[name].push_back(code);
// Stop when we reach the end of the file
if (!inFile)
break;
}
// Generate the first sets, inefficiently
bool done = false;
while (!done)
for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) {
string name = i->first;
done = true;
if (nonterminalFirst.find(i->first) == nonterminalFirst.end())
nonterminalFirst[i->first] = set<string>();
for (int j = 0; j < i->second.size(); ++j) {
if (i->second[j].size() == 0)
nonterminalFirst[i->first].insert("");
else {
string first = i->second[j][0];
if (nonterminalFirst.find(first) != nonterminalFirst.end()) {
for (auto k = nonterminalFirst[first].begin(); k != nonterminalFirst[first].end(); ++k) {
if (nonterminalFirst[name].find(*k) == nonterminalFirst[name].end()) {
nonterminalFirst[name].insert(*k);
done = false;
}
}
} else if (nonterminalFirst[name].find(first) == nonterminalFirst[name].end()) {
nonterminalFirst[name].insert(first);
done = false;
}
}
}
}
for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) {
string name = i->first + "_rule";
outFile << "string " << name << "();" << endl;
}
outFile << endl;
// Generate the nonterminal functions
for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) {
string name = i->first + "_rule";
outFile << "string " << name << "() {" << endl
<< " vector<string> results;" << endl
<< " results.push_back(\"\");" << endl
<< endl;
// Check if this rule can match an empty string
int epsilon = -1;
for (int j = 0; epsilon == -1 && j < i->second.size(); ++j)
if (i->second[j].size() == 0)
epsilon = j;
// Generate each production
// Nothing to generate for an empty rule
if (j == epsilon)
continue;
string token = i->second[j][0];
outFile << " if (nextToken == \"" << i->second[j][0] << "\") {" << endl;
else {
bool first = true;
for (auto k = nonterminalFirst[token].begin(); k != nonterminalFirst[token].end(); ++k, first = false) {
if (!first)
outFile << " || ";
outFile << "nextToken == \"" << (*k) << "\"";
}
outFile << ") {" << endl;
}
for (int k = 0; k < i->second[j].size(); ++k) {
outFile << " if(same(\"" << i->second[j][k] << "\"))" << endl
<< " results.push_back(prevTokenValue);" << endl
<< " else" << endl
<< " throw \"Syntax error - mismatched token\";" << endl;
} else
outFile << " results.push_back(" << i->second[j][k] << "_rule());" << endl;
}
// Copy rule code to output
outFile << nonterminalCode[i->first][j];
outFile << " }" << endl << endl;
}
if (epsilon == -1)
outFile << " throw \"Syntax error - unmatched token\";" << endl;
else
outFile << nonterminalCode[i->first][epsilon];
}
// Generate the main function
<< " if(argc < 2) {" << endl
<< " cout << \"Usage: <input file>\" << endl;" << endl
<< " return 1;" << endl
<< " }" << endl
<< endl
<< " ifstream file(argv[1]);" << endl
<< " string line;" << endl
<< " input = \"\";" << endl
<< endl
<< " while(true) {" << endl
<< " getline(file, line);" << endl
<< " if(!file) break;" << endl
<< " input += line + \"\\n\";" << endl
<< " }" << endl
<< endl
<< " advanceToken();" << endl
<< endl
<< " start_rule();" << endl
<< "}" << endl;
}
</syntaxhighlight>
Example grammar:
Line 362 ⟶ 367:
I've therefore applied this parser to the expression designated by the task, which only involves binary expressions and identifiers, and written a separate routine to convert the resulting AST into three-address code.
<
import (
Line 466 ⟶ 471:
last = i
}
}</
{{out}}
Line 535 ⟶ 540:
Implementation:
<
inlocale=: 4 :0 L:0
Line 575 ⟶ 580:
emit=: smoutput
</syntaxhighlight>
Task example:
<
z0001=:four*five
z0002=:one+two
z0003=:z0002*three
z0004=:z0003-z0001
z0004</
See also https://github.com/jsoftware/general_misc/blob/master/trace.ijs for a model of the underlying parser being employed here.
Line 590 ⟶ 595:
=={{header|Julia}}==
The Julia compiler's own parser is a recursive descent parser, and can be used directly here.
<
function testparser(s)
Line 598 ⟶ 603:
testparser("(one + two) * three - four * five")
</
<pre>
$(Expr(:thunk, CodeInfo(
Line 610 ⟶ 615:
</pre>
=={{header|
<syntaxhighlight lang="perl">#!/usr/bin/perl
use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator
use warnings;
use Path::Tiny;
my $h = qr/\G\s*/;
my $identifier = qr/$h([a-z]\w*)\b/i;
my $literal = qr/$h(['"])(.+?)\1/s;
my (%rules, %called, $usercode, %patches);
my $filename = './generatedparser.pl';
sub node { bless [ @_[1..$#_] ], $_[0] }
sub error { die "ERROR: ", s/\G\s*\K/<**ERROR @_**>/r, "\n" }
sub want { /$h\Q$_[1]\E/gc ? shift : error "missing '$_[1]' " }
sub addin { node $_[0] => ref $_[1] eq $_[0] ? @{$_[1]} : $_[1], pop }
local $_ = do { local $/; @ARGV ? <> : <DATA> }; # the EBNF
$usercode = s/^(#usercode.*)//ms ? $1 : "# no usercode included\n";;
$patches{PATCHUSER} = $usercode . "#end usercode\n"; # grammar support code
s/^\h*#.*\n//gm; # remove comment lines
$patches{PATCHGRAMMAR} = s/^(?:\h*\n)+//r =~ s/\n(?:\h*\n)+\z//r;
while( /$identifier\s*=/gc ) # the start of a rule
{
my $name
$rules{startsymbol} //= node RULE => $name;
my $tree = expr(0);
$rules{$name} = $rules{$name} ? addin ALT => $rules{$name}, $tree : $tree;
/$h[.;]/gc or error 'missing rule terminator, needs . or ;';
}
/\G\s*\z/ or error "incomplete parse at ", substr $_, pos($_) // 0;
%rules or error "no rules found ";
delete @called{keys %rules};
%called and die "\nERROR: undefined rule(s) <@{[ keys %called]}>\n";
sub expr # precedence climbing parser for grammer rules
{
my $tree =
/$h(NAME)\b/gc ? node $1 : # internal name matcher
/$identifier/gc ? do { $called{$1}++; node RULE => $1 } :
/$literal/gc ? node LIT => $2
/$h<(\w+)>/gc ? node ACTION => $1 :
/$h\[/gc ? node OPTION => want expr(0), ']' :
/$h\{/gc ? node REPEAT => want expr(0), '}' :
/$h\(/gc ? want expr(0), ')' :
error 'Invalid expression';
$tree =
/\G\s+/gc ? $tree :
$_[0] <= 1 && /\G(?=[[<{('"a-z])/gci ? addin SEQ => $tree, expr(2) :
$_[0] <= 0 && /\G\|/gc ? addin ALT => $tree, expr(1) :
return $tree while 1;
}
my $perlcode = "# generated code (put in Rule:: package)\n";
for my $rule ( sort keys %rules )
{
my $body = $rules{$rule}->code;
$perlcode .= "\nsub Rule::$rule\n\t{\n\t$body\n\t}\n";
}
$perlcode =~ s/^(?:\h*\n)+(?=\h*\})//gm;
$perlcode .= "\n# preceding code was generated for rules\n";
$patches{PATCHGENERATED} = $perlcode;
sub ALT::code
{
my $all = join " or\n\t", map $_->code, @{ $_[0] };
"( # alt\n\t$all )"
}
sub SEQ::code
{
my $all = join " and\n\t", map $_->code, @{ $_[0] };
"( # seq\n\t$all )"
}
sub REPEAT::code { "do { # repeat\n\t1 while @{[ $_[0][0]->code ]} ; 1 }" }
sub OPTION::code { "( # option\n\t@{[ $_[0][0]->code ]} or 1 )" }
sub RULE::code { "Rule::$_[0][0]()" }
sub ACTION::code { "( $_[0][0]() || 1 )" }
sub NAME::code { "( /\\G\$whitespace(\\w+)/gc and push \@stack, \$1 )" }
sub LIT::code { "( /\\G\$whitespace(\Q$_[0][0]\E)/gc and push \@stack, \$1 )" }
$_ = <<'END'; ##################################### template for generated code
#!/usr/bin/perl
use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator
use warnings; # WARNING: this code is generated
my @stack;
my $whitespace = qr/\s*/;
my $grammar = <<'GRAMMAR'; # make grammar rules available to usercode
PATCHGRAMMAR
GRAMMAR
PATCHUSER
PATCHGENERATED
local $_ = shift // '(one + two) * three - four * five';
eval { begin() }; # eval because it is optional
Rule::startsymbol();
eval { end() }; # eval because it is optional
/\G\s*\z/ or die "ERROR: incomplete parse\n";
END
s/(PATCH\w+)/$patches{$1}/g; # insert grammar variable stuff
path( $filename )->spew( $_ );
chmod 0555, $filename; # readonly, executable
exec 'perl', $filename, @ARGV or die "exec failed $!";
__DATA__
expr = term { plus term <gen3addr> } .
term = factor { times factor <gen3addr> } .
factor = primary [ '^' factor <gen3addr> ] .
primary = '(' expr ')' <removeparens> | NAME .
plus = "+" | "-" .
times = "*" | "/" .
#usercode -- separator for included code for actions
my $temp = '0000';
sub begin { print "parsing: $_\n\n" }
sub gen3addr
{
@stack >= 3 or die "not enough on stack";
my @three = splice @stack, -3, 3, my $t = '_' . ++$temp;
print "$t = @three\n";
}
sub removeparens
{
@stack >= 3 or die "not enough on stack";
splice @stack, -3, 3, $stack[-2];
}</syntaxhighlight>
Running the above with no arguments uses a default grammar that will solve the specified example. It produces
the following perl script (and then runs it).
<syntaxhighlight lang="perl">#!/usr/bin/perl
use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator
use warnings; # WARNING: this code is generated
my @stack;
my $whitespace = qr/\s*/;
my $grammar = <<'GRAMMAR'; # make grammar rules available to usercode
expr = term { plus term <gen3addr> } .
term = factor { times factor <gen3addr> } .
factor = primary [ '^' factor <gen3addr> ] .
primary = '(' expr ')' <removeparens> | NAME .
plus = "+" | "-" .
times = "*" | "/" .
GRAMMAR
#usercode -- separator for included code for actions
my $temp = '0000';
sub begin { print "parsing: $_\n\n" }
sub gen3addr
{
@stack >= 3 or die "not enough on stack";
my @three = splice @stack, -3, 3, my $t = '_' . ++$temp;
print "$t = @three\n";
}
sub removeparens
{
@stack >= 3 or die "not enough on stack";
splice @stack, -3, 3, $stack[-2];
}
#end usercode
# generated code (put in Rule:: package)
sub Rule::expr
{
( # seq
Rule::term() and
do { # repeat
1 while ( # seq
Rule::plus() and
Rule::term() and
( gen3addr() || 1 ) ) ; 1 } )
}
sub Rule::factor
{
( # seq
Rule::primary() and
( # option
( # seq
( /\G$whitespace(\^)/gc and push @stack, $1 ) and
Rule::factor() and
( gen3addr() || 1 ) ) or 1 ) )
}
sub Rule::plus
{
( # alt
( /\G$whitespace(\+)/gc and push @stack, $1 ) or
( /\G$whitespace(\-)/gc and push @stack, $1 ) )
}
sub Rule::primary
{
( # alt
( # seq
( /\G$whitespace(\()/gc and push @stack, $1 ) and
Rule::expr() and
( /\G$whitespace(\))/gc and push @stack, $1 ) and
( removeparens() || 1 ) ) or
( /\G$whitespace(\w+)/gc and push @stack, $1 ) )
}
sub Rule::startsymbol
{
Rule::expr()
}
sub Rule::term
{
( # seq
Rule::factor() and
do { # repeat
1 while ( # seq
Rule::times() and
Rule::factor() and
( gen3addr() || 1 ) ) ; 1 } )
}
sub Rule::times
{
( # alt
( /\G$whitespace(\*)/gc and push @stack, $1 ) or
( /\G$whitespace(\/)/gc and push @stack, $1 ) )
}
# preceding code was generated for rules
local $_ = shift // '(one + two) * three - four * five';
eval { begin() }; # eval because it is optional
Rule::startsymbol();
eval { end() }; # eval because it is optional
/\G\s*\z/ or die "ERROR: incomplete parse\n";</syntaxhighlight>
The above script can also be run stand-alone and produces the following output.
{{out}}
<pre>
parsing: (one + two) * three - four * five
_0001 = one + two
_0002 = _0001 * three
_0003 = four * five
_0004 = _0002 - _0003
</pre>
Different grammars and input can be specified on the command line.
<pre>
recursivedescentparsergenerator.pl arithexpr.y '2 * 3 + 4 * 5'
</pre>
and giving this file as "arithexpr.y"
<syntaxhighlight lang="perl"># test arith expr
expr = term { '+' term <fadd> | '-' term <fsub> } .
term = factor { '*' factor <fmul> | '/' factor <fdiv> } .
factor = '(' expr ')' <noparen> | NAME .
#usercode
sub noparen { splice @stack, -3, 3, $stack[-2]; }
sub fadd { splice @stack, -3, 3, $stack[-3] + $stack[-1] }
sub fsub { splice @stack, -3, 3, $stack[-3] - $stack[-1] }
sub fmul { splice @stack, -3, 3, $stack[-3] * $stack[-1] }
sub fdiv { splice @stack, -3, 3, $stack[-3] / $stack[-1] }
sub begin { print "expr = $_\n" }
sub end { print "answer = @{[pop @stack]}\n" }</syntaxhighlight>
will produce the following
{{out}}
<pre>
expr = 2 * 3 + 4 * 5
answer = 26
</pre>
=={{header|Phix}}==
Technically the task is asking for code which generates something like the following, so I suppose
it would actually meet the spec if it began with <code>constant src = """</code> and ended with
<code>""" puts(1,src)</code>... (and like several/most other entries on this page, this does not use a formal grammer)
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #000080;font-style:italic;">--
-- demo\rosetta\RecursiveDescentParser.exw
-- =======================================
--</span>
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">src</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">sdx</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tok</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">skip_spaces</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\t'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\r'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\n'</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
<span style="color: #000080;font-style:italic;">-- yeilds one of:
-- {"SYMBOL",ch} where ch is one of "()+-/*", or
-- {"IDENT",string}, or {"EOF"}</span>
<span style="color: #000000;">skip_spaces</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">tokstart</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">sdx</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"EOF"</span><span style="color: #0000FF;">}</span>
<span style="color: #008080;">elsif</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"()+-/*"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"SYMBOL"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">&</span><span style="color: #008000;">""</span><span style="color: #0000FF;">}</span>
<span style="color: #008080;">elsif</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'z'</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">or</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'A'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'Z'</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">!=</span><span style="color: #008000;">'_'</span>
<span style="color: #008080;">and</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #008000;">'z'</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">and</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'A'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #008000;">'Z'</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">and</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'0'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #008000;">'9'</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">exit</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"IDENT"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tokstart</span><span style="color: #0000FF;">..</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]}</span>
<span style="color: #008080;">else</span>
<span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">forward</span> <span style="color: #008080;">function</span> <span style="color: #000000;">sum_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">primary</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]=</span><span style="color: #008000;">"IDENT"</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tok</span>
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">elsif</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">={</span><span style="color: #008000;">"SYMBOL"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"("</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">sum_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">!={</span><span style="color: #008000;">"SYMBOL"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">")"</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">else</span>
<span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">mul_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">primary</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]!=</span><span style="color: #008000;">"SYMBOL"</span> <span style="color: #008080;">or</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],{</span><span style="color: #008000;">"*"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"/"</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">}</span>
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
<span style="color: #000000;">res</span><span style="color: #0000FF;">[</span><span style="color: #000000;">3</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">primary</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">sum_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">mul_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]!=</span><span style="color: #008000;">"SYMBOL"</span> <span style="color: #008080;">or</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],{</span><span style="color: #008000;">"+"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"-"</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">}</span>
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
<span style="color: #000000;">res</span><span style="color: #0000FF;">[</span><span style="color: #000000;">3</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">mul_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">nxt</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">show_ast</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">ast</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]=</span><span style="color: #008000;">"SYMBOL"</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">op</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],</span>
<span style="color: #000000;">lhs</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">show_ast</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">]),</span>
<span style="color: #000000;">rhs</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">show_ast</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">3</span><span style="color: #0000FF;">]),</span>
<span style="color: #000000;">nid</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"_%04d"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">nxt</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s = %s %s %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">nid</span><span style="color: #0000FF;">,</span><span style="color: #000000;">lhs</span><span style="color: #0000FF;">,</span><span style="color: #000000;">op</span><span style="color: #0000FF;">,</span><span style="color: #000000;">rhs</span><span style="color: #0000FF;">})</span>
<span style="color: #000000;">nxt</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">nid</span>
<span style="color: #008080;">elsif</span> <span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]=</span><span style="color: #008000;">"IDENT"</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">parse</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">source</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">src</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">source</span>
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">ast</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">sum_expr</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">!={</span><span style="color: #008000;">"EOF"</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #7060A8;">pp</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ast</span><span style="color: #0000FF;">,{</span><span style="color: #004600;">pp_Nest</span><span style="color: #0000FF;">,</span><span style="color: #000000;">4</span><span style="color: #0000FF;">})</span>
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">show_ast</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ast</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #000000;">parse</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"(one + two) * three - four * five"</span><span style="color: #0000FF;">)</span>
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">()</span>
<!--</syntaxhighlight>-->
{{out}}
<pre>
Line 754 ⟶ 1,049:
Instead of writing a full-blown generator, it is possible to solve the task partly by making use of the built-in optimizer and study the relevant AST output
<pre>
- QAST::Op(callstatic &say) <sunk> :statement_id<2> say ($one + $two) * $three - $four * $five
- QAST::Op(callstatic &infix:<->) <wanted> -
Line 770 ⟶ 1,065:
<pre>
one two + three * four five * -
</pre>
=={{header|Wren}}==
{{trans|Phix}}
{{libheader|Wren-str}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang="wren">import "./str" for Char
import "./fmt" for Fmt
class RDP {
construct parse(source) {
_src = source
_sdx = 0
_ch = null
_tok = null
_nxt = 1
nextToken()
var ast = sumExpr()
if (_tok[0] != "EOF") Fiber.abort("Something went wrong.")
printAst(ast, 0)
System.print()
showAst(ast)
}
skipSpaces() {
while (true) {
if (_sdx >= _src.count) return
_ch = _src[_sdx]
if (!" \t\r\n".contains(_ch)) return
_sdx = _sdx + 1
}
}
// yields one of:
// ["SYMBOL", ch] where ch is one of "()+-/*", or
// ["IDENT", string] or ["EOF"]
nextToken() {
skipSpaces()
var tokStart = _sdx
if (_sdx >= _src.count) {
_tok = ["EOF"]
} else if ("()+-/*".contains(_ch)) {
_sdx = _sdx + 1
_tok = ["SYMBOL", _ch]
} else if (Char.isAsciiLetter(_ch)) {
while (true) {
_sdx = _sdx + 1
if (_sdx >= _src.count) break
_ch = _src[_sdx]
if (!Char.isAsciiAlphaNum(_ch) && _ch != "_") break
}
_tok = ["IDENT", _src[tokStart..._sdx]]
} else {
Fiber.abort("Invalid token '%(_ch)'.")
}
}
primary() {
var res = []
if (_tok[0] == "IDENT") {
res = _tok.toList
nextToken()
} else if (_tok[0] == "SYMBOL" && _tok[1] == "(") {
nextToken()
res = sumExpr()
if (_tok[0] != "SYMBOL" || _tok[1] != ")") Fiber.abort("Unexpected token '%(_tok)'.")
nextToken()
} else {
Fiber.abort("Unexpected token '%(_tok)'.")
}
return res
}
mulExpr() {
var res = primary()
while (true) {
if (_tok[0] != "SYMBOL" || !"*/".contains(_tok[1])) break
res = [_tok, res, null]
nextToken()
res[2] = primary()
}
return res
}
sumExpr() {
var res = mulExpr()
while (true) {
if (_tok[0] != "SYMBOL" || !"+-".contains(_tok[1])) break
res = [_tok, res, null]
nextToken()
res[2] = mulExpr()
}
return res
}
showAst(ast) {
if (ast[0][0] == "SYMBOL") {
var op = ast[0][1]
var lhs = showAst(ast[1])
var rhs = showAst(ast[2])
var thiz = Fmt.swrite("_$04d", _nxt)
Fmt.print("$s = $s $s $s", thiz, lhs, op, rhs)
_nxt = _nxt + 1
return thiz
} else if (ast[0] == "IDENT") {
return ast[1]
}
Fiber.abort("Something went wrong.")
}
printAst(ast, level) {
for (e in ast) {
var indent = " " * level
if (!(e is List)) {
System.print(indent + e)
} else {
System.print(indent + "{")
printAst(e, level+1)
System.print(indent + "}")
}
}
}
}
RDP.parse("(one + two) * three - four * five")</syntaxhighlight>
{{out}}
<pre>
{
SYMBOL
-
}
{
{
SYMBOL
*
}
{
{
SYMBOL
+
}
{
IDENT
one
}
{
IDENT
two
}
}
{
IDENT
three
}
}
{
{
SYMBOL
*
}
{
IDENT
four
}
{
IDENT
five
}
}
_0001 = one + two
_0002 = _0001 * three
_0003 = four * five
_0004 = _0002 - _0003
</pre>
|