Parsing/Shunting-yard algorithm: Difference between revisions
(→Tcl: Added implementation) |
|||
Line 1: | Line 1: | ||
{{draft task}} |
{{draft task}} |
||
Given the operator characteristics and input from the [[wp:Shunting-yard_algorithm|Shunting-yard algorithm]] page and tables |
Given the operator characteristics and input from the [[wp:Shunting-yard_algorithm|Shunting-yard algorithm]] page and tables |
||
Use the algorithm to show the changes in the operator stack and RPN output |
Use the algorithm to show the changes in the operator stack and RPN output |
||
Line 488: | Line 487: | ||
The final output RPN is: '3 4 2 * 1 5 - 2 3 ^ ^ / +'</pre> |
The final output RPN is: '3 4 2 * 1 5 - 2 3 ^ ^ / +'</pre> |
||
=={{header|Tcl}}== |
|||
<lang tcl>package require Tcl 8.5 |
|||
# Helpers |
|||
proc tokenize {str} { |
|||
regexp -all -inline {[\d.]+|[-*^+/()]} $str |
|||
} |
|||
proc precedence op { |
|||
dict get {^ 4 * 3 / 3 + 2 - 2} $op |
|||
} |
|||
proc associativity op { |
|||
if {$op eq "^"} {return "right"} else {return "left"} |
|||
} |
|||
proc shunting {expression} { |
|||
set stack {} |
|||
foreach token [tokenize $expression] { |
|||
if {[string is double $token]} { |
|||
puts "add to output: $token" |
|||
lappend output $token |
|||
} elseif {$token eq "("} { |
|||
puts "push parenthesis" |
|||
lappend stack $token |
|||
} elseif {$token eq ")"} { |
|||
puts "popping to parenthesis" |
|||
while {[lindex $stack end] ne "("} { |
|||
lappend output [lindex $stack end] |
|||
set stack [lreplace $stack end end] |
|||
puts "...popped [lindex $output end] to output" |
|||
} |
|||
set stack [lreplace $stack end end] |
|||
puts "...found parenthesis" |
|||
} else { |
|||
puts "adding operator: $token" |
|||
set p [precedence $token] |
|||
set a [associativity $token] |
|||
while {[llength $stack]} { |
|||
set o2 [lindex $stack end] |
|||
if { |
|||
$o2 ne "(" && |
|||
(($a eq "left" && $p <= [precedence $o2]) || |
|||
($a eq "right" && $p < [precedence $o2])) |
|||
} then { |
|||
puts "...popped operator $o2 to output" |
|||
lappend output $o2 |
|||
set stack [lreplace $stack end end] |
|||
} else { |
|||
break |
|||
} |
|||
} |
|||
lappend stack $token |
|||
} |
|||
puts "\t\tOutput:\t$output\n\t\tStack:\t$stack" |
|||
} |
|||
puts "transferring tokens from stack to output" |
|||
lappend output {*}[lreverse $stack] |
|||
} |
|||
puts [shunting "3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3"]</lang> |
|||
Output: |
|||
<pre> |
|||
add to output: 3 |
|||
Output: 3 |
|||
Stack: |
|||
adding operator: + |
|||
Output: 3 |
|||
Stack: + |
|||
add to output: 4 |
|||
Output: 3 4 |
|||
Stack: + |
|||
adding operator: * |
|||
Output: 3 4 |
|||
Stack: + * |
|||
add to output: 2 |
|||
Output: 3 4 2 |
|||
Stack: + * |
|||
adding operator: / |
|||
...popped operator * to output |
|||
Output: 3 4 2 * |
|||
Stack: + / |
|||
push parenthesis |
|||
Output: 3 4 2 * |
|||
Stack: + / ( |
|||
add to output: 1 |
|||
Output: 3 4 2 * 1 |
|||
Stack: + / ( |
|||
adding operator: - |
|||
Output: 3 4 2 * 1 |
|||
Stack: + / ( - |
|||
add to output: 5 |
|||
Output: 3 4 2 * 1 5 |
|||
Stack: + / ( - |
|||
popping to parenthesis |
|||
...popped - to output |
|||
...found parenthesis |
|||
Output: 3 4 2 * 1 5 - |
|||
Stack: + / |
|||
adding operator: ^ |
|||
Output: 3 4 2 * 1 5 - |
|||
Stack: + / ^ |
|||
add to output: 2 |
|||
Output: 3 4 2 * 1 5 - 2 |
|||
Stack: + / ^ |
|||
adding operator: ^ |
|||
Output: 3 4 2 * 1 5 - 2 |
|||
Stack: + / ^ ^ |
|||
add to output: 3 |
|||
Output: 3 4 2 * 1 5 - 2 3 |
|||
Stack: + / ^ ^ |
|||
transferring tokens from stack to output |
|||
3 4 2 * 1 5 - 2 3 ^ ^ / + |
|||
</pre> |
Revision as of 08:25, 6 December 2011
Given the operator characteristics and input from the Shunting-yard algorithm page and tables Use the algorithm to show the changes in the operator stack and RPN output as each individual token is processed.
- Assume an input of a correct, space separated, string of tokens
- Generate a space separated output string representing the RPN
- Test with the input string
'3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'
then print and display the output here. - Operator precedence is given in this table:
operator precedence associativity ^ 4 Right * 3 Left / 3 Left + 2 Left - 2 Left
- Extra credit
- Add extra text explaining the actions and an optional comment for the action on receipt of each token.
- Note
- the handling of functions and arguments is not required.
- See also
- Parsing/RPN calculator algorithm for a method of calculating a final value from this output RPN expression.
- Parsing/RPN to infix conversion.
C
Requires a functioning ANSI terminal and Enter key. <lang c>#include <sys/types.h>
- include <regex.h>
- include <stdio.h>
typedef struct { const char *s; int len, prec, assoc; } str_tok_t;
typedef struct { const char * str; int assoc, prec; regex_t re; } pat_t;
enum assoc { A_NONE, A_L, A_R }; pat_t pat_eos = {"", A_NONE, 0};
pat_t pat_ops[] = { {"^\\)", A_NONE, -1}, {"^\\*\\*", A_R, 3}, {"^\\^", A_R, 3}, {"^\\*", A_L, 2}, {"^/", A_L, 2}, {"^\\+", A_L, 1}, {"^-", A_L, 1}, {0} };
pat_t pat_arg[] = { {"^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"}, {"^[a-zA-Z_][a-zA-Z_0-9]*"}, {"^\\(", A_L, -1}, {0} };
str_tok_t stack[256]; /* assume these are big enough */ str_tok_t queue[256]; int l_queue, l_stack;
- define qpush(x) queue[l_queue++] = x
- define spush(x) stack[l_stack++] = x
- define spop() stack[--l_stack]
void display(const char *s) { int i; printf("\033[1;1H\033[JText | %s", s); printf("\nStack| "); for (i = 0; i < l_stack; i++) printf("%.*s ", stack[i].len, stack[i].s); // uses C99 format strings printf("\nQueue| "); for (i = 0; i < l_queue; i++) printf("%.*s ", queue[i].len, queue[i].s); puts("\n\n<press enter>"); getchar(); }
int prec_booster;
- define fail(s1, s2) {fprintf(stderr, "[Error %s] %s\n", s1, s2); return 0;}
int init(void) { int i; pat_t *p;
for (i = 0, p = pat_ops; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);
for (i = 0, p = pat_arg; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);
return 1; }
pat_t* match(const char *s, pat_t *p, str_tok_t * t, const char **e) { int i; regmatch_t m;
while (*s == ' ') s++; *e = s;
if (!*s) return &pat_eos;
for (i = 0; p[i].str; i++) { if (regexec(&(p[i].re), s, 1, &m, REG_NOTEOL)) continue; t->s = s; *e = s + (t->len = m.rm_eo - m.rm_so); return p + i; } return 0; }
int parse(const char *s) { pat_t *p; str_tok_t *t, tok;
prec_booster = l_queue = 0; display(s); while (*s) { p = match(s, pat_arg, &tok, &s); if (!p || p == &pat_eos) fail("parse arg", s);
/* Odd logic here. Don't actually stack the parens: don't need to. */ if (p->prec == -1) { prec_booster += 100; continue; } qpush(tok); display(s);
re_op: p = match(s, pat_ops, &tok, &s); if (!p) fail("parse op", s);
tok.assoc = p->assoc; tok.prec = p->prec;
if (p->prec > 0) tok.prec = p->prec + prec_booster; else if (p->prec == -1) { if (prec_booster < 100) fail("unmatched )", s); tok.prec = prec_booster; }
while (l_stack) { t = stack + l_stack - 1; if (!(t->prec == tok.prec && t->assoc == A_L) && t->prec <= tok.prec) break; qpush(spop()); display(s); }
if (p->prec == -1) { prec_booster -= 100; goto re_op; }
if (!p->prec) { display(s); if (prec_booster) fail("unmatched (", s); return 1; }
spush(tok); display(s); }
return 1; }
int main() { int i; const char *tests[] = { "3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3", /* RC mandated: OK */ "123", /* OK */ "3+4 * 2 / ( 1 - 5 ) ^ 2 ^ 3.14", /* OK */ "(((((((1+2+3**(4 + 5))))))", /* bad parens */ "a^(b + c/d * .1e5)!", /* unknown op */ "(1**2)**3", /* OK */ 0 };
if (!init()) return 1; for (i = 0; tests[i]; i++) { printf("Testing string `%s' <enter>\n", tests[i]); getchar();
printf("string `%s': %s\n\n", tests[i], parse(tests[i]) ? "Ok" : "Error"); }
return 0; }</lang>
- Output
Note: This cannot give a flavour of the true interactive output where tokens are shuffled around every time the enter key is pressed.
Testing string `3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3' <enter> Text | 3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3 Stack| Queue| <press enter> Text | + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3 Stack| Queue| 3 <press enter> Text | 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3 Stack| + Queue| 3 <press enter> Text | * 2 / ( 1 - 5 ) ^ 2 ^ 3 Stack| + Queue| 3 4 <press enter> Text | 2 / ( 1 - 5 ) ^ 2 ^ 3 Stack| + * Queue| 3 4 <press enter> Text | / ( 1 - 5 ) ^ 2 ^ 3 Stack| + * Queue| 3 4 2 <press enter> Text | ( 1 - 5 ) ^ 2 ^ 3 Stack| + Queue| 3 4 2 * <press enter> Text | ( 1 - 5 ) ^ 2 ^ 3 Stack| + / Queue| 3 4 2 * <press enter> Text | - 5 ) ^ 2 ^ 3 Stack| + / Queue| 3 4 2 * 1 <press enter> Text | 5 ) ^ 2 ^ 3 Stack| + / - Queue| 3 4 2 * 1 <press enter> Text | ) ^ 2 ^ 3 Stack| + / - Queue| 3 4 2 * 1 5 <press enter> Text | ^ 2 ^ 3 Stack| + / Queue| 3 4 2 * 1 5 - <press enter> Text | 2 ^ 3 Stack| + / ^ Queue| 3 4 2 * 1 5 - <press enter> Text | ^ 3 Stack| + / ^ Queue| 3 4 2 * 1 5 - 2 <press enter> Text | 3 Stack| + / ^ ^ Queue| 3 4 2 * 1 5 - 2 <press enter> Text | Stack| + / ^ ^ Queue| 3 4 2 * 1 5 - 2 3 <press enter> Text | Stack| + / ^ Queue| 3 4 2 * 1 5 - 2 3 ^ <press enter> Text | Stack| + / Queue| 3 4 2 * 1 5 - 2 3 ^ ^ <press enter> Text | Stack| + Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / <press enter> Text | Stack| Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / + <press enter> Text | Stack| Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / + <press enter> string `3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3': Ok Testing string `123' <enter> ^C
Python
Parenthesis are added to the operator table then special-cased in the code. This solution includes the extra credit. <lang python>from collections import namedtuple from pprint import pprint as pp
OpInfo = namedtuple('OpInfo', 'prec assoc') L, R = 'Left Right'.split()
ops = {
'^': OpInfo(prec=4, assoc=R), '*': OpInfo(prec=3, assoc=L), '/': OpInfo(prec=3, assoc=L), '+': OpInfo(prec=2, assoc=L), '-': OpInfo(prec=2, assoc=L), '(': OpInfo(prec=9, assoc=L), ')': OpInfo(prec=0, assoc=L), }
NUM, LPAREN, RPAREN = 'NUMBER ( )'.split()
def get_input(inp = None):
'Inputs an expression and returns list of (TOKENTYPE, tokenvalue)' if inp is None: inp = input('expression: ') tokens = inp.strip().split() tokenvals = [] for token in tokens: if token in ops: tokenvals.append((token, ops[token])) #elif token in (LPAREN, RPAREN): # tokenvals.append((token, token)) else: tokenvals.append((NUM, token)) return tokenvals
def shunting(tokenvals):
outq, stack = [], [] table = ['TOKEN,ACTION,RPN OUTPUT,OP STACK,NOTES'.split(',')] for token, val in tokenvals: note = action = if token is NUM: action = 'Add number to output' outq.append(val) table.append( (val, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) elif token in ops: t1, (p1, a1) = token, val v = t1 note = 'Pop ops from stack to output' while stack: t2, (p2, a2) = stack[-1] if (a1 == L and p1 <= p2) or (a1 == R and p1 < p2): if t1 != RPAREN: if t2 != LPAREN: stack.pop() action = '(Pop op)' outq.append(t2) else: break else: if t2 != LPAREN: stack.pop() action = '(Pop op)' outq.append(t2) else: stack.pop() action = '(Pop & discard "(")' table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) break table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) v = note = else: note = break note = note = if t1 != RPAREN: stack.append((token, val)) action = 'Push op token to stack' else: action = 'Discard ")"' table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) note = 'Drain stack to output' while stack: v = t2, (p2, a2) = stack[-1] action = '(Pop op)' stack.pop() outq.append(t2) table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) v = note = return table
if __name__ == '__main__':
infix = '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3' print( 'For infix expression: %r\n' % infix ) rp = shunting(get_input(infix)) maxcolwidths = [len(max(x, key=len)) for x in zip(*rp)] row = rp[0] print( ' '.join('{cell:^{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row))) for row in rp[1:]: print( ' '.join('{cell:<{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
print('\n The final output RPN is: %r' % rp[-1][2])</lang>
- Sample output
For infix expression: '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3' TOKEN ACTION RPN OUTPUT OP STACK NOTES 3 Add number to output 3 + Push op token to stack 3 + 4 Add number to output 3 4 + * Push op token to stack 3 4 + * 2 Add number to output 3 4 2 + * / (Pop op) 3 4 2 * + Pop ops from stack to output Push op token to stack 3 4 2 * + / ( Push op token to stack 3 4 2 * + / ( 1 Add number to output 3 4 2 * 1 + / ( - Push op token to stack 3 4 2 * 1 + / ( - 5 Add number to output 3 4 2 * 1 5 + / ( - ) (Pop op) 3 4 2 * 1 5 - + / ( Pop ops from stack to output (Pop & discard "(") 3 4 2 * 1 5 - + / Discard ")" 3 4 2 * 1 5 - + / ^ Push op token to stack 3 4 2 * 1 5 - + / ^ 2 Add number to output 3 4 2 * 1 5 - 2 + / ^ ^ Push op token to stack 3 4 2 * 1 5 - 2 + / ^ ^ 3 Add number to output 3 4 2 * 1 5 - 2 3 + / ^ ^ (Pop op) 3 4 2 * 1 5 - 2 3 ^ + / ^ Drain stack to output (Pop op) 3 4 2 * 1 5 - 2 3 ^ ^ + / (Pop op) 3 4 2 * 1 5 - 2 3 ^ ^ / + (Pop op) 3 4 2 * 1 5 - 2 3 ^ ^ / + The final output RPN is: '3 4 2 * 1 5 - 2 3 ^ ^ / +'
Tcl
<lang tcl>package require Tcl 8.5
- Helpers
proc tokenize {str} {
regexp -all -inline {[\d.]+|[-*^+/()]} $str
} proc precedence op {
dict get {^ 4 * 3 / 3 + 2 - 2} $op
} proc associativity op {
if {$op eq "^"} {return "right"} else {return "left"}
}
proc shunting {expression} {
set stack {} foreach token [tokenize $expression] {
if {[string is double $token]} { puts "add to output: $token" lappend output $token } elseif {$token eq "("} { puts "push parenthesis" lappend stack $token } elseif {$token eq ")"} { puts "popping to parenthesis" while {[lindex $stack end] ne "("} { lappend output [lindex $stack end] set stack [lreplace $stack end end] puts "...popped [lindex $output end] to output" } set stack [lreplace $stack end end] puts "...found parenthesis" } else { puts "adding operator: $token" set p [precedence $token] set a [associativity $token] while {[llength $stack]} { set o2 [lindex $stack end] if { $o2 ne "(" && (($a eq "left" && $p <= [precedence $o2]) || ($a eq "right" && $p < [precedence $o2])) } then { puts "...popped operator $o2 to output" lappend output $o2 set stack [lreplace $stack end end] } else { break } } lappend stack $token } puts "\t\tOutput:\t$output\n\t\tStack:\t$stack"
} puts "transferring tokens from stack to output" lappend output {*}[lreverse $stack]
}
puts [shunting "3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3"]</lang> Output:
add to output: 3 Output: 3 Stack: adding operator: + Output: 3 Stack: + add to output: 4 Output: 3 4 Stack: + adding operator: * Output: 3 4 Stack: + * add to output: 2 Output: 3 4 2 Stack: + * adding operator: / ...popped operator * to output Output: 3 4 2 * Stack: + / push parenthesis Output: 3 4 2 * Stack: + / ( add to output: 1 Output: 3 4 2 * 1 Stack: + / ( adding operator: - Output: 3 4 2 * 1 Stack: + / ( - add to output: 5 Output: 3 4 2 * 1 5 Stack: + / ( - popping to parenthesis ...popped - to output ...found parenthesis Output: 3 4 2 * 1 5 - Stack: + / adding operator: ^ Output: 3 4 2 * 1 5 - Stack: + / ^ add to output: 2 Output: 3 4 2 * 1 5 - 2 Stack: + / ^ adding operator: ^ Output: 3 4 2 * 1 5 - 2 Stack: + / ^ ^ add to output: 3 Output: 3 4 2 * 1 5 - 2 3 Stack: + / ^ ^ transferring tokens from stack to output 3 4 2 * 1 5 - 2 3 ^ ^ / +