Parsing/Shunting-yard algorithm: Difference between revisions
(Add link.) |
(→{{header|C}}: I CAN'T bloody show the output on this page because it's interactive! What do you think it requires the enter key for?) |
||
Line 35: | Line 35: | ||
=={{header|C}}== |
=={{header|C}}== |
||
{{output?|C}} |
|||
Requires a functioning ANSI terminal and Enter key. |
Requires a functioning ANSI terminal and Enter key. |
||
<lang c>#include <sys/types.h> |
<lang c>#include <sys/types.h> |
Revision as of 17:00, 5 December 2011
Given the operator characteristics and input from the Shunting-yard algorithm page and tables Use the algorithm to show the changes in the operator stack and RPN output as each individual token is processed.
- Assume an input of a correct, space separated, string of tokens
- Generate a space separated output string representing the RPN
- Test with the input string
'3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'
then print and display the output here. - Operator precedence is given in this table:
operator precedence associativity ^ 4 Right * 3 Left / 3 Left + 2 Left - 2 Left
- Extra credit
- Add extra text explaining the actions and an optional comment for the action on receipt of each token.
- Note
- the handling of functions and arguments is not required.
- See also
- Parsing/RPN calculator algorithm for a method of calculating a final value from this output RPN expression.
- Parsing/RPN to infix conversion.
C
Requires a functioning ANSI terminal and Enter key. <lang c>#include <sys/types.h>
- include <regex.h>
- include <stdio.h>
typedef struct { char *s; int len, prec, assoc; } str_tok_t;
typedef struct { char * str; int assoc, prec; regex_t re; } pat_t;
enum assoc { A_NONE, A_L, A_R }; pat_t pat_eos = {"", A_NONE, 0};
pat_t pat_ops[] = { {"^\\)", A_NONE, -1}, {"^\\*\\*", A_R, 3}, {"^\\^", A_R, 3}, {"^\\*", A_L, 2}, {"^/", A_L, 2}, {"^\\+", A_L, 1}, {"^-", A_L, 1}, {0} };
pat_t pat_arg[] = { {"^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"}, {"^[a-zA-Z_][a-zA-Z_0-9]*"}, {"^\\(", A_L, -1}, {0} };
str_tok_t stack[256]; /* assume these are big enough */ str_tok_t queue[256]; int l_queue, l_stack;
- define qpush(x) queue[l_queue++] = x
- define spush(x) stack[l_stack++] = x
- define spop() stack[--l_stack]
void display(char *s) { int i; printf("\033[1;1H\033[JText | %s", s); printf("\nStack| "); for (i = 0; i < l_stack; i++) printf("%.*s ", stack[i].len, stack[i].s); printf("\nQueue| "); for (i = 0; i < l_queue; i++) printf("%.*s ", queue[i].len, queue[i].s); puts("\n\n<press enter>"); getchar(); }
int prec_booster;
- define fail(s1, s2) {fprintf(stderr, "[Error %s] %s\n", s1, s2); return 0;}
int init(void) { int i; pat_t *p;
for (i = 0, p = pat_ops; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);
for (i = 0, p = pat_arg; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);
return 1; }
pat_t* match(char *s, pat_t *p, str_tok_t * t, char **e) { int i; regmatch_t m;
while (*s == ' ') s++; *e = s;
if (!*s) return &pat_eos;
for (i = 0; p[i].str; i++) { if (regexec(&(p[i].re), s, 1, &m, REG_NOTEOL)) continue; t->s = s; *e = s + (t->len = m.rm_eo - m.rm_so); return p + i; } return 0; }
int parse(char *s) { pat_t *p; str_tok_t *t, tok;
prec_booster = l_queue = 0; display(s); while (*s) { p = match(s, pat_arg, &tok, &s); if (!p || p == &pat_eos) fail("parse arg", s);
/* Odd logic here. Don't actually stack the parens: don't need to. */ if (p->prec == -1) { prec_booster += 100; continue; } qpush(tok); display(s);
re_op: p = match(s, pat_ops, &tok, &s); if (!p) fail("parse op", s);
tok.assoc = p->assoc; tok.prec = p->prec;
if (p->prec > 0) tok.prec = p->prec + prec_booster; else if (p->prec == -1) { if (prec_booster < 100) fail("unmatched )", s); tok.prec = prec_booster; }
while (l_stack) { t = stack + l_stack - 1; if (!(t->prec == tok.prec && t->assoc == A_L) && t->prec <= tok.prec) break; qpush(spop()); display(s); }
if (p->prec == -1) { prec_booster -= 100; goto re_op; }
if (!p->prec) { display(s); if (prec_booster) fail("unmatched (", s); return 1; }
spush(tok); display(s); }
return 1; }
int main() { int i; char *tests[] = { "123", /* OK */ "3+4 * 2 / ( 1 - 5 ) ^ 2 ^ 3.14", /* OK */ "(((((((1+2+3**(4 + 5))))))", /* bad parens */ "a^(b + c/d * .1e5)!", /* unknown op */ "(1**2)**3", /* OK */ 0 };
if (!init()) return 1; for (i = 0; tests[i]; i++) { printf("Testing string `%s' <enter>\n", tests[i]); getchar();
printf("string `%s': %s\n\n", tests[i], parse(tests[i]) ? "Ok" : "Error"); }
return 0; }</lang>
Python
Parenthesis are added to the operator table then special-cased in the code. This solution includes the extra credit. <lang python>from collections import namedtuple from pprint import pprint as pp
OpInfo = namedtuple('OpInfo', 'prec assoc') L, R = 'Left Right'.split()
ops = {
'^': OpInfo(prec=4, assoc=R), '*': OpInfo(prec=3, assoc=L), '/': OpInfo(prec=3, assoc=L), '+': OpInfo(prec=2, assoc=L), '-': OpInfo(prec=2, assoc=L), '(': OpInfo(prec=9, assoc=L), ')': OpInfo(prec=0, assoc=L), }
NUM, LPAREN, RPAREN = 'NUMBER ( )'.split()
def get_input(inp = None):
'Inputs an expression and returns list of (TOKENTYPE, tokenvalue)' if inp is None: inp = input('expression: ') tokens = inp.strip().split() tokenvals = [] for token in tokens: if token in ops: tokenvals.append((token, ops[token])) #elif token in (LPAREN, RPAREN): # tokenvals.append((token, token)) else: tokenvals.append((NUM, token)) return tokenvals
def shunting(tokenvals):
outq, stack = [], [] table = ['TOKEN,ACTION,RPN OUTPUT,OP STACK,NOTES'.split(',')] for token, val in tokenvals: note = action = if token is NUM: action = 'Add number to output' outq.append(val) table.append( (val, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) elif token in ops: t1, (p1, a1) = token, val v = t1 note = 'Pop ops from stack to output' while stack: t2, (p2, a2) = stack[-1] if (a1 == L and p1 <= p2) or (a1 == R and p1 < p2): if t1 != RPAREN: if t2 != LPAREN: stack.pop() action = '(Pop op)' outq.append(t2) else: break else: if t2 != LPAREN: stack.pop() action = '(Pop op)' outq.append(t2) else: stack.pop() action = '(Pop & discard "(")' table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) break table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) v = note = else: note = break note = note = if t1 != RPAREN: stack.append((token, val)) action = 'Push op token to stack' else: action = 'Discard ")"' table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) note = 'Drain stack to output' while stack: v = t2, (p2, a2) = stack[-1] action = '(Pop op)' stack.pop() outq.append(t2) table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) v = note = return table
if __name__ == '__main__':
infix = '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3' print( 'For infix expression: %r\n' % infix ) rp = shunting(get_input(infix)) maxcolwidths = [len(max(x, key=len)) for x in zip(*rp)] row = rp[0] print( ' '.join('{cell:^{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row))) for row in rp[1:]: print( ' '.join('{cell:<{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
print('\n The final output RPN is: %r' % rp[-1][2])</lang>
- Sample output
For infix expression: '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3' TOKEN ACTION RPN OUTPUT OP STACK NOTES 3 Add number to output 3 + Push op token to stack 3 + 4 Add number to output 3 4 + * Push op token to stack 3 4 + * 2 Add number to output 3 4 2 + * / (Pop op) 3 4 2 * + Pop ops from stack to output Push op token to stack 3 4 2 * + / ( Push op token to stack 3 4 2 * + / ( 1 Add number to output 3 4 2 * 1 + / ( - Push op token to stack 3 4 2 * 1 + / ( - 5 Add number to output 3 4 2 * 1 5 + / ( - ) (Pop op) 3 4 2 * 1 5 - + / ( Pop ops from stack to output (Pop & discard "(") 3 4 2 * 1 5 - + / Discard ")" 3 4 2 * 1 5 - + / ^ Push op token to stack 3 4 2 * 1 5 - + / ^ 2 Add number to output 3 4 2 * 1 5 - 2 + / ^ ^ Push op token to stack 3 4 2 * 1 5 - 2 + / ^ ^ 3 Add number to output 3 4 2 * 1 5 - 2 3 + / ^ ^ (Pop op) 3 4 2 * 1 5 - 2 3 ^ + / ^ Drain stack to output (Pop op) 3 4 2 * 1 5 - 2 3 ^ ^ + / (Pop op) 3 4 2 * 1 5 - 2 3 ^ ^ / + (Pop op) 3 4 2 * 1 5 - 2 3 ^ ^ / + The final output RPN is: '3 4 2 * 1 5 - 2 3 ^ ^ / +'