Parsing/Shunting-yard algorithm: Difference between revisions

Content added Content deleted

Inline

Revision as of 19:25, 5 December 2011

Given the operator characteristics and input from the Shunting-yard algorithm page and tables Use the algorithm to show the changes in the operator stack and RPN output as each individual token is processed.

Assume an input of a correct, space separated, string of tokens
Generate a space separated output string representing the RPN
Test with the input string '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3' then print and display the output here.
Operator precedence is given in this table:

operator	precedence	associativity
^	4	Right
*	3	Left
/	3	Left
+	2	Left
-	2	Left

Extra credit

Add extra text explaining the actions and an optional comment for the action on receipt of each token.

Note

the handling of functions and arguments is not required.

See also

Parsing/RPN calculator algorithm for a method of calculating a final value from this output RPN expression.
Parsing/RPN to infix conversion.

C

This example does not show the output mentioned in the task description on this page (or a page linked to from here). Please ensure that it meets all task requirements and remove this message.
Note that phrases in task descriptions such as "print and display" and "print and show" for example, indicate that (reasonable length) output be a part of a language's solution.

This example is incomplete. The test from the task description is not used Please ensure that it meets all task requirements and remove this message.

Requires a functioning ANSI terminal and Enter key. <lang c>#include <sys/types.h>

include <regex.h>
include <stdio.h>

typedef struct { char *s; int len, prec, assoc; } str_tok_t;

typedef struct { char * str; int assoc, prec; regex_t re; } pat_t;

enum assoc { A_NONE, A_L, A_R }; pat_t pat_eos = {"", A_NONE, 0};

pat_t pat_ops[] = { {"^\\)", A_NONE, -1}, {"^\\*\\*", A_R, 3}, {"^\\^", A_R, 3}, {"^\\*", A_L, 2}, {"^/", A_L, 2}, {"^\\+", A_L, 1}, {"^-", A_L, 1}, {0} };

pat_t pat_arg[] = { {"^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"}, {"^[a-zA-Z_][a-zA-Z_0-9]*"}, {"^\\(", A_L, -1}, {0} };

str_tok_t stack[256]; /* assume these are big enough */ str_tok_t queue[256]; int l_queue, l_stack;

define qpush(x) queue[l_queue++] = x
define spush(x) stack[l_stack++] = x
define spop() stack[--l_stack]

void display(char *s) { int i; printf("\033[1;1H\033[JText | %s", s); printf("\nStack| "); for (i = 0; i < l_stack; i++) printf("%.*s ", stack[i].len, stack[i].s); printf("\nQueue| "); for (i = 0; i < l_queue; i++) printf("%.*s ", queue[i].len, queue[i].s); puts("\n\n<press enter>"); getchar(); }

int prec_booster;

define fail(s1, s2) {fprintf(stderr, "[Error %s] %s\n", s1, s2); return 0;}

int init(void) { int i; pat_t *p;

for (i = 0, p = pat_ops; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);

for (i = 0, p = pat_arg; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);

return 1; }

pat_t* match(char *s, pat_t *p, str_tok_t * t, char **e) { int i; regmatch_t m;

while (*s == ' ') s++; *e = s;

if (!*s) return &pat_eos;

for (i = 0; p[i].str; i++) { if (regexec(&(p[i].re), s, 1, &m, REG_NOTEOL)) continue; t->s = s; *e = s + (t->len = m.rm_eo - m.rm_so); return p + i; } return 0; }

int parse(char *s) { pat_t *p; str_tok_t *t, tok;

prec_booster = l_queue = 0; display(s); while (*s) { p = match(s, pat_arg, &tok, &s); if (!p || p == &pat_eos) fail("parse arg", s);

/* Odd logic here. Don't actually stack the parens: don't need to. */ if (p->prec == -1) { prec_booster += 100; continue; } qpush(tok); display(s);

re_op: p = match(s, pat_ops, &tok, &s); if (!p) fail("parse op", s);

tok.assoc = p->assoc; tok.prec = p->prec;

if (p->prec > 0) tok.prec = p->prec + prec_booster; else if (p->prec == -1) { if (prec_booster < 100) fail("unmatched )", s); tok.prec = prec_booster; }

while (l_stack) { t = stack + l_stack - 1; if (!(t->prec == tok.prec && t->assoc == A_L) && t->prec <= tok.prec) break; qpush(spop()); display(s); }

if (p->prec == -1) { prec_booster -= 100; goto re_op; }

if (!p->prec) { display(s); if (prec_booster) fail("unmatched (", s); return 1; }

spush(tok); display(s); }

return 1; }

int main() { int i; char *tests[] = { "123", /* OK */ "3+4 * 2 / ( 1 - 5 ) ^ 2 ^ 3.14", /* OK */ "(((((((1+2+3**(4 + 5))))))", /* bad parens */ "a^(b + c/d * .1e5)!", /* unknown op */ "(1**2)**3", /* OK */ 0 };

if (!init()) return 1; for (i = 0; tests[i]; i++) { printf("Testing string `%s' <enter>\n", tests[i]); getchar();

printf("string `%s': %s\n\n", tests[i], parse(tests[i]) ? "Ok" : "Error"); }

return 0; }</lang>

Python

Parenthesis are added to the operator table then special-cased in the code. This solution includes the extra credit. <lang python>from collections import namedtuple from pprint import pprint as pp

OpInfo = namedtuple('OpInfo', 'prec assoc') L, R = 'Left Right'.split()

ops = {

'^': OpInfo(prec=4, assoc=R),
'*': OpInfo(prec=3, assoc=L),
'/': OpInfo(prec=3, assoc=L),
'+': OpInfo(prec=2, assoc=L),
'-': OpInfo(prec=2, assoc=L),
'(': OpInfo(prec=9, assoc=L),
')': OpInfo(prec=0, assoc=L),
}

NUM, LPAREN, RPAREN = 'NUMBER ( )'.split()

def get_input(inp = None):

   'Inputs an expression and returns list of (TOKENTYPE, tokenvalue)'
   
   if inp is None:
       inp = input('expression: ')
   tokens = inp.strip().split()
   tokenvals = []
   for token in tokens:
       if token in ops:
           tokenvals.append((token, ops[token]))
       #elif token in (LPAREN, RPAREN):
       #    tokenvals.append((token, token))
       else:    
           tokenvals.append((NUM, token))
   return tokenvals

def shunting(tokenvals):

   outq, stack = [], []
   table = ['TOKEN,ACTION,RPN OUTPUT,OP STACK,NOTES'.split(',')]
   for token, val in tokenvals:
       note = action = 
       if token is NUM:
           action = 'Add number to output'
           outq.append(val)
           table.append( (val, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
       elif token in ops:
           t1, (p1, a1) = token, val
           v = t1
           note = 'Pop ops from stack to output' 
           while stack:
               t2, (p2, a2) = stack[-1]
               if (a1 == L and p1 <= p2) or (a1 == R and p1 < p2):
                   if t1 != RPAREN:
                       if t2 != LPAREN:
                           stack.pop()
                           action = '(Pop op)'
                           outq.append(t2)
                       else:    
                           break
                   else:        
                       if t2 != LPAREN:
                           stack.pop()
                           action = '(Pop op)'
                           outq.append(t2)
                       else:    
                           stack.pop()
                           action = '(Pop & discard "(")'
                           table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
                           break
                   table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
                   v = note = 
               else:
                   note = 
                   break
               note =  
           note =  
           if t1 != RPAREN:
               stack.append((token, val))
               action = 'Push op token to stack'
           else:
               action = 'Discard ")"'
           table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
   note = 'Drain stack to output'
   while stack:
       v = 
       t2, (p2, a2) = stack[-1]
       action = '(Pop op)'
       stack.pop()
       outq.append(t2)
       table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
       v = note = 
   return table

if __name__ == '__main__':

   infix = '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'
   print( 'For infix expression: %r\n' % infix )
   rp = shunting(get_input(infix))
   maxcolwidths = [len(max(x, key=len)) for x in zip(*rp)]
   row = rp[0]
   print( ' '.join('{cell:^{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
   for row in rp[1:]:
       print( ' '.join('{cell:<{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))

   print('\n The final output RPN is: %r' % rp[-1][2])</lang>

Sample output

For infix expression: '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'

TOKEN         ACTION                RPN OUTPUT         OP STACK            NOTES            
3     Add number to output   3                                                              
+     Push op token to stack 3                         +                                    
4     Add number to output   3 4                       +                                    
*     Push op token to stack 3 4                       + *                                  
2     Add number to output   3 4 2                     + *                                  
/     (Pop op)               3 4 2 *                   +        Pop ops from stack to output
      Push op token to stack 3 4 2 *                   + /                                  
(     Push op token to stack 3 4 2 *                   + / (                                
1     Add number to output   3 4 2 * 1                 + / (                                
-     Push op token to stack 3 4 2 * 1                 + / ( -                              
5     Add number to output   3 4 2 * 1 5               + / ( -                              
)     (Pop op)               3 4 2 * 1 5 -             + / (    Pop ops from stack to output
      (Pop & discard "(")    3 4 2 * 1 5 -             + /                                  
      Discard ")"            3 4 2 * 1 5 -             + /                                  
^     Push op token to stack 3 4 2 * 1 5 -             + / ^                                
2     Add number to output   3 4 2 * 1 5 - 2           + / ^                                
^     Push op token to stack 3 4 2 * 1 5 - 2           + / ^ ^                              
3     Add number to output   3 4 2 * 1 5 - 2 3         + / ^ ^                              
      (Pop op)               3 4 2 * 1 5 - 2 3 ^       + / ^    Drain stack to output       
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^     + /                                  
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^ /   +                                    
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^ / +                                      

 The final output RPN is: '3 4 2 * 1 5 - 2 3 ^ ^ / +'

@@ Line 35: / Line 35: @@
 =={{header|C}}==
+{{output?|C}}
+{{incomplete|C|The test from the task description is not used}}
 Requires a functioning ANSI terminal and Enter key.
 <lang c>#include <sys/types.h>