Parsing/Shunting-yard algorithm

From Rosetta Code
Revision as of 05:44, 6 December 2011 by rosettacode>Paddy3118 (→‎{{header|C}}: Where there's a will ...)
Parsing/Shunting-yard algorithm is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Given the operator characteristics and input from the Shunting-yard algorithm page and tables Use the algorithm to show the changes in the operator stack and RPN output as each individual token is processed.

  • Assume an input of a correct, space separated, string of tokens
  • Generate a space separated output string representing the RPN
  • Test with the input string '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3' then print and display the output here.
  • Operator precedence is given in this table:
operator precedence associativity
^ 4 Right
* 3 Left
/ 3 Left
+ 2 Left
- 2 Left
Extra credit
  • Add extra text explaining the actions and an optional comment for the action on receipt of each token.
Note
  • the handling of functions and arguments is not required.
See also

C

Requires a functioning ANSI terminal and Enter key. <lang c>#include <sys/types.h>

  1. include <regex.h>
  2. include <stdio.h>

typedef struct { char *s; int len, prec, assoc; } str_tok_t;

typedef struct { char * str; int assoc, prec; regex_t re; } pat_t;

enum assoc { A_NONE, A_L, A_R }; pat_t pat_eos = {"", A_NONE, 0};

pat_t pat_ops[] = { {"^\\)", A_NONE, -1}, {"^\\*\\*", A_R, 3}, {"^\\^", A_R, 3}, {"^\\*", A_L, 2}, {"^/", A_L, 2}, {"^\\+", A_L, 1}, {"^-", A_L, 1}, {0} };

pat_t pat_arg[] = { {"^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"}, {"^[a-zA-Z_][a-zA-Z_0-9]*"}, {"^\\(", A_L, -1}, {0} };

str_tok_t stack[256]; /* assume these are big enough */ str_tok_t queue[256]; int l_queue, l_stack;

  1. define qpush(x) queue[l_queue++] = x
  2. define spush(x) stack[l_stack++] = x
  3. define spop() stack[--l_stack]

void display(char *s) { int i; printf("\033[1;1H\033[JText | %s", s); printf("\nStack| "); for (i = 0; i < l_stack; i++) printf("%.*s ", stack[i].len, stack[i].s); printf("\nQueue| "); for (i = 0; i < l_queue; i++) printf("%.*s ", queue[i].len, queue[i].s); puts("\n\n<press enter>"); getchar(); }

int prec_booster;

  1. define fail(s1, s2) {fprintf(stderr, "[Error %s] %s\n", s1, s2); return 0;}

int init(void) { int i; pat_t *p;

for (i = 0, p = pat_ops; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);

for (i = 0, p = pat_arg; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);

return 1; }

pat_t* match(char *s, pat_t *p, str_tok_t * t, char **e) { int i; regmatch_t m;

while (*s == ' ') s++; *e = s;

if (!*s) return &pat_eos;

for (i = 0; p[i].str; i++) { if (regexec(&(p[i].re), s, 1, &m, REG_NOTEOL)) continue; t->s = s; *e = s + (t->len = m.rm_eo - m.rm_so); return p + i; } return 0; }

int parse(char *s) { pat_t *p; str_tok_t *t, tok;

prec_booster = l_queue = 0; display(s); while (*s) { p = match(s, pat_arg, &tok, &s); if (!p || p == &pat_eos) fail("parse arg", s);

/* Odd logic here. Don't actually stack the parens: don't need to. */ if (p->prec == -1) { prec_booster += 100; continue; } qpush(tok); display(s);

re_op: p = match(s, pat_ops, &tok, &s); if (!p) fail("parse op", s);

tok.assoc = p->assoc; tok.prec = p->prec;

if (p->prec > 0) tok.prec = p->prec + prec_booster; else if (p->prec == -1) { if (prec_booster < 100) fail("unmatched )", s); tok.prec = prec_booster; }

while (l_stack) { t = stack + l_stack - 1; if (!(t->prec == tok.prec && t->assoc == A_L) && t->prec <= tok.prec) break; qpush(spop()); display(s); }

if (p->prec == -1) { prec_booster -= 100; goto re_op; }

if (!p->prec) { display(s); if (prec_booster) fail("unmatched (", s); return 1; }

spush(tok); display(s); }

return 1; }

int main() { int i; char *tests[] = { "3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3", /* RC mandated: OK */ "123", /* OK */ "3+4 * 2 / ( 1 - 5 ) ^ 2 ^ 3.14", /* OK */ "(((((((1+2+3**(4 + 5))))))", /* bad parens */ "a^(b + c/d * .1e5)!", /* unknown op */ "(1**2)**3", /* OK */ 0 };

if (!init()) return 1; for (i = 0; tests[i]; i++) { printf("Testing string `%s' <enter>\n", tests[i]); getchar();

printf("string `%s': %s\n\n", tests[i], parse(tests[i]) ? "Ok" : "Error"); }

return 0; }</lang>

Output

Note: This cannot give a flavour of the true interactive output where tokens are shuffled around every time the enter key is pressed.

Testing string `3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'   <enter>

Text | 3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| 
Queue| 

<press enter>

Text |  + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| 
Queue| 3 

<press enter>

Text |  4 * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| + 
Queue| 3 

<press enter>

Text |  * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| + 
Queue| 3 4 

<press enter>

Text |  2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| + * 
Queue| 3 4 

<press enter>

Text |  / ( 1 - 5 ) ^ 2 ^ 3
Stack| + * 
Queue| 3 4 2 

<press enter>

Text |  ( 1 - 5 ) ^ 2 ^ 3
Stack| + 
Queue| 3 4 2 * 

<press enter>

Text |  ( 1 - 5 ) ^ 2 ^ 3
Stack| + / 
Queue| 3 4 2 * 

<press enter>

Text |  - 5 ) ^ 2 ^ 3
Stack| + / 
Queue| 3 4 2 * 1 

<press enter>

Text |  5 ) ^ 2 ^ 3
Stack| + / - 
Queue| 3 4 2 * 1 

<press enter>

Text |  ) ^ 2 ^ 3
Stack| + / - 
Queue| 3 4 2 * 1 5 

<press enter>

Text |  ^ 2 ^ 3
Stack| + / 
Queue| 3 4 2 * 1 5 - 

<press enter>

Text |  2 ^ 3
Stack| + / ^ 
Queue| 3 4 2 * 1 5 - 

<press enter>

Text |  ^ 3
Stack| + / ^ 
Queue| 3 4 2 * 1 5 - 2 

<press enter>

Text |  3
Stack| + / ^ ^ 
Queue| 3 4 2 * 1 5 - 2 

<press enter>

Text | 
Stack| + / ^ ^ 
Queue| 3 4 2 * 1 5 - 2 3 

<press enter>

Text | 
Stack| + / ^ 
Queue| 3 4 2 * 1 5 - 2 3 ^ 

<press enter>

Text | 
Stack| + / 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ 

<press enter>

Text | 
Stack| + 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / 

<press enter>

Text | 
Stack| 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / + 

<press enter>

Text | 
Stack| 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / + 

<press enter>

string `3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3': Ok

Testing string `123'   <enter>
^C

Python

Parenthesis are added to the operator table then special-cased in the code. This solution includes the extra credit. <lang python>from collections import namedtuple from pprint import pprint as pp

OpInfo = namedtuple('OpInfo', 'prec assoc') L, R = 'Left Right'.split()

ops = {

'^': OpInfo(prec=4, assoc=R),
'*': OpInfo(prec=3, assoc=L),
'/': OpInfo(prec=3, assoc=L),
'+': OpInfo(prec=2, assoc=L),
'-': OpInfo(prec=2, assoc=L),
'(': OpInfo(prec=9, assoc=L),
')': OpInfo(prec=0, assoc=L),
}

NUM, LPAREN, RPAREN = 'NUMBER ( )'.split()


def get_input(inp = None):

   'Inputs an expression and returns list of (TOKENTYPE, tokenvalue)'
   
   if inp is None:
       inp = input('expression: ')
   tokens = inp.strip().split()
   tokenvals = []
   for token in tokens:
       if token in ops:
           tokenvals.append((token, ops[token]))
       #elif token in (LPAREN, RPAREN):
       #    tokenvals.append((token, token))
       else:    
           tokenvals.append((NUM, token))
   return tokenvals

def shunting(tokenvals):

   outq, stack = [], []
   table = ['TOKEN,ACTION,RPN OUTPUT,OP STACK,NOTES'.split(',')]
   for token, val in tokenvals:
       note = action = 
       if token is NUM:
           action = 'Add number to output'
           outq.append(val)
           table.append( (val, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
       elif token in ops:
           t1, (p1, a1) = token, val
           v = t1
           note = 'Pop ops from stack to output' 
           while stack:
               t2, (p2, a2) = stack[-1]
               if (a1 == L and p1 <= p2) or (a1 == R and p1 < p2):
                   if t1 != RPAREN:
                       if t2 != LPAREN:
                           stack.pop()
                           action = '(Pop op)'
                           outq.append(t2)
                       else:    
                           break
                   else:        
                       if t2 != LPAREN:
                           stack.pop()
                           action = '(Pop op)'
                           outq.append(t2)
                       else:    
                           stack.pop()
                           action = '(Pop & discard "(")'
                           table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
                           break
                   table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
                   v = note = 
               else:
                   note = 
                   break
               note =  
           note =  
           if t1 != RPAREN:
               stack.append((token, val))
               action = 'Push op token to stack'
           else:
               action = 'Discard ")"'
           table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
   note = 'Drain stack to output'
   while stack:
       v = 
       t2, (p2, a2) = stack[-1]
       action = '(Pop op)'
       stack.pop()
       outq.append(t2)
       table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
       v = note = 
   return table

if __name__ == '__main__':

   infix = '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'
   print( 'For infix expression: %r\n' % infix )
   rp = shunting(get_input(infix))
   maxcolwidths = [len(max(x, key=len)) for x in zip(*rp)]
   row = rp[0]
   print( ' '.join('{cell:^{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
   for row in rp[1:]:
       print( ' '.join('{cell:<{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
   print('\n The final output RPN is: %r' % rp[-1][2])</lang>
Sample output
For infix expression: '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'

TOKEN         ACTION                RPN OUTPUT         OP STACK            NOTES            
3     Add number to output   3                                                              
+     Push op token to stack 3                         +                                    
4     Add number to output   3 4                       +                                    
*     Push op token to stack 3 4                       + *                                  
2     Add number to output   3 4 2                     + *                                  
/     (Pop op)               3 4 2 *                   +        Pop ops from stack to output
      Push op token to stack 3 4 2 *                   + /                                  
(     Push op token to stack 3 4 2 *                   + / (                                
1     Add number to output   3 4 2 * 1                 + / (                                
-     Push op token to stack 3 4 2 * 1                 + / ( -                              
5     Add number to output   3 4 2 * 1 5               + / ( -                              
)     (Pop op)               3 4 2 * 1 5 -             + / (    Pop ops from stack to output
      (Pop & discard "(")    3 4 2 * 1 5 -             + /                                  
      Discard ")"            3 4 2 * 1 5 -             + /                                  
^     Push op token to stack 3 4 2 * 1 5 -             + / ^                                
2     Add number to output   3 4 2 * 1 5 - 2           + / ^                                
^     Push op token to stack 3 4 2 * 1 5 - 2           + / ^ ^                              
3     Add number to output   3 4 2 * 1 5 - 2 3         + / ^ ^                              
      (Pop op)               3 4 2 * 1 5 - 2 3 ^       + / ^    Drain stack to output       
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^     + /                                  
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^ /   +                                    
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^ / +                                      

 The final output RPN is: '3 4 2 * 1 5 - 2 3 ^ ^ / +'