Parsing/Shunting-yard algorithm

From Rosetta Code
Revision as of 05:24, 8 December 2011 by rosettacode>Paddy3118 (Promoted to task)
Task
Parsing/Shunting-yard algorithm
You are encouraged to solve this task according to the task description, using any language you may know.

Given the operator characteristics and input from the Shunting-yard algorithm page and tables Use the algorithm to show the changes in the operator stack and RPN output as each individual token is processed.

  • Assume an input of a correct, space separated, string of tokens
  • Generate a space separated output string representing the RPN
  • Test with the input string '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3' then print and display the output here.
  • Operator precedence is given in this table:
operator precedence associativity
^ 4 Right
* 3 Left
/ 3 Left
+ 2 Left
- 2 Left
Extra credit
  • Add extra text explaining the actions and an optional comment for the action on receipt of each token.
Note
  • the handling of functions and arguments is not required.
See also

AutoHotkey

Works with: AutoHotkey_L

<lang AHK>SetBatchLines -1

  1. NoEnv

expr := "3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3"

output := "Testing string '" expr "'`r`n`r`nToken`tOutput Queue"

          . Space(StrLen(expr)-StrLen("Output Queue")+2) "OP Stack"
define a stack with semantic .push() and .pop() funcs

stack := {push: func("ObjInsert"), pop: func("ObjRemove"), peek: func("Peek")}

Loop Parse, expr, %A_Space% {

      token := A_LoopField
      if token is number
              Q .= token A_Space
      if isOp(token){
              o1 := token
              while   isOp(o2 := stack.peek())
                      and ((isLeft(o1)  and Precedence(o1) <= Precedence(o2))
                      or  (isRight(o1) and Precedence(o1) <  Precedence(o2)))
                  Q .= stack.pop() A_Space
              stack.push(o1)
      }
      If ( token = "(" )
              stack.push(token)
      If ( token = ")" )
      {
              While ((t := stack.pop()) != "(") && (t != "")
                      Q .= t A_Space
              if (t = "")
                      throw Exception("Unmatched parenthesis. "
                         . "Character number " A_Index)
      }
      output .= "`r`n" token Space(7) Q Space(StrLen(expr)+2-StrLen(Q)) 
              . Disp(stack)

} output .= "`r`n(empty stack to output)" While (t := stack.pop()) != ""

      if InStr("()", t)
              throw Exception("Unmatched parenthesis.")
      else    Q .= t A_Space, output .= "`r`n" Space(8) Q 
                      . Space(StrLen(expr)+2-StrLen(Q)) Disp(stack)

output .= "`r`n`r`nFinal string: '" Q "'" clipboard := output

isOp(t){

      return (!!InStr("+-*/^", t) && t)

} Peek(this){

      r := this.Remove(), this.Insert(r)
      return r

} IsLeft(o){

      return !!InStr("*/+-", o)

} IsRight(o){

      return o = "^"

} Precedence(o){

      return (InStr("+-/*^", o)+3)//2

} Disp(obj){

      for each, val in obj
              o := val . o
      return  o

} Space(n){

      return n>0 ? A_Space Space(n-1) : ""

}</lang>

Output
Testing string '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'

Token	Output Queue                   OP Stack
3       3                              
+       3                              +
4       3 4                            +
*       3 4                            *+
2       3 4 2                          *+
/       3 4 2 *                        /+
(       3 4 2 *                        (/+
1       3 4 2 * 1                      (/+
-       3 4 2 * 1                      -(/+
5       3 4 2 * 1 5                    -(/+
)       3 4 2 * 1 5 -                  /+
^       3 4 2 * 1 5 -                  ^/+
2       3 4 2 * 1 5 - 2                ^/+
^       3 4 2 * 1 5 - 2                ^^/+
3       3 4 2 * 1 5 - 2 3              ^^/+
(empty stack to output)
        3 4 2 * 1 5 - 2 3 ^            ^/+
        3 4 2 * 1 5 - 2 3 ^ ^          /+
        3 4 2 * 1 5 - 2 3 ^ ^ /        +
        3 4 2 * 1 5 - 2 3 ^ ^ / +      

Final string: '3 4 2 * 1 5 - 2 3 ^ ^ / + '

C

Requires a functioning ANSI terminal and Enter key. <lang c>#include <sys/types.h>

  1. include <regex.h>
  2. include <stdio.h>

typedef struct { const char *s; int len, prec, assoc; } str_tok_t;

typedef struct { const char * str; int assoc, prec; regex_t re; } pat_t;

enum assoc { A_NONE, A_L, A_R }; pat_t pat_eos = {"", A_NONE, 0};

pat_t pat_ops[] = { {"^\\)", A_NONE, -1}, {"^\\*\\*", A_R, 3}, {"^\\^", A_R, 3}, {"^\\*", A_L, 2}, {"^/", A_L, 2}, {"^\\+", A_L, 1}, {"^-", A_L, 1}, {0} };

pat_t pat_arg[] = { {"^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"}, {"^[a-zA-Z_][a-zA-Z_0-9]*"}, {"^\\(", A_L, -1}, {0} };

str_tok_t stack[256]; /* assume these are big enough */ str_tok_t queue[256]; int l_queue, l_stack;

  1. define qpush(x) queue[l_queue++] = x
  2. define spush(x) stack[l_stack++] = x
  3. define spop() stack[--l_stack]

void display(const char *s) { int i; printf("\033[1;1H\033[JText | %s", s); printf("\nStack| "); for (i = 0; i < l_stack; i++) printf("%.*s ", stack[i].len, stack[i].s); // uses C99 format strings printf("\nQueue| "); for (i = 0; i < l_queue; i++) printf("%.*s ", queue[i].len, queue[i].s); puts("\n\n<press enter>"); getchar(); }

int prec_booster;

  1. define fail(s1, s2) {fprintf(stderr, "[Error %s] %s\n", s1, s2); return 0;}

int init(void) { int i; pat_t *p;

for (i = 0, p = pat_ops; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);

for (i = 0, p = pat_arg; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);

return 1; }

pat_t* match(const char *s, pat_t *p, str_tok_t * t, const char **e) { int i; regmatch_t m;

while (*s == ' ') s++; *e = s;

if (!*s) return &pat_eos;

for (i = 0; p[i].str; i++) { if (regexec(&(p[i].re), s, 1, &m, REG_NOTEOL)) continue; t->s = s; *e = s + (t->len = m.rm_eo - m.rm_so); return p + i; } return 0; }

int parse(const char *s) { pat_t *p; str_tok_t *t, tok;

prec_booster = l_queue = 0; display(s); while (*s) { p = match(s, pat_arg, &tok, &s); if (!p || p == &pat_eos) fail("parse arg", s);

/* Odd logic here. Don't actually stack the parens: don't need to. */ if (p->prec == -1) { prec_booster += 100; continue; } qpush(tok); display(s);

re_op: p = match(s, pat_ops, &tok, &s); if (!p) fail("parse op", s);

tok.assoc = p->assoc; tok.prec = p->prec;

if (p->prec > 0) tok.prec = p->prec + prec_booster; else if (p->prec == -1) { if (prec_booster < 100) fail("unmatched )", s); tok.prec = prec_booster; }

while (l_stack) { t = stack + l_stack - 1; if (!(t->prec == tok.prec && t->assoc == A_L) && t->prec <= tok.prec) break; qpush(spop()); display(s); }

if (p->prec == -1) { prec_booster -= 100; goto re_op; }

if (!p->prec) { display(s); if (prec_booster) fail("unmatched (", s); return 1; }

spush(tok); display(s); }

return 1; }

int main() { int i; const char *tests[] = { "3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3", /* RC mandated: OK */ "123", /* OK */ "3+4 * 2 / ( 1 - 5 ) ^ 2 ^ 3.14", /* OK */ "(((((((1+2+3**(4 + 5))))))", /* bad parens */ "a^(b + c/d * .1e5)!", /* unknown op */ "(1**2)**3", /* OK */ 0 };

if (!init()) return 1; for (i = 0; tests[i]; i++) { printf("Testing string `%s' <enter>\n", tests[i]); getchar();

printf("string `%s': %s\n\n", tests[i], parse(tests[i]) ? "Ok" : "Error"); }

return 0; }</lang>

Output

Note: This cannot give a flavour of the true interactive output where tokens are shuffled around every time the enter key is pressed.

Testing string `3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'   <enter>

Text | 3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| 
Queue| 

<press enter>

Text |  + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| 
Queue| 3 

<press enter>

Text |  4 * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| + 
Queue| 3 

<press enter>

Text |  * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| + 
Queue| 3 4 

<press enter>

Text |  2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| + * 
Queue| 3 4 

<press enter>

Text |  / ( 1 - 5 ) ^ 2 ^ 3
Stack| + * 
Queue| 3 4 2 

<press enter>

Text |  ( 1 - 5 ) ^ 2 ^ 3
Stack| + 
Queue| 3 4 2 * 

<press enter>

Text |  ( 1 - 5 ) ^ 2 ^ 3
Stack| + / 
Queue| 3 4 2 * 

<press enter>

Text |  - 5 ) ^ 2 ^ 3
Stack| + / 
Queue| 3 4 2 * 1 

<press enter>

Text |  5 ) ^ 2 ^ 3
Stack| + / - 
Queue| 3 4 2 * 1 

<press enter>

Text |  ) ^ 2 ^ 3
Stack| + / - 
Queue| 3 4 2 * 1 5 

<press enter>

Text |  ^ 2 ^ 3
Stack| + / 
Queue| 3 4 2 * 1 5 - 

<press enter>

Text |  2 ^ 3
Stack| + / ^ 
Queue| 3 4 2 * 1 5 - 

<press enter>

Text |  ^ 3
Stack| + / ^ 
Queue| 3 4 2 * 1 5 - 2 

<press enter>

Text |  3
Stack| + / ^ ^ 
Queue| 3 4 2 * 1 5 - 2 

<press enter>

Text | 
Stack| + / ^ ^ 
Queue| 3 4 2 * 1 5 - 2 3 

<press enter>

Text | 
Stack| + / ^ 
Queue| 3 4 2 * 1 5 - 2 3 ^ 

<press enter>

Text | 
Stack| + / 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ 

<press enter>

Text | 
Stack| + 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / 

<press enter>

Text | 
Stack| 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / + 

<press enter>

Text | 
Stack| 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / + 

<press enter>

string `3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3': Ok

Testing string `123'   <enter>
^C

Python

Parenthesis are added to the operator table then special-cased in the code. This solution includes the extra credit. <lang python>from collections import namedtuple from pprint import pprint as pp

OpInfo = namedtuple('OpInfo', 'prec assoc') L, R = 'Left Right'.split()

ops = {

'^': OpInfo(prec=4, assoc=R),
'*': OpInfo(prec=3, assoc=L),
'/': OpInfo(prec=3, assoc=L),
'+': OpInfo(prec=2, assoc=L),
'-': OpInfo(prec=2, assoc=L),
'(': OpInfo(prec=9, assoc=L),
')': OpInfo(prec=0, assoc=L),
}

NUM, LPAREN, RPAREN = 'NUMBER ( )'.split()


def get_input(inp = None):

   'Inputs an expression and returns list of (TOKENTYPE, tokenvalue)'
   
   if inp is None:
       inp = input('expression: ')
   tokens = inp.strip().split()
   tokenvals = []
   for token in tokens:
       if token in ops:
           tokenvals.append((token, ops[token]))
       #elif token in (LPAREN, RPAREN):
       #    tokenvals.append((token, token))
       else:    
           tokenvals.append((NUM, token))
   return tokenvals

def shunting(tokenvals):

   outq, stack = [], []
   table = ['TOKEN,ACTION,RPN OUTPUT,OP STACK,NOTES'.split(',')]
   for token, val in tokenvals:
       note = action = 
       if token is NUM:
           action = 'Add number to output'
           outq.append(val)
           table.append( (val, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
       elif token in ops:
           t1, (p1, a1) = token, val
           v = t1
           note = 'Pop ops from stack to output' 
           while stack:
               t2, (p2, a2) = stack[-1]
               if (a1 == L and p1 <= p2) or (a1 == R and p1 < p2):
                   if t1 != RPAREN:
                       if t2 != LPAREN:
                           stack.pop()
                           action = '(Pop op)'
                           outq.append(t2)
                       else:    
                           break
                   else:        
                       if t2 != LPAREN:
                           stack.pop()
                           action = '(Pop op)'
                           outq.append(t2)
                       else:    
                           stack.pop()
                           action = '(Pop & discard "(")'
                           table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
                           break
                   table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
                   v = note = 
               else:
                   note = 
                   break
               note =  
           note =  
           if t1 != RPAREN:
               stack.append((token, val))
               action = 'Push op token to stack'
           else:
               action = 'Discard ")"'
           table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
   note = 'Drain stack to output'
   while stack:
       v = 
       t2, (p2, a2) = stack[-1]
       action = '(Pop op)'
       stack.pop()
       outq.append(t2)
       table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
       v = note = 
   return table

if __name__ == '__main__':

   infix = '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'
   print( 'For infix expression: %r\n' % infix )
   rp = shunting(get_input(infix))
   maxcolwidths = [len(max(x, key=len)) for x in zip(*rp)]
   row = rp[0]
   print( ' '.join('{cell:^{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
   for row in rp[1:]:
       print( ' '.join('{cell:<{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
   print('\n The final output RPN is: %r' % rp[-1][2])</lang>
Sample output
For infix expression: '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'

TOKEN         ACTION                RPN OUTPUT         OP STACK            NOTES            
3     Add number to output   3                                                              
+     Push op token to stack 3                         +                                    
4     Add number to output   3 4                       +                                    
*     Push op token to stack 3 4                       + *                                  
2     Add number to output   3 4 2                     + *                                  
/     (Pop op)               3 4 2 *                   +        Pop ops from stack to output
      Push op token to stack 3 4 2 *                   + /                                  
(     Push op token to stack 3 4 2 *                   + / (                                
1     Add number to output   3 4 2 * 1                 + / (                                
-     Push op token to stack 3 4 2 * 1                 + / ( -                              
5     Add number to output   3 4 2 * 1 5               + / ( -                              
)     (Pop op)               3 4 2 * 1 5 -             + / (    Pop ops from stack to output
      (Pop & discard "(")    3 4 2 * 1 5 -             + /                                  
      Discard ")"            3 4 2 * 1 5 -             + /                                  
^     Push op token to stack 3 4 2 * 1 5 -             + / ^                                
2     Add number to output   3 4 2 * 1 5 - 2           + / ^                                
^     Push op token to stack 3 4 2 * 1 5 - 2           + / ^ ^                              
3     Add number to output   3 4 2 * 1 5 - 2 3         + / ^ ^                              
      (Pop op)               3 4 2 * 1 5 - 2 3 ^       + / ^    Drain stack to output       
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^     + /                                  
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^ /   +                                    
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^ / +                                      

 The final output RPN is: '3 4 2 * 1 5 - 2 3 ^ ^ / +'

Ruby

See Parsing/RPN/Ruby

<lang ruby>rpn = RPNExpression.from_infix("3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3")</lang> outputs

for Infix expression: 3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3
Term	Action	Output	Stack
3	PUSH V	["3"]	[]
+	PUSH OP	["3"]	["+"]
4	PUSH V	["3", "4"]	["+"]
*	PUSH OP	["3", "4"]	["+", "*"]
2	PUSH V	["3", "4", "2"]	["+", "*"]
/	MUL	["3", "4", "2", "*"]	["+"]	* has higher precedence than /
/	PUSH OP	["3", "4", "2", "*"]	["+", "/"]
(	OPEN_P	["3", "4", "2", "*"]	["+", "/", "("]
1	PUSH V	["3", "4", "2", "*", "1"]	["+", "/", "("]
-	PUSH OP	["3", "4", "2", "*", "1"]	["+", "/", "(", "-"]
5	PUSH V	["3", "4", "2", "*", "1", "5"]	["+", "/", "(", "-"]
)	SUB	["3", "4", "2", "*", "1", "5", "-"]	["+", "/", "("]	unwinding parenthesis
)	CLOSE_P	["3", "4", "2", "*", "1", "5", "-"]	["+", "/"]
^	PUSH OP	["3", "4", "2", "*", "1", "5", "-"]	["+", "/", "^"]
2	PUSH V	["3", "4", "2", "*", "1", "5", "-", "2"]	["+", "/", "^"]
^	PUSH OP	["3", "4", "2", "*", "1", "5", "-", "2"]	["+", "/", "^", "^"]
3	PUSH V	["3", "4", "2", "*", "1", "5", "-", "2", "3"]	["+", "/", "^", "^"]
RPN = 3 4 2 * 1 5 - 2 3 ^ ^ / +

Tcl

<lang tcl>package require Tcl 8.5

  1. Helpers

proc tokenize {str} {

   regexp -all -inline {[\d.]+|[-*^+/()]} $str

} proc precedence op {

   dict get {^ 4 * 3 / 3 + 2 - 2} $op

} proc associativity op {

   if {$op eq "^"} {return "right"} else {return "left"}

}

proc shunting {expression} {

   set stack {}
   foreach token [tokenize $expression] {

if {[string is double $token]} { puts "add to output: $token" lappend output $token } elseif {$token eq "("} { puts "push parenthesis" lappend stack $token } elseif {$token eq ")"} { puts "popping to parenthesis" while {[lindex $stack end] ne "("} { lappend output [lindex $stack end] set stack [lreplace $stack end end] puts "...popped [lindex $output end] to output" } set stack [lreplace $stack end end] puts "...found parenthesis" } else { puts "adding operator: $token" set p [precedence $token] set a [associativity $token] while {[llength $stack]} { set o2 [lindex $stack end] if { $o2 ne "(" && (($a eq "left" && $p <= [precedence $o2]) || ($a eq "right" && $p < [precedence $o2])) } then { puts "...popped operator $o2 to output" lappend output $o2 set stack [lreplace $stack end end] } else { break } } lappend stack $token } puts "\t\tOutput:\t$output\n\t\tStack:\t$stack"

   }
   puts "transferring tokens from stack to output"
   lappend output {*}[lreverse $stack]

}

puts [shunting "3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3"]</lang> Output:

add to output: 3
		Output:	3
		Stack:	
adding operator: +
		Output:	3
		Stack:	+
add to output: 4
		Output:	3 4
		Stack:	+
adding operator: *
		Output:	3 4
		Stack:	+ *
add to output: 2
		Output:	3 4 2
		Stack:	+ *
adding operator: /
...popped operator * to output
		Output:	3 4 2 *
		Stack:	+ /
push parenthesis
		Output:	3 4 2 *
		Stack:	+ / (
add to output: 1
		Output:	3 4 2 * 1
		Stack:	+ / (
adding operator: -
		Output:	3 4 2 * 1
		Stack:	+ / ( -
add to output: 5
		Output:	3 4 2 * 1 5
		Stack:	+ / ( -
popping to parenthesis
...popped - to output
...found parenthesis
		Output:	3 4 2 * 1 5 -
		Stack:	+ /
adding operator: ^
		Output:	3 4 2 * 1 5 -
		Stack:	+ / ^
add to output: 2
		Output:	3 4 2 * 1 5 - 2
		Stack:	+ / ^
adding operator: ^
		Output:	3 4 2 * 1 5 - 2
		Stack:	+ / ^ ^
add to output: 3
		Output:	3 4 2 * 1 5 - 2 3
		Stack:	+ / ^ ^
transferring tokens from stack to output
3 4 2 * 1 5 - 2 3 ^ ^ / +