Parsing/Shunting-yard algorithm: Difference between revisions

From Rosetta Code
Content added Content deleted
(Promoted to task)
Line 1: Line 1:
{{draft task}}
{{task}}
Given the operator characteristics and input from the [[wp:Shunting-yard_algorithm|Shunting-yard algorithm]] page and tables
Given the operator characteristics and input from the [[wp:Shunting-yard_algorithm|Shunting-yard algorithm]] page and tables
Use the algorithm to show the changes in the operator stack and RPN output
Use the algorithm to show the changes in the operator stack and RPN output

Revision as of 05:24, 8 December 2011

Task
Parsing/Shunting-yard algorithm
You are encouraged to solve this task according to the task description, using any language you may know.

Given the operator characteristics and input from the Shunting-yard algorithm page and tables Use the algorithm to show the changes in the operator stack and RPN output as each individual token is processed.

  • Assume an input of a correct, space separated, string of tokens
  • Generate a space separated output string representing the RPN
  • Test with the input string '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3' then print and display the output here.
  • Operator precedence is given in this table:
operator precedence associativity
^ 4 Right
* 3 Left
/ 3 Left
+ 2 Left
- 2 Left
Extra credit
  • Add extra text explaining the actions and an optional comment for the action on receipt of each token.
Note
  • the handling of functions and arguments is not required.
See also

AutoHotkey

Works with: AutoHotkey_L

<lang AHK>SetBatchLines -1

  1. NoEnv

expr := "3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3"

output := "Testing string '" expr "'`r`n`r`nToken`tOutput Queue"

          . Space(StrLen(expr)-StrLen("Output Queue")+2) "OP Stack"
define a stack with semantic .push() and .pop() funcs

stack := {push: func("ObjInsert"), pop: func("ObjRemove"), peek: func("Peek")}

Loop Parse, expr, %A_Space% {

      token := A_LoopField
      if token is number
              Q .= token A_Space
      if isOp(token){
              o1 := token
              while   isOp(o2 := stack.peek())
                      and ((isLeft(o1)  and Precedence(o1) <= Precedence(o2))
                      or  (isRight(o1) and Precedence(o1) <  Precedence(o2)))
                  Q .= stack.pop() A_Space
              stack.push(o1)
      }
      If ( token = "(" )
              stack.push(token)
      If ( token = ")" )
      {
              While ((t := stack.pop()) != "(") && (t != "")
                      Q .= t A_Space
              if (t = "")
                      throw Exception("Unmatched parenthesis. "
                         . "Character number " A_Index)
      }
      output .= "`r`n" token Space(7) Q Space(StrLen(expr)+2-StrLen(Q)) 
              . Disp(stack)

} output .= "`r`n(empty stack to output)" While (t := stack.pop()) != ""

      if InStr("()", t)
              throw Exception("Unmatched parenthesis.")
      else    Q .= t A_Space, output .= "`r`n" Space(8) Q 
                      . Space(StrLen(expr)+2-StrLen(Q)) Disp(stack)

output .= "`r`n`r`nFinal string: '" Q "'" clipboard := output

isOp(t){

      return (!!InStr("+-*/^", t) && t)

} Peek(this){

      r := this.Remove(), this.Insert(r)
      return r

} IsLeft(o){

      return !!InStr("*/+-", o)

} IsRight(o){

      return o = "^"

} Precedence(o){

      return (InStr("+-/*^", o)+3)//2

} Disp(obj){

      for each, val in obj
              o := val . o
      return  o

} Space(n){

      return n>0 ? A_Space Space(n-1) : ""

}</lang>

Output
Testing string '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'

Token	Output Queue                   OP Stack
3       3                              
+       3                              +
4       3 4                            +
*       3 4                            *+
2       3 4 2                          *+
/       3 4 2 *                        /+
(       3 4 2 *                        (/+
1       3 4 2 * 1                      (/+
-       3 4 2 * 1                      -(/+
5       3 4 2 * 1 5                    -(/+
)       3 4 2 * 1 5 -                  /+
^       3 4 2 * 1 5 -                  ^/+
2       3 4 2 * 1 5 - 2                ^/+
^       3 4 2 * 1 5 - 2                ^^/+
3       3 4 2 * 1 5 - 2 3              ^^/+
(empty stack to output)
        3 4 2 * 1 5 - 2 3 ^            ^/+
        3 4 2 * 1 5 - 2 3 ^ ^          /+
        3 4 2 * 1 5 - 2 3 ^ ^ /        +
        3 4 2 * 1 5 - 2 3 ^ ^ / +      

Final string: '3 4 2 * 1 5 - 2 3 ^ ^ / + '

C

Requires a functioning ANSI terminal and Enter key. <lang c>#include <sys/types.h>

  1. include <regex.h>
  2. include <stdio.h>

typedef struct { const char *s; int len, prec, assoc; } str_tok_t;

typedef struct { const char * str; int assoc, prec; regex_t re; } pat_t;

enum assoc { A_NONE, A_L, A_R }; pat_t pat_eos = {"", A_NONE, 0};

pat_t pat_ops[] = { {"^\\)", A_NONE, -1}, {"^\\*\\*", A_R, 3}, {"^\\^", A_R, 3}, {"^\\*", A_L, 2}, {"^/", A_L, 2}, {"^\\+", A_L, 1}, {"^-", A_L, 1}, {0} };

pat_t pat_arg[] = { {"^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"}, {"^[a-zA-Z_][a-zA-Z_0-9]*"}, {"^\\(", A_L, -1}, {0} };

str_tok_t stack[256]; /* assume these are big enough */ str_tok_t queue[256]; int l_queue, l_stack;

  1. define qpush(x) queue[l_queue++] = x
  2. define spush(x) stack[l_stack++] = x
  3. define spop() stack[--l_stack]

void display(const char *s) { int i; printf("\033[1;1H\033[JText | %s", s); printf("\nStack| "); for (i = 0; i < l_stack; i++) printf("%.*s ", stack[i].len, stack[i].s); // uses C99 format strings printf("\nQueue| "); for (i = 0; i < l_queue; i++) printf("%.*s ", queue[i].len, queue[i].s); puts("\n\n<press enter>"); getchar(); }

int prec_booster;

  1. define fail(s1, s2) {fprintf(stderr, "[Error %s] %s\n", s1, s2); return 0;}

int init(void) { int i; pat_t *p;

for (i = 0, p = pat_ops; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);

for (i = 0, p = pat_arg; p[i].str; i++) if (regcomp(&(p[i].re), p[i].str, REG_NEWLINE|REG_EXTENDED)) fail("comp", p[i].str);

return 1; }

pat_t* match(const char *s, pat_t *p, str_tok_t * t, const char **e) { int i; regmatch_t m;

while (*s == ' ') s++; *e = s;

if (!*s) return &pat_eos;

for (i = 0; p[i].str; i++) { if (regexec(&(p[i].re), s, 1, &m, REG_NOTEOL)) continue; t->s = s; *e = s + (t->len = m.rm_eo - m.rm_so); return p + i; } return 0; }

int parse(const char *s) { pat_t *p; str_tok_t *t, tok;

prec_booster = l_queue = 0; display(s); while (*s) { p = match(s, pat_arg, &tok, &s); if (!p || p == &pat_eos) fail("parse arg", s);

/* Odd logic here. Don't actually stack the parens: don't need to. */ if (p->prec == -1) { prec_booster += 100; continue; } qpush(tok); display(s);

re_op: p = match(s, pat_ops, &tok, &s); if (!p) fail("parse op", s);

tok.assoc = p->assoc; tok.prec = p->prec;

if (p->prec > 0) tok.prec = p->prec + prec_booster; else if (p->prec == -1) { if (prec_booster < 100) fail("unmatched )", s); tok.prec = prec_booster; }

while (l_stack) { t = stack + l_stack - 1; if (!(t->prec == tok.prec && t->assoc == A_L) && t->prec <= tok.prec) break; qpush(spop()); display(s); }

if (p->prec == -1) { prec_booster -= 100; goto re_op; }

if (!p->prec) { display(s); if (prec_booster) fail("unmatched (", s); return 1; }

spush(tok); display(s); }

return 1; }

int main() { int i; const char *tests[] = { "3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3", /* RC mandated: OK */ "123", /* OK */ "3+4 * 2 / ( 1 - 5 ) ^ 2 ^ 3.14", /* OK */ "(((((((1+2+3**(4 + 5))))))", /* bad parens */ "a^(b + c/d * .1e5)!", /* unknown op */ "(1**2)**3", /* OK */ 0 };

if (!init()) return 1; for (i = 0; tests[i]; i++) { printf("Testing string `%s' <enter>\n", tests[i]); getchar();

printf("string `%s': %s\n\n", tests[i], parse(tests[i]) ? "Ok" : "Error"); }

return 0; }</lang>

Output

Note: This cannot give a flavour of the true interactive output where tokens are shuffled around every time the enter key is pressed.

Testing string `3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'   <enter>

Text | 3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| 
Queue| 

<press enter>

Text |  + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| 
Queue| 3 

<press enter>

Text |  4 * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| + 
Queue| 3 

<press enter>

Text |  * 2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| + 
Queue| 3 4 

<press enter>

Text |  2 / ( 1 - 5 ) ^ 2 ^ 3
Stack| + * 
Queue| 3 4 

<press enter>

Text |  / ( 1 - 5 ) ^ 2 ^ 3
Stack| + * 
Queue| 3 4 2 

<press enter>

Text |  ( 1 - 5 ) ^ 2 ^ 3
Stack| + 
Queue| 3 4 2 * 

<press enter>

Text |  ( 1 - 5 ) ^ 2 ^ 3
Stack| + / 
Queue| 3 4 2 * 

<press enter>

Text |  - 5 ) ^ 2 ^ 3
Stack| + / 
Queue| 3 4 2 * 1 

<press enter>

Text |  5 ) ^ 2 ^ 3
Stack| + / - 
Queue| 3 4 2 * 1 

<press enter>

Text |  ) ^ 2 ^ 3
Stack| + / - 
Queue| 3 4 2 * 1 5 

<press enter>

Text |  ^ 2 ^ 3
Stack| + / 
Queue| 3 4 2 * 1 5 - 

<press enter>

Text |  2 ^ 3
Stack| + / ^ 
Queue| 3 4 2 * 1 5 - 

<press enter>

Text |  ^ 3
Stack| + / ^ 
Queue| 3 4 2 * 1 5 - 2 

<press enter>

Text |  3
Stack| + / ^ ^ 
Queue| 3 4 2 * 1 5 - 2 

<press enter>

Text | 
Stack| + / ^ ^ 
Queue| 3 4 2 * 1 5 - 2 3 

<press enter>

Text | 
Stack| + / ^ 
Queue| 3 4 2 * 1 5 - 2 3 ^ 

<press enter>

Text | 
Stack| + / 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ 

<press enter>

Text | 
Stack| + 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / 

<press enter>

Text | 
Stack| 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / + 

<press enter>

Text | 
Stack| 
Queue| 3 4 2 * 1 5 - 2 3 ^ ^ / + 

<press enter>

string `3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3': Ok

Testing string `123'   <enter>
^C

Python

Parenthesis are added to the operator table then special-cased in the code. This solution includes the extra credit. <lang python>from collections import namedtuple from pprint import pprint as pp

OpInfo = namedtuple('OpInfo', 'prec assoc') L, R = 'Left Right'.split()

ops = {

'^': OpInfo(prec=4, assoc=R),
'*': OpInfo(prec=3, assoc=L),
'/': OpInfo(prec=3, assoc=L),
'+': OpInfo(prec=2, assoc=L),
'-': OpInfo(prec=2, assoc=L),
'(': OpInfo(prec=9, assoc=L),
')': OpInfo(prec=0, assoc=L),
}

NUM, LPAREN, RPAREN = 'NUMBER ( )'.split()


def get_input(inp = None):

   'Inputs an expression and returns list of (TOKENTYPE, tokenvalue)'
   
   if inp is None:
       inp = input('expression: ')
   tokens = inp.strip().split()
   tokenvals = []
   for token in tokens:
       if token in ops:
           tokenvals.append((token, ops[token]))
       #elif token in (LPAREN, RPAREN):
       #    tokenvals.append((token, token))
       else:    
           tokenvals.append((NUM, token))
   return tokenvals

def shunting(tokenvals):

   outq, stack = [], []
   table = ['TOKEN,ACTION,RPN OUTPUT,OP STACK,NOTES'.split(',')]
   for token, val in tokenvals:
       note = action = 
       if token is NUM:
           action = 'Add number to output'
           outq.append(val)
           table.append( (val, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
       elif token in ops:
           t1, (p1, a1) = token, val
           v = t1
           note = 'Pop ops from stack to output' 
           while stack:
               t2, (p2, a2) = stack[-1]
               if (a1 == L and p1 <= p2) or (a1 == R and p1 < p2):
                   if t1 != RPAREN:
                       if t2 != LPAREN:
                           stack.pop()
                           action = '(Pop op)'
                           outq.append(t2)
                       else:    
                           break
                   else:        
                       if t2 != LPAREN:
                           stack.pop()
                           action = '(Pop op)'
                           outq.append(t2)
                       else:    
                           stack.pop()
                           action = '(Pop & discard "(")'
                           table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
                           break
                   table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
                   v = note = 
               else:
                   note = 
                   break
               note =  
           note =  
           if t1 != RPAREN:
               stack.append((token, val))
               action = 'Push op token to stack'
           else:
               action = 'Discard ")"'
           table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
   note = 'Drain stack to output'
   while stack:
       v = 
       t2, (p2, a2) = stack[-1]
       action = '(Pop op)'
       stack.pop()
       outq.append(t2)
       table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
       v = note = 
   return table

if __name__ == '__main__':

   infix = '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'
   print( 'For infix expression: %r\n' % infix )
   rp = shunting(get_input(infix))
   maxcolwidths = [len(max(x, key=len)) for x in zip(*rp)]
   row = rp[0]
   print( ' '.join('{cell:^{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
   for row in rp[1:]:
       print( ' '.join('{cell:<{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
   print('\n The final output RPN is: %r' % rp[-1][2])</lang>
Sample output
For infix expression: '3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3'

TOKEN         ACTION                RPN OUTPUT         OP STACK            NOTES            
3     Add number to output   3                                                              
+     Push op token to stack 3                         +                                    
4     Add number to output   3 4                       +                                    
*     Push op token to stack 3 4                       + *                                  
2     Add number to output   3 4 2                     + *                                  
/     (Pop op)               3 4 2 *                   +        Pop ops from stack to output
      Push op token to stack 3 4 2 *                   + /                                  
(     Push op token to stack 3 4 2 *                   + / (                                
1     Add number to output   3 4 2 * 1                 + / (                                
-     Push op token to stack 3 4 2 * 1                 + / ( -                              
5     Add number to output   3 4 2 * 1 5               + / ( -                              
)     (Pop op)               3 4 2 * 1 5 -             + / (    Pop ops from stack to output
      (Pop & discard "(")    3 4 2 * 1 5 -             + /                                  
      Discard ")"            3 4 2 * 1 5 -             + /                                  
^     Push op token to stack 3 4 2 * 1 5 -             + / ^                                
2     Add number to output   3 4 2 * 1 5 - 2           + / ^                                
^     Push op token to stack 3 4 2 * 1 5 - 2           + / ^ ^                              
3     Add number to output   3 4 2 * 1 5 - 2 3         + / ^ ^                              
      (Pop op)               3 4 2 * 1 5 - 2 3 ^       + / ^    Drain stack to output       
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^     + /                                  
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^ /   +                                    
      (Pop op)               3 4 2 * 1 5 - 2 3 ^ ^ / +                                      

 The final output RPN is: '3 4 2 * 1 5 - 2 3 ^ ^ / +'

Ruby

See Parsing/RPN/Ruby

<lang ruby>rpn = RPNExpression.from_infix("3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3")</lang> outputs

for Infix expression: 3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3
Term	Action	Output	Stack
3	PUSH V	["3"]	[]
+	PUSH OP	["3"]	["+"]
4	PUSH V	["3", "4"]	["+"]
*	PUSH OP	["3", "4"]	["+", "*"]
2	PUSH V	["3", "4", "2"]	["+", "*"]
/	MUL	["3", "4", "2", "*"]	["+"]	* has higher precedence than /
/	PUSH OP	["3", "4", "2", "*"]	["+", "/"]
(	OPEN_P	["3", "4", "2", "*"]	["+", "/", "("]
1	PUSH V	["3", "4", "2", "*", "1"]	["+", "/", "("]
-	PUSH OP	["3", "4", "2", "*", "1"]	["+", "/", "(", "-"]
5	PUSH V	["3", "4", "2", "*", "1", "5"]	["+", "/", "(", "-"]
)	SUB	["3", "4", "2", "*", "1", "5", "-"]	["+", "/", "("]	unwinding parenthesis
)	CLOSE_P	["3", "4", "2", "*", "1", "5", "-"]	["+", "/"]
^	PUSH OP	["3", "4", "2", "*", "1", "5", "-"]	["+", "/", "^"]
2	PUSH V	["3", "4", "2", "*", "1", "5", "-", "2"]	["+", "/", "^"]
^	PUSH OP	["3", "4", "2", "*", "1", "5", "-", "2"]	["+", "/", "^", "^"]
3	PUSH V	["3", "4", "2", "*", "1", "5", "-", "2", "3"]	["+", "/", "^", "^"]
RPN = 3 4 2 * 1 5 - 2 3 ^ ^ / +

Tcl

<lang tcl>package require Tcl 8.5

  1. Helpers

proc tokenize {str} {

   regexp -all -inline {[\d.]+|[-*^+/()]} $str

} proc precedence op {

   dict get {^ 4 * 3 / 3 + 2 - 2} $op

} proc associativity op {

   if {$op eq "^"} {return "right"} else {return "left"}

}

proc shunting {expression} {

   set stack {}
   foreach token [tokenize $expression] {

if {[string is double $token]} { puts "add to output: $token" lappend output $token } elseif {$token eq "("} { puts "push parenthesis" lappend stack $token } elseif {$token eq ")"} { puts "popping to parenthesis" while {[lindex $stack end] ne "("} { lappend output [lindex $stack end] set stack [lreplace $stack end end] puts "...popped [lindex $output end] to output" } set stack [lreplace $stack end end] puts "...found parenthesis" } else { puts "adding operator: $token" set p [precedence $token] set a [associativity $token] while {[llength $stack]} { set o2 [lindex $stack end] if { $o2 ne "(" && (($a eq "left" && $p <= [precedence $o2]) || ($a eq "right" && $p < [precedence $o2])) } then { puts "...popped operator $o2 to output" lappend output $o2 set stack [lreplace $stack end end] } else { break } } lappend stack $token } puts "\t\tOutput:\t$output\n\t\tStack:\t$stack"

   }
   puts "transferring tokens from stack to output"
   lappend output {*}[lreverse $stack]

}

puts [shunting "3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3"]</lang> Output:

add to output: 3
		Output:	3
		Stack:	
adding operator: +
		Output:	3
		Stack:	+
add to output: 4
		Output:	3 4
		Stack:	+
adding operator: *
		Output:	3 4
		Stack:	+ *
add to output: 2
		Output:	3 4 2
		Stack:	+ *
adding operator: /
...popped operator * to output
		Output:	3 4 2 *
		Stack:	+ /
push parenthesis
		Output:	3 4 2 *
		Stack:	+ / (
add to output: 1
		Output:	3 4 2 * 1
		Stack:	+ / (
adding operator: -
		Output:	3 4 2 * 1
		Stack:	+ / ( -
add to output: 5
		Output:	3 4 2 * 1 5
		Stack:	+ / ( -
popping to parenthesis
...popped - to output
...found parenthesis
		Output:	3 4 2 * 1 5 -
		Stack:	+ /
adding operator: ^
		Output:	3 4 2 * 1 5 -
		Stack:	+ / ^
add to output: 2
		Output:	3 4 2 * 1 5 - 2
		Stack:	+ / ^
adding operator: ^
		Output:	3 4 2 * 1 5 - 2
		Stack:	+ / ^ ^
add to output: 3
		Output:	3 4 2 * 1 5 - 2 3
		Stack:	+ / ^ ^
transferring tokens from stack to output
3 4 2 * 1 5 - 2 3 ^ ^ / +