Shunting-yard Algorithm, implemented based on reference pseudocode

Posted on

Problem

I have a lot of problems making this code look more elegant or to reduce its size. The algorithm is mostly copied from the pseudocode of wikipedia page. The difference is I prepare my string with the tokenSplit method.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace YardVer3
{
    class Program
    {
        public class Token
        {
            public Token()
            {
                Value = string.Empty;
                Info = '?';
                Precedence = -1;
            }
            public Token(string text)
            {
                Value = text;
            }
            public string Value;
            public char Info;       //d - digit f-function r,l - right/left operator e- unHandled
            public int Precedence;
        }
        static void Main(string[] args)
        {

            string inLine = "3 + 4 × 2 ÷ ( 1 − 5 ) ^ 2 ^ 3";
            Token[] lineToken = new Token[inLine.Length];
            for (int i = 0; i < lineToken.Length; i++)
            {
                lineToken[i] = new Token();
            }
            int tokenIndex = 0;
            tokenIndex = tokenSplit(inLine, tokenIndex, lineToken);

            //Start of Shunting Yard Algorithm(Referenced by Wikipedia PseudoCode)
            Queue<Token> rpnLine = new Queue<Token>();
            Stack<Token> opStack = new Stack<Token>();
            for (int i = 0; i < tokenIndex; i++)
            {
                switch (lineToken[i].Info)
                {
                    case 'd':
                        rpnLine.Enqueue(lineToken[i]);
                        break;
                    case 'f':
                        opStack.Push((lineToken[i]));
                        break;
                    case '(':
                        opStack.Push(new Token("("));
                        break;
                    case 'l':
                    case 'r':
                        if (opStack.Count>0)
                        {
                            Token t = opStack.Peek();
                            while (  t.Info == 'f' || 
                                    (t.Precedence > lineToken[i].Precedence) || 
                                    (t.Precedence == lineToken[i].Precedence && t.Info == 'l'))
                            {
                                rpnLine.Enqueue(opStack.Pop());
                                t = opStack.Peek();
                            }
                        }
                        opStack.Push(lineToken[i]);
                        break;
                    case ')':
                        while (opStack.Count > 0)
                        {
                            if (opStack.Peek().Value != "(")
                            {
                                rpnLine.Enqueue(opStack.Pop());
                            }
                            else
                            {
                                opStack.Pop();
                                break;
                            }
                        }
                        break;
                    default:
                        Console.WriteLine("Problem Accured");
                        break;
                }
            }
            while (opStack.Count >0)
            {
                rpnLine.Enqueue(opStack.Pop());
            }
            Console.WriteLine(inLine);
            while (rpnLine.Count > 0)
            {
                Console.Write(rpnLine.Dequeue().Value);
                Console.Write(' ');
            }
            Console.WriteLine();
        }
        private static int tokenSplit(string inLine, int tokenIndex, Token[] lineToken)
        {
            Match digit = Regex.Match(inLine, @"d+.?d*");
            Match funct = Regex.Match(inLine, @"[a-zA-Z]+");
            for (int i = 0; i < inLine.Length; i++)
            {
                if (char.IsDigit(inLine[i]))
                {
                    i += digit.Length - 1;
                    lineToken[tokenIndex].Info = 'd';
                    lineToken[tokenIndex].Value = digit.Value;                   
                    tokenIndex++;
                    digit = digit.NextMatch();
                }
                else if (char.IsLetter(inLine[i]))
                {
                    i += funct.Length - 1;
                    lineToken[tokenIndex].Info = 'f';                    
                    lineToken[tokenIndex].Value = funct.Value;                    
                    tokenIndex++;
                    funct = funct.NextMatch();
                }
                else if (inLine[i] != ' ' && inLine[i] != ',')
                {
                    lineToken[tokenIndex] = tokenInfoFill(inLine[i]);                        
                    lineToken[tokenIndex].Value = inLine[i].ToString();     
                    tokenIndex++;
                }
            }
            return tokenIndex;
        }
        public static Token tokenInfoFill(char c)
        {
            Token temp = new Token();
            switch (c)
            {
                case '+':
                case '-':
                case '−':
                    temp.Precedence = 2;
                    temp.Info = 'l';
                    return temp;
                case '*':
                case '×':
                case '/':
                case '÷':
                    temp.Precedence = 3;
                    temp.Info = 'l';
                    return temp;
                case '^':
                    temp.Precedence = 4;
                    temp.Info = 'r';
                    return temp;
                case '(':
                case ')':
                    temp.Precedence = -2;
                    temp.Info = c;
                    return temp;
                default:
                    return temp;
            }
        }
    }
}

Solution

Your Token class looks a lot like a struct (value type) to me. The main difference between a class and a struct is that two classes can have the exact same data and not be considered the same, while two structs are considered the same if the data values it contains are the same. Additionally, a struct can never be null, and must have a default value (which appears to be your intention based on the default constructor). If this is how this works, you should change this to a struct and override the .Equals and .GetHashCode functions.

Your Token class should also not be a child class of Program. Child classes are typically used when the child is special to the class–perhaps it will only ever be used within that class, or something.


string inLine = "3 + 4 × 2 ÷ ( 1 − 5 ) ^ 2 ^ 3";
Token[] lineToken = new Token[inLine.Length];

Here, you create an array of tokens for each character in the input. You don’t appear to account for spaces anywhere, so your array is nearly twice as large as it needs to be. You could use a List here, or a stack or a queue structure, if you only use it in a LIFO or FIFO structure; that would dynamically resize as you pushed tokens to it.

Alternately, you could exclude tokens you don’t want from the size. Reading your code, it looks like ' ' (space) and ',' (comma) are excluded. You could do inLine.Where(c => c != ' ' && c != ',').Count(), or something.


for (int i = 0; i < lineToken.Length; i++)
{
    lineToken[i] = new Token();
}

Here, you initialize each item to a non-null Token immediately. If you use a struct, this would be initialized to a non-null default value by default.


private static int tokenSplit(string inLine, int tokenIndex, Token[] lineToken)

Using camelCase is a Java thing. C# conventions dictate PascalCase for function names.


case '+':
case '-':
case '−':

No need to duplicate a case value.


In tokenInfoFill, you pass in a char value. Then you create an empty token, switch over the passed-in value, and assign the Precedence and Info values. Then return this newly-created token and assign the Value to char value you passed in. I would assign the Value when you create the token as well, to make this cleaner for the caller.

Additionally, I would not use a switch here. It’s not the worst option–certainly better than ifs, but I would use a dictionary and C# 7’s value tuples.

private readonly Dictionary<char, (int precedence, char info)> dict = new Dictionary<char, (int precedence, char info)>
{
    ['+'] = (2, 'l'),
    // ...
};

// in your method, you can deconstruct the tuple automatically
(temp.Precedence, temp.Info) = dict[c];

Finally, I would not use unique chars for the info flag. I would use an enum.

enum Info
{
    Unknown = 0,  // default value
    Digit = 1,
    Function = 2,
    Operator = 3,
    RightParenthesis = 4,
    LeftParenthesis = 5
}

Now you can do temp.Info = Info.Digit and

switch (temp.Info)
{
    case Info.Digit:
        break;
}

This way, you know exactly what you are looking at by the name. If necessary, you could also create groups of “info” flags using bit flags since an enum is an int behind the scenes (by default, that is–you can override that and make it a short or long or other numeric type).

Leave a Reply

Your email address will not be published. Required fields are marked *