Problem
I have a lot of problems making this code look more elegant or to reduce its size. The algorithm is mostly copied from the pseudocode of wikipedia page. The difference is I prepare my string with the tokenSplit
method.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace YardVer3
{
class Program
{
public class Token
{
public Token()
{
Value = string.Empty;
Info = '?';
Precedence = -1;
}
public Token(string text)
{
Value = text;
}
public string Value;
public char Info; //d - digit f-function r,l - right/left operator e- unHandled
public int Precedence;
}
static void Main(string[] args)
{
string inLine = "3 + 4 × 2 ÷ ( 1 − 5 ) ^ 2 ^ 3";
Token[] lineToken = new Token[inLine.Length];
for (int i = 0; i < lineToken.Length; i++)
{
lineToken[i] = new Token();
}
int tokenIndex = 0;
tokenIndex = tokenSplit(inLine, tokenIndex, lineToken);
//Start of Shunting Yard Algorithm(Referenced by Wikipedia PseudoCode)
Queue<Token> rpnLine = new Queue<Token>();
Stack<Token> opStack = new Stack<Token>();
for (int i = 0; i < tokenIndex; i++)
{
switch (lineToken[i].Info)
{
case 'd':
rpnLine.Enqueue(lineToken[i]);
break;
case 'f':
opStack.Push((lineToken[i]));
break;
case '(':
opStack.Push(new Token("("));
break;
case 'l':
case 'r':
if (opStack.Count>0)
{
Token t = opStack.Peek();
while ( t.Info == 'f' ||
(t.Precedence > lineToken[i].Precedence) ||
(t.Precedence == lineToken[i].Precedence && t.Info == 'l'))
{
rpnLine.Enqueue(opStack.Pop());
t = opStack.Peek();
}
}
opStack.Push(lineToken[i]);
break;
case ')':
while (opStack.Count > 0)
{
if (opStack.Peek().Value != "(")
{
rpnLine.Enqueue(opStack.Pop());
}
else
{
opStack.Pop();
break;
}
}
break;
default:
Console.WriteLine("Problem Accured");
break;
}
}
while (opStack.Count >0)
{
rpnLine.Enqueue(opStack.Pop());
}
Console.WriteLine(inLine);
while (rpnLine.Count > 0)
{
Console.Write(rpnLine.Dequeue().Value);
Console.Write(' ');
}
Console.WriteLine();
}
private static int tokenSplit(string inLine, int tokenIndex, Token[] lineToken)
{
Match digit = Regex.Match(inLine, @"d+.?d*");
Match funct = Regex.Match(inLine, @"[a-zA-Z]+");
for (int i = 0; i < inLine.Length; i++)
{
if (char.IsDigit(inLine[i]))
{
i += digit.Length - 1;
lineToken[tokenIndex].Info = 'd';
lineToken[tokenIndex].Value = digit.Value;
tokenIndex++;
digit = digit.NextMatch();
}
else if (char.IsLetter(inLine[i]))
{
i += funct.Length - 1;
lineToken[tokenIndex].Info = 'f';
lineToken[tokenIndex].Value = funct.Value;
tokenIndex++;
funct = funct.NextMatch();
}
else if (inLine[i] != ' ' && inLine[i] != ',')
{
lineToken[tokenIndex] = tokenInfoFill(inLine[i]);
lineToken[tokenIndex].Value = inLine[i].ToString();
tokenIndex++;
}
}
return tokenIndex;
}
public static Token tokenInfoFill(char c)
{
Token temp = new Token();
switch (c)
{
case '+':
case '-':
case '−':
temp.Precedence = 2;
temp.Info = 'l';
return temp;
case '*':
case '×':
case '/':
case '÷':
temp.Precedence = 3;
temp.Info = 'l';
return temp;
case '^':
temp.Precedence = 4;
temp.Info = 'r';
return temp;
case '(':
case ')':
temp.Precedence = -2;
temp.Info = c;
return temp;
default:
return temp;
}
}
}
}
Solution
Your Token
class looks a lot like a struct
(value type) to me. The main difference between a class
and a struct
is that two class
es can have the exact same data and not be considered the same, while two struct
s are considered the same if the data values it contains are the same. Additionally, a struct
can never be null
, and must have a default value (which appears to be your intention based on the default constructor). If this is how this works, you should change this to a struct
and override the .Equals
and .GetHashCode
functions.
Your Token
class should also not be a child class of Program
. Child classes are typically used when the child is special to the class–perhaps it will only ever be used within that class, or something.
string inLine = "3 + 4 × 2 ÷ ( 1 − 5 ) ^ 2 ^ 3";
Token[] lineToken = new Token[inLine.Length];
Here, you create an array of tokens for each character in the input. You don’t appear to account for spaces anywhere, so your array is nearly twice as large as it needs to be. You could use a List
here, or a stack or a queue structure, if you only use it in a LIFO or FIFO structure; that would dynamically resize as you pushed tokens to it.
Alternately, you could exclude tokens you don’t want from the size. Reading your code, it looks like ' '
(space) and ','
(comma) are excluded. You could do inLine.Where(c => c != ' ' && c != ',').Count()
, or something.
for (int i = 0; i < lineToken.Length; i++)
{
lineToken[i] = new Token();
}
Here, you initialize each item to a non-null Token
immediately. If you use a struct
, this would be initialized to a non-null default value by default.
private static int tokenSplit(string inLine, int tokenIndex, Token[] lineToken)
Using camelCase
is a Java thing. C# conventions dictate PascalCase
for function names.
case '+':
case '-':
case '−':
No need to duplicate a case
value.
In tokenInfoFill
, you pass in a char
value. Then you create an empty token, switch
over the passed-in value, and assign the Precedence
and Info
values. Then return this newly-created token and assign the Value
to char
value you passed in. I would assign the Value
when you create the token as well, to make this cleaner for the caller.
Additionally, I would not use a switch
here. It’s not the worst option–certainly better than if
s, but I would use a dictionary and C# 7’s value tuples.
private readonly Dictionary<char, (int precedence, char info)> dict = new Dictionary<char, (int precedence, char info)>
{
['+'] = (2, 'l'),
// ...
};
// in your method, you can deconstruct the tuple automatically
(temp.Precedence, temp.Info) = dict[c];
Finally, I would not use unique char
s for the info flag. I would use an enum
.
enum Info
{
Unknown = 0, // default value
Digit = 1,
Function = 2,
Operator = 3,
RightParenthesis = 4,
LeftParenthesis = 5
}
Now you can do temp.Info = Info.Digit
and
switch (temp.Info)
{
case Info.Digit:
break;
}
This way, you know exactly what you are looking at by the name. If necessary, you could also create groups of “info” flags using bit flags since an enum
is an int
behind the scenes (by default, that is–you can override that and make it a short
or long
or other numeric type).