Parsing cron expression

Posted on

Problem

I’m building my own scheduler and I took the first step by creating a cron expression parser.


To test it, I used the following expression, which covers each case. (I guess the 52W is wrong, but this doesn’t matter yet – validation comes later.)

"14,18,3-39/3,52 0/5 14,18,3-39,52W ? JAN,MAR,SEP MON-WED,FRI#3 2002-2010"

Core

It starts with the tokenizer, which has a single method with a loop that builds each token. Extensions such as W, L or # require special handling.

/// <summary>
/// Splits a cron expression into a flat sequence of tokens: values (numbers or
/// names) and the single-character separators/extensions listed in
/// <c>TokenTypes</c>.
/// </summary>
class CronExpressionTokenizer
{
    // Characters that form a one-character token of their own. Every other
    // character is accumulated into the current Value token.
    private static readonly Dictionary<char, TokenType> TokenTypes = new Dictionary<char, TokenType>
    {
        [' '] = TokenType.FieldSeparator,
        [','] = TokenType.ListItemSeparator,
        ['-'] = TokenType.RangeSeparator,
        ['/'] = TokenType.StepSeparator,
        ['*'] = TokenType.Blank,
        ['?'] = TokenType.Blank,
        ['L'] = TokenType.Extension,
        ['W'] = TokenType.Extension,
        ['#'] = TokenType.Extension,
    };

    /// <summary>
    /// Lazily tokenizes <paramref name="text"/> from left to right.
    /// </summary>
    /// <param name="text">The cron expression to tokenize.</param>
    /// <returns>
    /// The tokens in the order they occur. <c>Position</c> is the zero-based index
    /// of the token's first character in <paramref name="text"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="text"/> is null. Because this is an iterator, the exception
    /// is raised when the result is first enumerated.
    /// </exception>
    public static IEnumerable<Token> Tokenize(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        // Start index of the value being accumulated; null while no value is in progress.
        var position = (int?)null;
        var value = new StringBuilder();
        var lastTokenType = TokenType.None;

        for (var i = 0; i < text.Length; i++)
        {
            var c = text[i];

            var tokenType = TokenType.None;
            var isSpecial = TokenTypes.TryGetValue(c, out tokenType);

            // A 'W' that does not directly follow a value starts a name such as "WED"
            // and is therefore an ordinary value character, not an extension.
            // NOTE(review): 'L' has no such handling, so a name containing 'L'
            // (e.g. "JUL") is split into the value "JU" and the extension 'L' -
            // confirm whether the parser is expected to compensate.
            var isValueCharacter = !isSpecial || (c == 'W' && lastTokenType != TokenType.Value);

            if (isValueCharacter)
            {
                lastTokenType = TokenType.Value;
                position = position ?? i;
                value.Append(c);
                continue;
            }

            // A special character terminates the value collected so far.
            if (value.Length > 0)
            {
                yield return new Token
                {
                    Type = TokenType.Value,
                    Position = position.Value,
                    Value = value.ToString()
                };

                // Reset instead of guessing "i + 1": the next value may start later
                // when several special characters follow each other (e.g. " ? ").
                position = null;
                value.Clear();
            }

            lastTokenType = tokenType;
            yield return new Token
            {
                Type = tokenType,
                Position = i,
                Value = c.ToString()
            };
        }

        // Flush a value that runs to the end of the text.
        if (value.Length > 0)
        {
            yield return new Token
            {
                Type = TokenType.Value,
                Position = position.Value,
                Value = value.ToString()
            };
        }
    }
}

The result for the test expression is:

Type                Position    Value
Value               0           14
ListItemSeparator   2           ,
Value               3           18
ListItemSeparator   5           ,
Value               6           3
RangeSeparator      7           -
Value               8           39
StepSeparator       10          /
Value               11          3
ListItemSeparator   12          ,
Value               13          52
FieldSeparator      15   
Value               16          0
StepSeparator       17          /
Value               18          5
FieldSeparator      19   
Value               20          14
ListItemSeparator   22          ,
Value               23          18
ListItemSeparator   25          ,
Value               26          3
RangeSeparator      27          -
Value               28          39
ListItemSeparator   30          ,
Value               31          52
Extension           33          W
FieldSeparator      34   
Value               34          JAN
Blank               35          ?
FieldSeparator      36   
ListItemSeparator   40          ,
Value               41          MAR
ListItemSeparator   44          ,
Value               45          SEP
FieldSeparator      48   
Value               49          MON
RangeSeparator      52          -
Value               53          WED
ListItemSeparator   56          ,
Value               57          FRI
Extension           60          #
Value               61          3
FieldSeparator      62   
Value               63          2002
RangeSeparator      67          -
Value               68          2010

Then the CronExpressionParser comes into play. It groups the tokens by field (TokenGrouping) and creates a Subexpression for each group. It also parses the names of months and days of the week. For these names and the ordinal extensions I use three dictionaries.

class CronExpressionParser
{
// Three-letter day-of-week names mapped to 1-based numbers (SUN = 1 ... SAT = 7).
// Lookups ignore case.
private static readonly IReadOnlyDictionary<string, int> DaysOfWeek = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase)
{
    ["SUN"] = 1,
    ["MON"] = 2,
    ["TUE"] = 3,
    ["WED"] = 4,
    ["THU"] = 5,
    ["FRI"] = 6,
    ["SAT"] = 7,
};

// Three-letter English month names mapped to 1-based month numbers
// (JAN = 1 ... DEC = 12). Lookups ignore case.
private static readonly IReadOnlyDictionary<string, int> Months = new[]
{
    // "MAY" (previously misspelled "MAI") so the standard abbreviation is recognized.
    "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
}
.Select((month, index) => new { month, index }).ToDictionary(x => x.month, x => x.index + 1, StringComparer.OrdinalIgnoreCase);

// Extension texts ("1".."5", "L", "W") mapped to their CronExtension member.
// Lookups ignore case.
// NOTE(review): presumably "1".."5" are the ordinals written after '#' - confirm against the parser.
private static readonly IReadOnlyDictionary<string, CronExtension> Extensions = new Dictionary<string, CronExtension>(StringComparer.OrdinalIgnoreCase)
{
    { "1", CronExtension.First },
    { "2", CronExtension.Second },
    { "3", CronExtension.Third },
    { "4", CronExtension.Fourth },
    { "5", CronExtension.Fifth },
    { "L", CronExtension.Last },
    { "W", CronExtension.Weekday },
};

/// <summary>
/// Lazily turns a token sequence into one subexpression per token group
/// produced by <c>GroupTokens</c>.
/// </summary>
/// <param name="tokens">The tokens of a cron expression.</param>
/// <returns>The parsed subexpressions, in group order.</returns>
public static IEnumerable<Subexpression> Parse(IEnumerable<Token> tokens)
{
    // Both GroupTokens and Select are deferred, so nothing runs until enumeration.
    return GroupTokens(tokens).Select(tokenGroup => ParseSubexpression(tokenGroup));
}

/// <summary>
/// Splits the token stream at every field separator and pairs each run of
/// tokens with the next field type (Second, Minute, Hour, DayOfMonth, Month,
/// DayOfWeek, Year - in that order).
/// </summary>
/// <param name="tokens">The tokens of a cron expression.</param>
/// <returns>One grouping per field; the separators themselves are dropped.</returns>
private static IEnumerable<IGrouping<Type, Token>> GroupTokens(IEnumerable<Token> tokens)
{
    // Field types still to be assigned, in the order the fields appear.
    var remainingFieldTypes = new Queue<Type>();
    remainingFieldTypes.Enqueue(typeof(Second));
    remainingFieldTypes.Enqueue(typeof(Minute));
    remainingFieldTypes.Enqueue(typeof(Hour));
    remainingFieldTypes.Enqueue(typeof(DayOfMonth));
    remainingFieldTypes.Enqueue(typeof(Month));
    remainingFieldTypes.Enqueue(typeof(DayOfWeek));
    remainingFieldTypes.Enqueue(typeof(Year));

    var currentField = new List<Token>();

    foreach (var token in tokens)
    {
        if (token.Type != TokenType.FieldSeparator)
        {
            currentField.Add(token);
        }
        else
        {
            // A separator closes the current field; Dequeue throws once the
            // field types are exhausted.
            yield return new TokenGrouping(remainingFieldTypes.Dequeue(), currentField);
            currentField = new List<Token>();
        }
    }

    // The last field has no trailing separator.
    yield return new TokenGrouping(remainingFieldTypes.Dequeue(), currentField);
}

private static Subexpression ParseSubexpression(IGrouping<Type, Token> tokenGroup)
{
var ranges = new List<CronRange>();

var range = (CronRange?)null;
var lastTokenType = TokenType.None;

foreach (var token in tokenGroup)
{
if (token.Type == TokenType.Value)
{
var value = 0;
var valueParsed =
int.TryParse(token.Value, out value) ||
DaysOfWeek.TryGetValue(token.Value, out value) ||
Months.TryGetValue(token.Value, out value);

if (!valueParsed)
{
throw new Exception($"Invalid value "{token.Value}"" at {token.Position}."")

Solution

Leave a Reply

Your email address will not be published. Required fields are marked *