Problem
I’m building my own scheduler and I took the first step by creating a cron expression parser.
To test it I used this expression to cover each case. (I guess the 52W is wrong but this doesn’t matter yet – validation comes later).
"14,18,3-39/3,52 0/5 14,18,3-39,52W ? JAN,MAR,SEP MON-WED,FRI#3 2002-2010"
Core
It starts with the tokenizer, which has only one method with a loop that builds each token. It requires special handling of extensions like `W`, `L` or `#`.
/// <summary>
/// Splits a cron expression into a flat sequence of tokens: values (numbers or names),
/// separators, blanks (<c>*</c>, <c>?</c>) and extensions (<c>L</c>, <c>W</c>, <c>#</c>).
/// </summary>
class CronExpressionTokenizer
{
    // Characters that terminate a pending value and become a token of their own.
    private static readonly Dictionary<char, TokenType> TokenTypes = new Dictionary<char, TokenType>
    {
        [' '] = TokenType.FieldSeparator,
        [','] = TokenType.ListItemSeparator,
        ['-'] = TokenType.RangeSeparator,
        ['/'] = TokenType.StepSeparator,
        ['*'] = TokenType.Blank,
        ['?'] = TokenType.Blank,
        ['L'] = TokenType.Extension,
        ['W'] = TokenType.Extension,
        ['#'] = TokenType.Extension,
    };

    /// <summary>
    /// Lazily tokenizes <paramref name="text"/> from left to right.
    /// </summary>
    /// <param name="text">The cron expression to tokenize.</param>
    /// <returns>
    /// The tokens in order of appearance. Each token carries the zero-based index of its
    /// first character in <paramref name="text"/>.
    /// </returns>
    public static IEnumerable<Token> Tokenize(string text)
    {
        // Start index of the value currently being accumulated; null while none is pending.
        var position = (int?)null;
        var value = new StringBuilder();
        var lastTokenType = TokenType.None;

        for (var i = 0; i < text.Length; i++)
        {
            var c = text[i];
            TokenType tokenType;
            var isSpecial = TokenTypes.TryGetValue(c, out tokenType);

            // 'L' and 'W' are also ordinary letters inside names such as "JUL" and "WED".
            // They belong to the value when they continue a name that is already being
            // built, or - for 'W' - when they do not directly follow a value ("52W").
            var isNameLetter =
                isSpecial &&
                tokenType == TokenType.Extension &&
                char.IsLetter(c) &&
                ((value.Length > 0 && char.IsLetter(value[value.Length - 1])) ||
                 (c == 'W' && lastTokenType != TokenType.Value));

            if (!isSpecial || isNameLetter)
            {
                lastTokenType = TokenType.Value;
                position = position ?? i;
                value.Append(c);
                continue;
            }

            // A special character ends the pending value, so flush it first.
            if (value.Length > 0)
            {
                yield return new Token
                {
                    Type = TokenType.Value,
                    Position = position.Value,
                    Value = value.ToString()
                };

                // Forget the start index so the next value records its own position.
                // Guessing "i + 1" here is wrong whenever further special characters
                // follow before the next value starts (e.g. "52W ? JAN").
                position = null;
                value.Clear();
            }

            lastTokenType = tokenType;
            yield return new Token
            {
                Type = tokenType,
                Position = i,
                Value = c.ToString()
            };
        }

        // Flush a value that runs up to the end of the expression.
        if (value.Length > 0)
        {
            yield return new Token
            {
                Type = TokenType.Value,
                Position = position.Value,
                Value = value.ToString()
            };
        }
    }
}
The result for the test expression is:
Type Position Value
Value 0 14
ListItemSeparator 2 ,
Value 3 18
ListItemSeparator 5 ,
Value 6 3
RangeSeparator 7 -
Value 8 39
StepSeparator 10 /
Value 11 3
ListItemSeparator 12 ,
Value 13 52
FieldSeparator 15
Value 16 0
StepSeparator 17 /
Value 18 5
FieldSeparator 19
Value 20 14
ListItemSeparator 22 ,
Value 23 18
ListItemSeparator 25 ,
Value 26 3
RangeSeparator 27 -
Value 28 39
ListItemSeparator 30 ,
Value 31 52
Extension 33 W
FieldSeparator 34
Value 34 JAN
Blank 35 ?
FieldSeparator 36
ListItemSeparator 40 ,
Value 41 MAR
ListItemSeparator 44 ,
Value 45 SEP
FieldSeparator 48
Value 49 MON
RangeSeparator 52 -
Value 53 WED
ListItemSeparator 56 ,
Value 57 FRI
Extension 60 #
Value 61 3
FieldSeparator 62
Value 63 2002
RangeSeparator 67 -
Value 68 2010
Then the `CronExpressionParser` comes into play. It groups all tokens into fields (`TokenGroup`) and creates `Subexpression`s. It also parses the names of months and days of the week. For the names and ordinal extensions I use three dictionaries.
class CronExpressionParser
{
// Case-insensitive lookup from three-letter day name to its 1-based number (SUN = 1 ... SAT = 7).
private static readonly IReadOnlyDictionary<string, int> DaysOfWeek = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase)
{
    ["SUN"] = 1,
    ["MON"] = 2,
    ["TUE"] = 3,
    ["WED"] = 4,
    ["THU"] = 5,
    ["FRI"] = 6,
    ["SAT"] = 7,
};
// Case-insensitive lookup from three-letter month name to its 1-based number (JAN = 1 ... DEC = 12).
// The fifth entry must be "MAY"; the previous spelling "MAI" made the standard name unparseable.
private static readonly IReadOnlyDictionary<string, int> Months = new[]
{
    "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
}
.Select((month, index) => new { month, index })
.ToDictionary(x => x.month, x => x.index + 1, StringComparer.OrdinalIgnoreCase);
// Case-insensitive lookup from the text of an extension argument or symbol to its CronExtension member.
private static readonly IReadOnlyDictionary<string, CronExtension> Extensions =
    new Dictionary<string, CronExtension>(StringComparer.OrdinalIgnoreCase)
    {
        { "1", CronExtension.First },
        { "2", CronExtension.Second },
        { "3", CronExtension.Third },
        { "4", CronExtension.Fourth },
        { "5", CronExtension.Fifth },
        { "L", CronExtension.Last },
        { "W", CronExtension.Weekday },
    };
/// <summary>
/// Groups <paramref name="tokens"/> into fields and parses each field into a subexpression.
/// Evaluation is deferred: nothing is grouped or parsed until the result is enumerated.
/// </summary>
/// <param name="tokens">The tokens of a single cron expression, in order.</param>
/// <returns>One subexpression per field, in field order.</returns>
public static IEnumerable<Subexpression> Parse(IEnumerable<Token> tokens)
{
    return GroupTokens(tokens).Select(fieldTokens => ParseSubexpression(fieldTokens));
}
/// <summary>
/// Cuts the token stream at every field separator and pairs each resulting run of tokens
/// with the next field type in the fixed order second, minute, hour, day of month, month,
/// day of week, year. The separator tokens themselves are dropped.
/// </summary>
/// <param name="tokens">The tokens of a single cron expression, in order.</param>
/// <returns>One grouping per field; the final field is emitted when the tokens run out.</returns>
private static IEnumerable<IGrouping<Type, Token>> GroupTokens(IEnumerable<Token> tokens)
{
    // Field types still to be assigned, front of the queue first.
    var remainingFieldTypes = new Queue<Type>();
    remainingFieldTypes.Enqueue(typeof(Second));
    remainingFieldTypes.Enqueue(typeof(Minute));
    remainingFieldTypes.Enqueue(typeof(Hour));
    remainingFieldTypes.Enqueue(typeof(DayOfMonth));
    remainingFieldTypes.Enqueue(typeof(Month));
    remainingFieldTypes.Enqueue(typeof(DayOfWeek));
    remainingFieldTypes.Enqueue(typeof(Year));

    var pending = new List<Token>();
    foreach (var token in tokens)
    {
        if (token.Type != TokenType.FieldSeparator)
        {
            pending.Add(token);
        }
        else
        {
            // A separator closes the current field; start collecting the next one.
            yield return new TokenGrouping(remainingFieldTypes.Dequeue(), pending);
            pending = new List<Token>();
        }
    }

    // Whatever was collected after the last separator forms the final field.
    yield return new TokenGrouping(remainingFieldTypes.Dequeue(), pending);
}
private static Subexpression ParseSubexpression(IGrouping<Type, Token> tokenGroup)
{
var ranges = new List<CronRange>();
var range = (CronRange?)null;
var lastTokenType = TokenType.None;
foreach (var token in tokenGroup)
{
if (token.Type == TokenType.Value)
{
var value = 0;
var valueParsed =
int.TryParse(token.Value, out value) ||
DaysOfWeek.TryGetValue(token.Value, out value) ||
Months.TryGetValue(token.Value, out value);
if (!valueParsed)
{
throw new Exception($"Invalid value "{token.Value}"" at {token.Position}."")
Solution