Problem
Joining a project with some crazy LINQ sorting statements and after refactoring by the team using a custom comparer we found out that it is so slow for some data that you actually think it is an infinite loop.
How to fix this performance issue but still maintaining the sorting requirements?
private static IEnumerable<PolicyTransactionItem<Subject>> GetSubjectsForChangeTracking(
List<PolicyTransaction> transactions)
{
var ebcidicSortComparer = new EBCDICSortComparer();
var subjects = (from t in transactions
from s in t.PolicySubjects
select new PolicyTransactionItem<Subject>()
{
Item = s,
PolicyNumber = t.PolicyNumber,
FromDate = t.EffectiveDate,
ToDate = t.TermExpirationDate,
CoverageKey = t.CoverageKey,
AmBestNumber = t.IssuerAmBestCompanyNumber
});
subjects = subjects
.OrderBy(t => t.Item.FirstName, ebcidicSortComparer)
.ThenBy(t => t.Item.MiddleName)
.ThenBy(t => t.Item.LastName, ebcidicSortComparer)
.ThenBy(t => t.Item.NameSuffix)
.ThenBy(t => t.Item.DOB ?? DateTime.MaxValue)
.ThenBy(t => t.Item.SSNFromDb, new EBCDICSortComparer(true))
.ThenBy(t => t.Item.GenderCodeFromDb)
.ThenBy(t => t.Item.RelationToInsuredCodeFromDb)
.ThenBy(t => t.Item.RelationToPolicyHolderCodeFromDb)
.ThenBy(t => t.Item.DLNumberFromDb, ebcidicSortComparer)
.ThenBy(t => t.Item.DLStateFromDb, new EBCDICSortComparer(false))
.ThenByDescending(t => t.FromDate)
.ThenByDescending(t => t.ToDate).ToList();
return subjects;
}
public class EBCDICSortComparer : IComparer<string>
{
public EBCDICSortComparer(bool emptyStringLast)
{
_emptyStringLast = emptyStringLast;
}
public EBCDICSortComparer()
{
_emptyStringLast = false;
}
private readonly bool _emptyStringLast;
static readonly int[] AsciiToEBCDICArray = new int[256]{
0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15,
16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31,
64, 79,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97,
240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111,
124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214,
215,216,217,226,227,228,229,230,231,232,233, 74,224, 90, 95,109,
121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150,
151,152,153,162,163,164,165,166,167,168,169,192,106,208,161, 7,
32, 33, 34, 35, 36, 21, 6, 23, 40, 41, 42, 43, 44, 9, 10, 27,
48, 49, 26, 51, 52, 53, 54, 8, 56, 57, 58, 59, 4, 20, 62,225,
65, 66, 67, 68, 69, 70, 71, 72, 73, 81, 82, 83, 84, 85, 86, 87,
88, 89, 98, 99,100,101,102,103,104,105,112,113,114,115,116,117,
118,119,120,128,138,139,140,141,142,143,144,154,155,156,157,158,
159,160,170,171,172,173,174,175,176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,202,203,204,205,206,207,218,219,
220,221,222,223,234,235,236,237,238,239,250,251,252,253,254,255
};
public int Compare(string x, string y)
{
if (x == null)
return 1;
if (y == null)
return -1;
if (string.Equals(x, y))
return 0;
var xChars = x.ToCharArray();
var yChars = y.ToCharArray();
if (xChars.Length == 0)
return _emptyStringLast ? 1 : -1;
if (yChars.Length == 0)
return _emptyStringLast ? -1 : 1;
for (var i = 0; i < Math.Min(xChars.Length, yChars.Length); i++)
{
var charItemX = xChars[i];
var charItemY = yChars[i];
var ebcdicItemX = AsciiToEBCDICArray[charItemX];
var ebcdicItemY = AsciiToEBCDICArray[charItemY];
if (ebcdicItemX != ebcdicItemY)
return ebcdicItemX > ebcdicItemY ? 1 : -1;
}
return x.Length.CompareTo(y.Length);
}
}
[UPDATE]
Debugging one by one the fields it turns out the problem is in .ThenBy(t => t.Item.SSNFromDb, new EBCDICSortComparer(true))
. When I try to debug the comparer it seems that x
and y
are both null, always, and it turns out like a “forever loop”.
Adding this at the top of the Compare
method seems to fix the problem.
if (string.IsNullOrWhiteSpace(x) &&
string.IsNullOrWhiteSpace(y))
return 0;
Solution
AsciiToEBCDICArray
establish one-to-one relation between EBCDIC and ASCII characters, so you do not have to convert to check whether characters are identical. You need to convert only when ASCII characters are different. So, instead of this:
var charItemX = xChars[i];
var charItemY = yChars[i];
var ebcdicItemX = AsciiToEBCDICArray[charItemX];
var ebcdicItemY = AsciiToEBCDICArray[charItemY];
if (ebcdicItemX != ebcdicItemY)
return ebcdicItemX > ebcdicItemY ? 1 : -1;
you can have:
var charItemX = xChars[i];
var charItemY = yChars[i];
if(charItemX != charItemY)
return AsciiToEBCDICArray[charItemX] > AsciiToEBCDICArray[charItemY] ? 1: -1;
to save quite a few character conversions.
And finally convert the fields to EBCDIC once (on some temporary objects), then compare those; if it’s done in a comparator it will be converted over and over again for no good reason.
What I suggested is to map, sort, unmap, aka Schwartzian transform:
- fetch the values without most sorting except the date sorting,
- copy objects into some temporary wrapper that still has a reference to the original object
- map to EBCDIC in the wrapper
- sort (use a stable one so that the sorting by date is preserved)
- return a list with the original objects from the now sorted list.