Get string distance for a pseudo fuzzy search

Posted on

Problem

We are using the following for evaluating strings for a pseudo fuzzy search. Curious if anything sticks out as needing optimization

    function getEditDistance(a, b) {
    if (a.length == 0) return b.length;
    if (b.length == 0) return a.length;

    var matrix = [];
    var i;
    for (i = 0; i <= b.length; i++) {
        matrix[i] = [i];
    }

    var j;
    for (j = 0; j <= a.length; j++) {
        matrix[0][j] = j;
    }

    for (i = 1; i <= b.length; i++) {
        for (j = 1; j <= a.length; j++) {
            if (b.charAt(i - 1) == a.charAt(j - 1)) {
                matrix[i][j] = matrix[i - 1][j - 1];
            } else {
                matrix[i][j] = Math.min(matrix[i - 1][j - 1] + 1,
                    Math.min(matrix[i][j - 1] + 1,
                        matrix[i - 1][j] + 1));
            }
        }
    }

    return matrix[b.length][a.length];
};

Solution

The code becomes 25% faster if we extract values not changed in the loops to variables, but of course it’s observable only on large number of repetitions or long strings as tested in Chrome.

function getEditDistance(a, b) {
    var lenA = a.length, lenB = b.length;
    if (lenA == 0) return lenB;
    if (lenB == 0) return lenA;

    var matrix = [];
    var i;
    for (i = 0; i <= lenB; i++) {
        matrix[i] = [i];
    }

    var j, m0 = matrix[0];
    for (j = 0; j <= lenA; j++) {
        m0[j] = j;
    }

    for (i = 1; i <= lenB; i++) {
        var m = matrix[i], mPrev = matrix[i - 1];
        var bChar = b.charAt(i - 1);
        for (j = 1; j <= lenA; j++) {
            if (bChar == a.charAt(j - 1)) {
                m[j] = mPrev[j - 1];
            } else {
                m[j] = Math.min(mPrev[j - 1] + 1, Math.min(m[j - 1] + 1, mPrev[j] + 1));
            }
        }
    }

    return matrix[lenB][lenA];
}

P.S. Replacing s.charAt(i) with s[i] provides no tangible speedup and is not cross-browser.

Leave a Reply

Your email address will not be published. Required fields are marked *