Problem
I have (designed and) implemented this fancy sorting algorithm that combines natural runs in the input array with a heap data structure:
function ArrayRangeException(message) {
this.message = message;
this.getMessage = function() { return this.message; }
}
function RangeCheck(arrayLength, fromIndex, toIndex) {
if (fromIndex < 0) {
throw new ArrayRangeException("'fromIndex' is negative: " + fromIndex);
}
if (toIndex > arrayLength) {
throw new ArrayRangeException(
"'toIndex' is too large: " + toIndex + ", array length: " +
arrayLength);
}
if (fromIndex > toIndex) {
throw new ArrayRangeException(
"fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")");
}
}
function RunHeap(array, cmp) {
this.cmp = cmp;
this.array = array;
const auxArrayLength = (array.length >>> 1) + 1;
this.fromIndexArray = Array(auxArrayLength);
this.toIndexArray = Array(auxArrayLength);
this.size = 0;
this.pushRun = function(fromIndex, toIndex) {
const nodeIndex = this.size++;
this.fromIndexArray[nodeIndex] = fromIndex;
this.toIndexArray[nodeIndex] = toIndex;
},
this.popElement = function() {
const returnValue = this.array[this.fromIndexArray[0]];
this.fromIndexArray[0]++;
if (this.fromIndexArray[0] === this.toIndexArray[0]) {
const last1 = this.fromIndexArray[--this.size];
this.fromIndexArray[0] = last1;
const last2 = this.toIndexArray[this.size];
this.toIndexArray[0] = last2;
}
this.siftDown(0);
return returnValue;
},
this.swap = function(array, index1, index2) {
const tmp = array[index1];
array[index1] = array[index2];
array[index2] = tmp;
},
this.isLessThan = function(runIndex1, runIndex2) {
const element1 = this.array[this.fromIndexArray[runIndex1]];
const element2 = this.array[this.fromIndexArray[runIndex2]];
const cmp = this.cmp(element1, element2);
if (cmp != 0) {
return cmp < 0;
}
return this.fromIndexArray[runIndex1] < this.fromIndexArray[runIndex2];
},
this.siftDown = function(index) {
let nodeIndex = index;
let leftChildIndex = (index << 1) + 1
let rightChildIndex = leftChildIndex + 1;
let minIndex = index;
while (true) {
if (leftChildIndex < this.size
&& this.isLessThan(leftChildIndex, nodeIndex)) {
minIndex = leftChildIndex;
}
if (rightChildIndex < this.size
&& this.isLessThan(rightChildIndex, minIndex)) {
minIndex = rightChildIndex;
}
if (minIndex === nodeIndex) {
return;
}
this.swap(this.fromIndexArray, minIndex, nodeIndex);
this.swap(this.toIndexArray, minIndex, nodeIndex);
nodeIndex = minIndex;
leftChildIndex = (nodeIndex << 1) + 1;
rightChildIndex = leftChildIndex + 1;
}
},
this.buildHeap = function() {
for (i = Math.floor(this.size / 2); i >= 0; --i) {
this.siftDown(i);
}
},
this.extendRun = function(length) {
this.toIndexArray[this.size - 1] += length;
},
this.appendRun = function(fromIndex, toIndex) {
this.fromIndexArray[this.size] = fromIndex;
this.toIndexArray[this.size] = toIndex;
this.size++;
}
}
function reverseRun(array, fromIndex, toIndex) {
for (i1 = fromIndex, i2 = toIndex; i1 < i2; i1++, i2--) {
const savedArrayComponent = array[i1];
array[i1] = array[i2];
array[i2] = savedArrayComponent;
}
}
function createRunHeap(array, cmp) {
let runHeap = new RunHeap(array, cmp);
let left = 0;
let right = 1;
let last = array.length - 1;
let previousWasDescending = false;
while (left < last) {
head = left;
right = left + 1;
if (cmp(array[left], array[right]) <= 0) {
while (left < last && cmp(array[left], array[right]) <= 0) {
++left;
++right;
}
if (previousWasDescending) {
if (cmp(array[head - 1], array[head]) <= 0) {
runHeap.extendRun(right - head);
} else {
runHeap.appendRun(head, right);
}
} else {
runHeap.appendRun(head, right);
}
previousWasDescending = false;
} else { // Scan a descending run:
while (left < last && cmp(array[left], array[right]) > 0) {
++left;
++right;
}
reverseRun(array, head, left);
if (previousWasDescending) {
if (cmp(array[head - 1], array[head]) <= 0) {
runHeap.extendRun(right - head);
} else {
runHeap.appendRun(head, right);
}
} else {
runHeap.appendRun(head, right);
}
previousWasDescending = true;
}
++left;
++right;
}
if (left === last) {
if (cmp(array[last - 1], array[last]) <= 0) {
runHeap.extendRun(1);
} else {
runHeap.appendRun(last, last + 1);
}
}
return runHeap;
}
Array.prototype.heapSelectionSort =
function(cmp, fromIndex, toIndex) {
if (!cmp) {
cmp = (a, b) => a - b;
}
if (!fromIndex) {
fromIndex = 0;
}
if (!toIndex) {
toIndex = this.length;
}
RangeCheck(this.length, fromIndex, toIndex);
if (toIndex - fromIndex < 2) {
return this;
}
const aux = this.slice(fromIndex, toIndex);
const runHeap = createRunHeap(aux, cmp);
runHeap.buildHeap();
let index = fromIndex;
while (index < toIndex) {
this[index] = runHeap.popElement();
index++;
}
return this;
};
(The entire demo gist is here: https://gist.github.com/coderodde/47ae57983954c89ab7bef21b3fa7b232)
Critique request
Since I am not a professional Javascript developer, I would like to make my code above on par with idiomatic JS code.
Solution
Some possible improvements:
Your ArrayRangeException
has a getMessage
property that is never referenced elsewhere (even if a consumer were to know about it, it would be much simpler for it to just use the .message
property). But it’s still basically just an object wrapper around a string. Since you’re going to be throwing, you might consider using a built-in RangeError instead, eg:
if (toIndex > arrayLength) {
throw new RangeError(
"'toIndex' is too large: " + toIndex + ', array length: ' +
arrayLength);
}
You can also consider using template literals, which some consider to be more readable than concatenation:
if (toIndex > arrayLength) {
throw new RangeError(`'toIndex' is too large: ${toIndex}, array length: ${arrayLength}`);
}
RangeCheck
isn’t a constructor, so it probably shouldn’t be capitalized.
Your RunHeap
constructor assigns lots of function properties directly to the instance, and chains them with the comma operator:
this.pushRun = function(fromIndex, toIndex) {
// ...
},
this.popElement = function() {
// ...
},
this.swap = function(array, index1, index2) {
// ...
The comma operator evaluates each of its operands (from left to right) and returns the value of the last operand. This can be somewhat confusing, and so is often forbidden by code style guides. Since each function assignment here can be standalone, have each be a separate statement instead, and separate them with semicolons:
this.pushRun = function(fromIndex, toIndex) {
// ...
};
this.popElement = function() {
// ...
};
this.swap = function(array, index1, index2) {
// ...
But there’s another issue – this creates those pushRun
, popElement
, swap
methods anew every time the RunHeap
constructor is run. This is inefficient; the functions are all the same. Put them on the prototype instead, so that they only have to be created once:
function RunHeap(array, cmp) {
this.cmp = cmp;
// ...
}
RunHeap.prototype.pushRun = function(fromIndex, toIndex) {
// ...
};
RunHeap.prototype.popElement = function() {
// ...
Since you’re already using ES6+ syntax (which is great, you should), it’d probably be a good idea to use it everywhere – instead of a function
that you call new
on, you can use class
, they’re a bit more concise and readable, and are the preferred modern way of doing things:
class RunHeap {
constructor(array, cmp) {
this.cmp = cmp;
// ...
}
pushRun(fromIndex, toIndex) {
const nodeIndex = this.size++;
// ...
}
popElement() {
const returnValue = this.array[this.fromIndexArray[0]];
// ...
Using ++
/ --
as a standalone statement is fine, but they can sometimes be confusing when they’re used as an expression. (This is the same idea behind avoiding chained assignments and conditional assignments) You might consider putting the increments/decrements on their own line, eg replace
const last1 = this.fromIndexArray[--this.size];
with
this.size--;
const last1 = this.fromIndexArray[this.size];
Same for the other instances of pre/post increment-as-expression.
(even if you find the first version readable at a glance, I wouldn’t bet on most readers of the code seeing it the same way)
if (cmp != 0) {
When comparing, best to use strict equality, not loose equality.
for (i = Math.floor(this.size / 2); i >= 0; --i) {
Best to always declare variables before using them – this will either implicitly create a global variable i
, or throw an error if in strict mode. (change to let i =
) (Same for for (i1 = fromIndex, i2 = toIndex;
and head = left
)
The swap
function does the same thing as is done in each iteration of the reverseRun
loop. Maybe have reverseRun
call swap
?
ES6 allows for default arguments in the case nothing is passed:
Array.prototype.heapSelectionSort = function (cmp, fromIndex, toIndex) {
if (!cmp) {
cmp = (a, b) => a - b;
}
if (!fromIndex) {
fromIndex = 0;
}
if (!toIndex) {
toIndex = this.length;
}
can be:
Array.prototype.heapSelectionSort = function (
cmp = (a, b) => a - b,
fromIndex = 0,
toIndex = this.length
) {
It’s usually a bad idea to mutate the built-in objects, like Array.prototype
. (Bad frameworks putting non-standard methods onto prototypes is why we have Array.prototype.flat
instead of Array.prototype.flatten
, and Array.prototype.includes
instead of Array.prototype.contains
.) It can cause a few problems. You could have heapSelectionSort
be a standalone function instead.
In full: