Problem
I am interested in the performance tradeoffs of linked lists, standard lists and unrolled linked lists. This will depend on various factors including what operation is being performed (e.g. delete, insert, and from where), and the size of the nodes. I’ve written some performance tests (not listed here).
I am interested in whether the node class could be implemented as an array rather than a list, so I’ll be trying this in the future. This class works quite well as a stack, but I am also interested in its use as a queue; in order to try this I might alter the Node class to store pointers for the start and end of the list/array.
Does anyone have any feedback on my unrolled linked list class? I did it just for fun.
/// <summary>
/// An unrolled linked list implements the standard list operations.
/// Its implementation is inbetween that of a standard List and a LinkedList.
/// </summary>
public class UnrolledLinkedList<T> : IList<T>
{
private Node first, last;
private int count;
/// <summary>
/// Create a new (empty) unrolled linked list
/// </summary>
public UnrolledLinkedList()
{
count = 0;
}
/// <summary>
/// Create an unrolled linked list which is populated with data from an enumerable
/// </summary>
public UnrolledLinkedList(IEnumerable<T> enumerable)
: this()
{
if (enumerable.Any())
{
first = new Node();
Node node = first;
foreach(T t in enumerable)
{
node = AddAfterIfFull(node, t);
count++;
}
last = node;
}
}
public int IndexOf(T item)
{
int offset = 0;
foreach (Node node in Nodes)
{
int nodeResult = node.IndexOf(item);
if (nodeResult >= 0)
{
return offset + nodeResult;
}
offset += node.Count;
}
return -1;
}
public void Insert(int index, T item)
{
if (first == null) //if the list is empty, then thats only ok if index=0
{
if (index == 0)
{
Initialise(item);
}
else
{
throw new IndexOutOfRangeException("The list is empty, so you can only insert at index 0");
}
}
else
{
if (index == count) //adding right at the end of the list
{
AddAfterIfFull(last, item);
}
else
{
FindResult findResult = Find(index);
if (findResult.node.IsFull)
{
T t = findResult.node.RemoveLast();
AddAfter(findResult.node, t);
}
findResult.node.Insert(index - findResult.offset, item);
}
}
count++;
}
public T this[int index]
{
get
{
FindResult findResult = Find(index);
return findResult.node[index - findResult.offset];
}
set
{
FindResult findResult = Find(index);
findResult.node[index - findResult.offset] = value;
return;
}
}
public void Add(T item)
{
if (first == null)
{
Initialise(item);
}
else
{
AddAfterIfFull(last, item);
}
count++;
}
public void Clear()
{
first = null;
count = 0;
}
public bool Contains(T item)
{
return Nodes.Any(x => x.Contains(item));
}
public void CopyTo(T[] array, int arrayIndex)
{
int i = arrayIndex;
foreach (T t in this)
{
array[i] = t;
i++;
}
}
public int Count
{
get
{
return count;
}
}
public bool IsReadOnly
{
get { return false; }
}
public bool Remove(T item)
{
foreach (Node node in Nodes)
{
int i = node.IndexOf(item);
if (i >= 0)
{
if (node.Count == 1)
{
DeleteNode(node);
}
else
{
node.RemoveAt(i);
}
count--;
return true;
}
}
return false;
}
public void RemoveAt(int index)
{
FindResult findResult = Find(index);
findResult.node.RemoveAt(index - findResult.offset);
if (!findResult.node.Any())
{
DeleteNode(findResult.node);
}
count--;
return;
}
public IEnumerator<T> GetEnumerator()
{
foreach (Node node in Nodes)
{
foreach (T t in node)
{
yield return t;
}
}
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
/// <summary>
/// Initialise this list so it contains only one item.
/// DOES NOT increase the count
/// </summary>
private void Initialise(T firstItem)
{
first = new Node();
first.Add(firstItem);
last = first;
}
/// <summary>
/// Gets an enumerator over all the nodes
/// </summary>
private IEnumerable<Node> Nodes
{
get
{
Node node = first;
while (node != null)
{
yield return node;
node = node.next;
}
}
}
/// <summary>
/// If the node supplied is full then a new one is created afterwards;
/// the item is added
/// </summary>
/// <returns>If the node is full then the new node; otherwise just the node supplied</returns>
private Node AddAfterIfFull(Node node, T item)
{
if (node.IsFull)
{
return AddAfter(node, item);
}
else
{
node.Add(item);
return node;
}
}
/// <summary>
/// Adds a new node after the node supplied, and populates it with the item supplied.
/// DOES NOT increase the count
/// </summary>
private Node AddAfter(Node node, T item)
{
Node newNode = new Node();
newNode.Add(item);
newNode.next = node.next;
newNode.previous = node;
if (node.next == null)
{
last = newNode;
}
else
{
node.next.previous = newNode;
}
node.next = newNode;
return newNode;
}
/// <summary>
/// Removes a node from the list
/// </summary>
private void DeleteNode(Node node)
{
if (node.next == null)
{
last = node.previous;
}
if (node.previous != null)
{
node.previous.next = node.next;
}
if (node.next != null)
{
node.next.previous = node.previous;
}
}
/// <summary>
/// Finds the item with this index,
/// and the index of the first item within that node
/// </summary>
private FindResult Find(int index)
{
int offset = 0;
foreach (Node node in Nodes)
{
int nextOffset = offset + node.Count;
if (index >= offset && index < nextOffset) //found node
{
return new FindResult(node, offset);
}
offset = nextOffset;
}
throw new IndexOutOfRangeException("No item at that index!");
}
/// <summary>
/// Stores the two values returned by Find
/// </summary>
private class FindResult
{
public readonly Node node;
public readonly int offset;
public FindResult(Node node, int offset)
{
this.node = node;
this.offset = offset;
}
}
////////////////////// NODE CLASS //////////////////////
private class Node : List<T>
{
internal const int MaxSize = 100;
public Node next, previous;
public bool IsFull
{
get
{
return Count >= MaxSize;
}
}
public T RemoveLast()
{
int i = Count - 1;
T t = this[i];
RemoveAt(i);
return t;
}
}
}
Performance
Here are some performance statistics. It’s not scientific but it gives you a rough idea. Times are in milliseconds.
- Adding 100000 integers (list): 1
-
Adding (unrolled linked list): 12
-
Finding the index of an integer at the end of 1000 integers (list):
10886 -
Finding (unrolled linked list): 18055
-
Inserting 100000 integers into the middle (list): 22694
-
Insertion(unrolled linked list): 2238
-
Deletion of 1000 items from the start (list): 2331
- Deletion (unrolled linked list): 1
It looks like an unrolled linked list could be a good choice for a long list where there is a lot of insertion or deletion at places other than the end. It will have a lower memory footprint than a linked list (I’ve not tested that out though).
Solution
A few things off the bat:
- You should consider prefixing private field names with underscores, so it is possible to distinguish them from local variables.
Edit: There is no “official” naming conventions regarding private members. Not that i know of anyway. Underscore however is a somewhat accepted industrial standart nowadays. Mainly, because a) it is used by Microsoft, b) it is used by ReSharper (by default), c) better intelli-sense support, d) when it comes to choosing betweenthis.name = name;
and_name = name;
most people choose the latter. I think this is pretty accurate -
IEnumerator<T> enumerator = enumerable.GetEnumerator(); while (enumerator.MoveNext())
emm, why dont you use
foreach
? -
tuple.Item1[index - tuple.Item2] = value;
– really hard to read. -
public void Clear() { first = null; count = 0; }
waht happens to
last
reference? -
internal
members inprivate
class do not make much sense (consider making thosepublic
). -
Your
Node
class should probably extendList<T>
and not encapsulate it. This way you can drop all those proxy methods and therefore reduce the amount of code. Edit2: i think using arrays inNode
class is an overkill. You’ll end up re-writingList
implementation. Resizing is the only noticable performance drawback ofList
but that is not going to happen in your case (since you specify the size in constructor). The rest is unlikely to become any kind of bottleneck, so you probably want to keep it simple. -
internal const int MaxSize = 100;
this should probably be a parameter rather than a constant (at least as a user i would like to be able to set up the size of a node)
Few things (mostly small things that could slightly improve performance):
public UnrolledLinkedList(IEnumerable<T> enumerable)
– do not callenumerable.Any()
– instead start enumerating it and check on the first iteration if there is any data. This is for the performance nitpicker.- The same constructor – you should dispose the enumerator if it is disposable (that is what
foreach
would do automatically). - Do not use
Tuple<,>
– instead create your ownstruct
that has two fields (not properties). Named fields will make the code a bit more readable and accessing fields will be faster than properties (it is only used in private members which is why it can be safely done). Insert()
checksfirst==null
whileAdd()
checkslast == null
. Should be consistent.- Since
.IsReadOnly
always returnfalse
you might hide it by implementing it explicitly. RemoveAt()
– use the same approach withnode.Count==1
as inRemove()
. Should be slightly faster.ToString()
– do not print the whole contents of the list since it rarely will give the expected result. Since this is most likely done to enhance debugging experience, use debugging attributesFind()
– you are doingoffset + node.Count
twice – instead useoffset = nextOffset;
.Node(List<T>)
constructor will result in the internal list being created and then promptly removed.- Since
Node
class is private, do not use properties forNext
andPrevious
– instead use fields.
There also could be a benefit of (maybe a parameter that controls it) not destroying the last Node
that you remove – instead hold it in another variable. When you need to create a new node first check if you have one stored there. You might save a lot of operations on memory allocation if the list is used in certain way. You might even store all previously created nodes in a pool. This would be consistent with the behavior of other .NET collection classes that do not decrease their allocated capacity automatically instead always remaining at their peak capacity.
This part is used twice, don’t duplicate the code but instead create a common method and call it from both places.
if (last.IsFull)
{
AddItemAfterInNewNode(last, item);
}
else
{
last.Add(item);
}
You could also use LINQ here.
public bool Contains(T item)
{
return Nodes.Any(x => x.Contains(item));
}
And I also would recommend you to inherit from List, it would save you a lot of code.