Binary Radix Sort implementation on linked list

Posted on

Problem

I am trying to implement binary radix sort in C which sorts a linked list of integers stably. Although my algorithm has a time complexity of O(log2(k).n) (where k is the biggest integer in the linked list), other standard implementations of algorithms like merge sort/quick sort seem to have better execution time even when input size is large (n>10^6) and kis small (k<1000). Am I doing something wrong which is causing these longer execution times? Could you review this code?

Here is the code for my implementation:

#include<stdio.h>
#include<math.h>
#include<stdlib.h>
#include<time.h>
struct ListNode {
     int val;
     struct ListNode *next;
};
void print_list(struct ListNode *node) // printing integer values of linked list
{
  while(node!=NULL)
  {
    printf("%d, ", node->val);
    node=node->next;
  }
}
int getMax(struct ListNode *node) // finding biggest integer in linked list
{
  int max=node->val;
  while(node!=NULL)
  {
    if(node->val > max)
    {
      max=node->val;
    }
    node=node->next;
  }
  return max;
}
void addMin(struct ListNode *node, int min) // Adding smallest integer in linked list so that no integer in linked list is negative
{
  while(node!=NULL)
  {
    node->val+=min;
    node=node->next;
  }
}
void subMin(struct ListNode *node, int min) // returning linked list to original values after sorting
{
  while(node!=NULL)
  {
    node->val-=min;
    node=node->next;
  }
}
int getMin(struct ListNode *node) // finding smallest integer in linked list
{
  int min=node->val;
  while(node!=NULL)
  {
    if(node->val < min)
    {
      min=node->val;
    }
    node=node->next;
  }
  return min;
}
void binarySort(struct ListNode **head, int bit) // sorts linked list based on a particular bit
{
  struct ListNode *temp = *head;
  struct ListNode *insertNode = *head;
  struct ListNode *prevNode = NULL;
  int flag=0;
  int flag2=0;
  if((((*head)->val) & (1 << (bit - 1))))
  {
    flag=1;
  }
  while(temp!=NULL)
  {
    if(flag==0 && !((temp->val) & (1 << (bit - 1))))
    {
      if((temp->next)!=NULL && (((temp->next)->val) & (1 << (bit - 1))))
      {
        insertNode=temp;
        flag=1;
        flag2=1;
      }
    }
    else if(flag2==0 && !((temp->val) & (1 << (bit - 1))))
    {
      prevNode->next=temp->next;
      temp->next=*head;
      insertNode=temp;
      *head=temp;
      temp=prevNode;
      flag=1;
      flag2=1;
    }
    else if(!((temp->val) & (1 << (bit - 1))))
    {
      prevNode->next=temp->next;
      temp->next=insertNode->next;
      insertNode->next=temp;
      insertNode=temp;
      temp=prevNode;
    }
    prevNode=temp;
    temp=temp->next;
  }
}
struct ListNode* sortList(struct ListNode* head) // binary radix sort
{
  if(head==NULL)
  {
    return NULL;
  }
  int min=getMin(head);
  if(min<0)
  {
    subMin(head, min);
  }
  int biggest_int_len = log2(getMax(head))+1;
  int i;
  for(i=1 ; i<=biggest_int_len ; i++)
  {
    binarySort(&head, i);
  }
  if(min<0)
  {
    addMin(head, min);
  }
  return head;
}
int main() // code to test the function
{
  srand(time(0));
  int num;
  struct ListNode *head = (struct ListNode*) malloc(sizeof(struct ListNode));
  head->next=NULL;
  printf("Enter input size: ");
  scanf("%d", &num);
  head->val=rand()%1000;
  struct ListNode *prevNode = head;
  while(num-1>0)
  {
    struct ListNode *temp = (struct ListNode*) malloc(sizeof(struct ListNode));
    temp->val=rand()%1000;
    temp->next=NULL;
    prevNode->next=temp;
    prevNode=temp;
    num--;
  }
  printf("nn");
  print_list(head);
  clock_t t;
  t = clock();
  struct ListNode *sortedList=sortList(head);
  t = clock() - t;
  double time_taken = ((double)t)/CLOCKS_PER_SEC; // in seconds
  printf("nn");
  print_list(sortedList);
  printf("nnfun() took %f seconds to execute n", time_taken);
}

Also is there a good way to test how much memory the sort uses?

Solution

  • Performance

    The root cause is a poor referential locality. As sorting progresses, and the nodes get relinked, they become accessed in no particular order, all over the memory. This results in too many cache misses, and a cache miss is very expensive.

  • binarySort is a bit too complicated. It is very hard to follow. Things like flag, and flag2, (what do they signify?) are always a signal of an unclear design.

    It seems that those flags stem from the special-casing a head node. Try to avoid special cases. Usually having a dummy node greatly simplifies the design. Consider (untested)

      void binarySort(struct ListNode ** head, int bit)
      {
          struct ListNode dummy = { .next = *head };
          struct ListNode *base = &dummy;
          struct ListNode *prev = base;
          struct ListNode *curr = prev->next;
          while (curr != NULL) {
              if (!bit_is_set(curr->val, bit)) {
                  prev->next = curr->next;
                  curr->next = base->next;
                  base = curr;
              } else {
                  prev = prev->next;
              }
              curr = prev->next;
          }
          *head = dummy.next;
      }
    

Leave a Reply

Your email address will not be published. Required fields are marked *