Sorting efficiently by multiple columns [closed]

Posted on

Problem

I need to sort a large array (5 columns x ~500,000 rows) of string variables by two columns. I have code that works, but it takes an unacceptably long time, particularly because I have to do this on five sheets with different data.

I have been doing research, but the code I found tend to be for either a single column, or exclusively for integers. I need to be able to change it to work with between 3-8 columns in the array as well, but I’m hopeful I can figure that out myself.

Sorting on two columns:

   ActiveWorkbook.Worksheets("Quantity Available").Sort.SortFields.Clear
    ActiveWorkbook.Worksheets("Quantity Available").Sort.SortFields.Add Key:=Range( _
        "C2:C" & LastRowAvail), SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:= _
        xlSortNormal
    ActiveWorkbook.Worksheets("Quantity Available").Sort.SortFields.Add Key:=Range( _
        "E2:E" & LastRowAvail), SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:= _
        xlSortNormal
    With ActiveWorkbook.Worksheets("Quantity Available").Sort
        .SetRange Range("A1:F" & LastRowAvail)
        .Header = xlYes
        .MatchCase = False
        .Orientation = xlTopToBottom
        .Apply
    End With

Sorting on one column:

   ActiveWorkbook.Worksheets("Main Tab").Sort.SortFields.Clear
    ActiveWorkbook.Worksheets("Main Tab").Sort.SortFields.Add Key:=Range( _
        "U2:U" & LastRow), SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:= _
        xlSortNormal
    With ActiveWorkbook.Worksheets("Main Tab").Sort
        .SetRange Range("A1:AU" & LastRow)
        .Header = xlYes
        .MatchCase = False
        .Orientation = xlTopToBottom
        .Apply
    End With

Solution

My time became limited so here is the first and probably the preferable as it would facilitate migrating the data (lots of data) to a proper database.

Note I have mixed some StackExchange code and Microsoft code with my code (that was never intended for excel) to accomplish a form of data manipulation for you, as an example.

Please note you probably need to look at the connection string in “Public Function RecordSetFromSheet” specifically this part “Extended Properties=’Excel 8.0;HDR=Yes;IMEX=1′”

Option Explicit

' How to use as a memory file
'  DataSet.Fields.Append "FileName", adVarChar, 254
'  DataSet.Fields.Append "FileType", adVarChar, 254
'  DataSet.Fields.Append "FileDateTime", adDBTimeStamp
'
'  OpenData
'  DataSet.Sort = "FileDateTime DESC, FileType ASC, FileName ASC"

' Use Save / CreatePersistant / OpenPersistant as required

' Create a Recordset
Public DataSet As Object 'Facilitate Late Binding
Public MyFSO As New FileSystemObject
Private Sub Construct()

  Set DataSet = CreateObject("ADODB.Recordset") ' New ADODB.Recordset

  'See this link for datatypeenum "https://docs.microsoft.com/en-us/sql/ado/reference/ado-api/datatypeenum"

  DataSet.Fields.Append "FileName", adVarChar, 254
  DataSet.Fields.Append "FileType", adVarChar, 254
  DataSet.Fields.Append "FileDateTime", adDBTimeStamp

End Sub
Private Sub Destruct()

  Set DataSet = Nothing

End Sub
Public Sub CloseData()

' Close the Recordset
  DataSet.Close

End Sub
Public Function FileExist(pFileName as string) as boolean

DIM RetValue as Boolean

  RetValue = False

  If MyFSO.FileExists(sFilename) Then
     RetValue = True
  End If

  FileExist = RetValue

End Function
Public Sub CreatePersistData(sFilename As String)

' Create the Recordset

  If FileExist(sFilename) = True Then
     MyFSO.DeleteFile sFilename
  End If

  DataSet.Save sFilename, adPersistADTG

End Sub
Public Sub OpenPersistData(sFilename As String)

' Open the Recordset

  If FileExist(sFilename) = True Then
     DataSet.Open sFilename, , adOpenStatic, adLockOptimistic, adCmdFile
     If DataSet.RecordCount > 0 Then
        DataSet.MoveFirst
     End If    
  End If

End Sub
Public Sub Save()

  DataSet.Save

End Sub
Public Sub DeleteAllRecords()

  While DataSet.RecordCount > 0
        DataSet.MoveFirst
        DataSet.Delete
  Wend    

End Sub
Public Sub Sort1

  DataSet.Sort = "FileDateTime DESC, FileType ASC, FileName ASC"

End Sub
Public Sub Sort2

  DataSet.Sort = "FileDateTime ASC, FileType ASC, FileName ASC"

End Sub
Public Sub Sort3

  DataSet.Sort = "FileType ASC, FileName ASC, FileDateTime DESC"

End Sub
Public Sub LoadWorkSheet(ws as string)

' See this Microsoft Document "https://msdn.microsoft.com/en-us/library/office/ff839240(v=office.15).aspx"

  For iCols = 0 to DataSet.Fields.Count - 1 
      ws.Cells(1, iCols + 1).Value = DataSet.Fields(iCols).Name 
  Next 

  ws.Range(ws.Cells(1, 1), _ 

  ws.Cells(1, DataSet.Fields.Count)).Font.Bold = True

  ws.Range("A2").CopyFromRecordset DataSet

End Sub
'
' See here for how to create initial file as in I have adapted it for consistency "http://stackoverflow.com/questions/2484516/vba-create-adodb-recordset-from-the-contents-of-a-spreadsheet"
'
Public Function RecordSetFromSheet(sheetName As String)

'Dim rst As New ADODB.Recordset
Dim cnx As Object ' To facilitate late binding New ADODB.Connection
Dim cmd As Object ' To Facilitate Late binding New ADODB.Command

  Set cnx = CreateObject("ADODB.Connection") ' New ADODB.Connection
  set cmd = CreateObject("ADODB.Command") ' New ADODB.Command

  'setup the connection
  '[HD    R=Yes] means the Field names are in the first row
  With cnx
      .Provider = "Microsoft.Jet.OLEDB.4.0"
      .ConnectionString = "Data Source='" & ThisWorkbook.FullName & "'; " & "Extended Properties='Excel 8.0;HDR=Yes;IMEX=1'"
      .Open
  End With

  'setup the command
  Set cmd.ActiveConnection = cnx
  cmd.CommandType = adCmdText
  cmd.CommandText = "SELECT * FROM [" & sheetName & "$]"

  DataSet.CursorLocation = adUseClient
  DataSet.CursorType = adOpenDynamic
  DataSet.LockType = adLockOptimistic
  'open the connection
  DataSet.Open cmd
  'disconnect the recordset
  Set DataSet.ActiveConnection = Nothing

  'cleanup
  If cmd.State = adStateOpen Then
     Set cmd = Nothing
  End If    

  If cnx.State = adStateOpen Then
     cnx.Close
  End if

  Set cnx = Nothing

  '"return" the recordset object
  Set RecordSetFromSheet = DataSet

End Function

' This plus the use of the DataSet.Sort = "String" examples may be all you need or want
' In addition this Answer may be all you need to persist vs the more generic I created
' Easiest would be to use rs.Save "filename" and rs.Open "filename" to serialize client-side recordsets to files.
' DataSet.Save "FileName" .... DataSet.Open "filename"

Public Sub Test()

'Dim rstData As ADODB.Recordset

  Set DataSet = RecordSetFromSheet("Sheet1")

  Sheets("Sheet2").Range("A1").CopyFromRecordset DataSet

End Sub

Public Sub Main

  Construct

  If FileExist("YourFileNameGoesHere") then
     OpenPersistData "YourFileNameGoesHere"
  Else
     CreatePersistData "YourFileNameGoesHere"
  End If    
'    
' These are for examples only and initial debuging / understanding uncomment one or build your own
'
'  Sort1
'
'  LoadWorkSheet "YourWorkSheetNameGoesHere"
'
'  Sort2
'
'  LoadWorkSheet "YourWorkSheetNameGoesHere"
'
'  Sort3
'
'  LoadWorkSheet "YourWorkSheetNameGoesHere"

  Destruct

End Sub

Leave a Reply

Your email address will not be published. Required fields are marked *