Markdown table strings to two dimensional array converter implementation in C#

Posted on

Problem

I am attempting to implement a converter which can convert List<string> type markdown table into two dimensional array string[,].

The experimental implementation

public static class Converter
{
    public static string[,] ToTwoDimArray(in List<string> input)
    {
        return ConcatenateVertical(GetTitleRow(input), GetContents(input));
    }

    private static string[,] ConcatenateVertical(in string[] source, in string[] newRow)
    {
        int columnLength = source.GetLength(0);

        string[,] output = new string[2, columnLength];

        for (int columnIndex = 0; columnIndex < columnLength; columnIndex++)
        {
            output[0, columnIndex] = source[columnIndex];
        }

        for (int columnIndex = 0; columnIndex < columnLength; columnIndex++)
        {
            output[1, columnIndex] = newRow[columnIndex];
        }

        return output;
    }

    private static string[,] ConcatenateVertical(in string[,] source, in string[] newRow)
    {
        int columnLength = source.GetLength(1);
        int rowLength = source.GetLength(0) + 1;

        string[,] output = new string[rowLength, columnLength];

        for (int rowIndex = 0; rowIndex < rowLength - 1; rowIndex++)
        {
            for (int columnIndex = 0; columnIndex < columnLength; columnIndex++)
            {
                output[rowIndex, columnIndex] = source[rowIndex, columnIndex];
            }
        }

        for (int columnIndex = 0; columnIndex < columnLength; columnIndex++)
        {
            output[rowLength - 1, columnIndex] = newRow[columnIndex];
        }

        return output;
    }

    private static string[,] ConcatenateVertical(in string[] source, in string[,] newRows)
    {
        if (source.GetLength(0) != newRows.GetLength(1))
        {
            throw new ArgumentException("Width isn't match", nameof(source));
        }
        
        int columnLength = source.GetLength(0);
        int rowLength = newRows.GetLength(0) + 1;

        string[,] output = new string[rowLength, columnLength];

        for (int columnIndex = 0; columnIndex < columnLength; columnIndex++)
        {
            output[0, columnIndex] = source[columnIndex];
        }

        for (int rowIndex = 1; rowIndex < rowLength; rowIndex++)
        {
            for (int columnIndex = 0; columnIndex < columnLength; columnIndex++)
            {
                output[rowIndex, columnIndex] = newRows[rowIndex - 1, columnIndex];
            }
        }

        return output;
    }

    private static string[,] GetContents(in List<string> input)
    {
        int columnLength = GetTitleRow(input).Length;
        int rowLength = input.Count - 2;

        string[,] output = new string[rowLength, columnLength];
        for (int rowIndex = 2; rowIndex < input.Count; rowIndex++)
        {
            var rowData = GetRow(input, rowIndex);
            for (int columnIndex = 0; columnIndex < columnLength; columnIndex++)
            {
                output[rowIndex - 2, columnIndex] = rowData[columnIndex];
            }
        }
        return output;
    }

    private static string[] GetTitleRow(in List<string> input)
    {
        return GetRow(input, 0);
    }

    private static string[] GetRow(in List<string> input, in int rowIndex)
    {
        return RowConstructor(input[rowIndex]);
    }

    private static string[] RowConstructor(in string input)
    {
        char[] charsToTrim = { ' ' };
        return Array.ConvertAll(input[1..^1].Split('|').ToArray(), element => element.Trim(charsToTrim));
    }
}

Test cases

System.Collections.Generic.List<string> strings = new System.Collections.Generic.List<string>();
strings.Add("|1|2|3|4|");
strings.Add("|:-:|:-:|:-:|:-:|");
strings.Add("|5|6|7|8|");
strings.Add("|9|10|11|12|");
strings.Add("|13|14|15|16|");
strings.Add("|17|18|19|20|");
strings.Add("|21|22|23|24|");
strings.Add("|25|26|27|28|");
strings.Add("|29|30|31|32|");
strings.Add("|33|34|35|36|");
strings.Add("|37|38|39|40|");

string[,] result = Converter.ToTwoDimArray(strings);

int columnLength = result.GetLength(1);
int rowLength = result.GetLength(0);

for (int rowIndex = 0; rowIndex < rowLength; rowIndex++)
{
    for (int columnIndex = 0; columnIndex < columnLength; columnIndex++)
    {
        Console.Write($"{result[rowIndex, columnIndex]}t");
    }
    Console.WriteLine();
}

The output of the above test:

1       2       3       4
5       6       7       8
9       10      11      12
13      14      15      16
17      18      19      20
21      22      23      24
25      26      27      28
29      30      31      32
33      34      35      36
37      38      39      40

If there is any possible improvement, please let me know.

Solution

The in keyword in a parameter list is used to pass an argument by reference. This is used to speed up passing big structs. Since strings and lists are of a reference type, a reference is passed anyway. Specifying ref, out or in makes them be passed a reference to a reference. This makes only sense for ref or out parameters when the method must replace the original object of the caller.

See also: in parameter modifier (C# Reference).


You can make the RowConstructor more robust by allowing the leading or the trailing | to be optional (as specified in the link you provided). The name “constructor” is more suited for a class. I prefer verbs for methods.

String.Split returns an array already. .ToArray() is superfluous.

private static string[] CreateRow(string input)
{
    int start = input[0] is '|' ? 1 : 0;
    int end = input[^1] is '|' ? 1 : 0;
    return input[start..^end].Split('|');
}

We can do it without this complicated Concatentate.. methods by inserting the rows directly into the output table.

private static void InsertRow(string[,] table, string[] row, int rowindex)
{
    int numColumns = table.GetLength(1);
    for (int i = 0; i < numColumns; i++) {
        table[rowindex, i] = row[i];
    }
}

We can treat the header row the same way as we do with the data rows, except that we must get it before creating the output table to determine the number of columns. Therefore, I created a method that creates the output 2d-array and returns it as a tuple together with the reader row.

private static (string[,] table, string[] header) CreateTable(IList<string> input)
{
    string[] headerRow = CreateRow(input[0]);
    int numColumns = headerRow.Length;
    int numRows = input.Count - 1; // without the horizontal line row.
    string[,] table = new string[numRows, numColumns];

    return (table, headerRow);
}

Now we can write the main method easily.

public static string[,] ToTwoDimArray(List<string> input)
{
    var (table, header) = CreateTable(input);
    InsertRow(table, header, 0);

    for (int rowIndex = 2; rowIndex < input.Count; rowIndex++) {
        string[] dataRow = CreateRow(input[rowIndex]);
        InsertRow(table, dataRow, rowIndex - 1);
    }

    return table;
}

My solution has 40 lines of code and is still very readable, compared to your solution with 98 lines of code. This has mainly been achieved by replacing 3 lengthy concatenation methods by a single row insert method.

Remove some vertical lines at the begin and end of lines in the test input to test the new CreateRow method.

Leave a Reply

Your email address will not be published. Required fields are marked *