Multithreaded matrix multiplication on Unix in C

Posted on

Problem

I have this program for comparing serial and parallel matrix multiplication algorithms:

matrix.h

#ifndef NET_CODERODDE_LINEAR_ALGEBRA_MATRIX_H
#define NET_CODERODDE_LINEAR_ALGEBRA_MATRIX_H

#include <stdlib.h>

typedef struct matrix_t {
    size_t  m_rows;
    size_t  m_cols;
    double* m_data;
} matrix_t;

void      matrix_t_init (matrix_t* matrix, size_t rows, size_t cols);
void      matrix_t_free (matrix_t* matrix);
double    matrix_t_get  (matrix_t* matrix, size_t x, size_t y);
void      matrix_t_set  (matrix_t* matrix, size_t x, size_t y, double value);
matrix_t* matrix_t_copy (matrix_t* matrix);

#endif /* NET_CODERODDE_LINEAR_ALGEBRA_MATRIX_H */

matrix_algorithm.h

#ifndef NET_CODERODDE_LINEAR_ALGEBRA_MATRIX_ALGORITHM_H
#define NET_CODERODDE_LINEAR_ALGEBRA_MATRIX_ALGORITHM_H

#include "matrix.h"

matrix_t* matrix_t_multiply          (matrix_t* matrix1, matrix_t* matrix2);
matrix_t* matrix_t_multiply_parallel (matrix_t* matrix1, matrix_t* matrix2);
void      matrix_t_print             (matrix_t* matrix);

#endif /* NET_CODERODDE_LINEAR_ALGEBRA_MATRIX_ALGORITHM_H */

matrix.c

#include "matrix.h"
#include <stdlib.h>
#include <string.h>

static size_t data_index(matrix_t* matrix, size_t x, size_t y)
{
    return y * matrix->m_cols + x;
}

void matrix_t_init(matrix_t* matrix, size_t rows, size_t cols)
{
    matrix->m_rows = rows;
    matrix->m_cols = cols;
    matrix->m_data = malloc(sizeof(double) * rows * cols);
}

void matrix_t_free(matrix_t* matrix)
{
    free(matrix->m_data);
    matrix->m_rows = 0;
    matrix->m_cols = 0;
}

double matrix_t_get(matrix_t* matrix, size_t x, size_t y)
{
    return matrix->m_data[data_index(matrix, x, y)];
}

void matrix_t_set(matrix_t* matrix, size_t x, size_t y, double value)
{
    matrix->m_data[data_index(matrix, x, y)] = value;
}

matrix_t* matrix_t_copy(matrix_t* matrix)
{
    size_t data_len = sizeof(double) * matrix->m_rows * matrix->m_cols;
    matrix_t* copy = malloc(sizeof(*copy));
    copy->m_rows = matrix->m_rows;
    copy->m_cols = matrix->m_cols;
    copy->m_data = malloc(data_len);
    memcpy(copy->m_data, matrix->m_data, data_len);
    return copy;
}

matrix_algorithm.c

#include "matrix.h"
#include "matrix_algorithm.h"
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>

#define MAX(x,y) (((x) > (y)) ? (x) : (y))
#define MIN(x,y) (((x) < (y)) ? (x) : (y))

matrix_t* matrix_t_multiply(matrix_t* matrix1, matrix_t* matrix2)
{
    matrix_t* result;
    size_t x;
    size_t y;
    size_t i;
    double sum;

    if (!matrix1 || !matrix2)
    {
        return NULL;
    }

    if (!matrix1->m_data || !matrix2->m_data)
    {
        return NULL;
    }

    if (matrix1->m_cols != matrix2->m_rows)
    {
        return NULL;
    }

    result = malloc(sizeof(*result));
    matrix_t_init(result, matrix1->m_rows, matrix2->m_cols);

    for (y = 0; y != matrix1->m_rows; ++y)
    {
        for (x = 0; x != matrix2->m_cols; ++x)
        {
            sum = 0.0;

            for (i = 0; i != matrix1->m_cols; ++i)
            {
                sum += matrix_t_get(matrix1, i, y) *
                       matrix_t_get(matrix2, x, i);
            }

            matrix_t_set(result, x, y, sum);
        }
    }

    return result;
}

static const size_t MINIMUM_THREAD_LOAD = 10 * 1000;

typedef struct thread_info {
    matrix_t* left_matrix;
    matrix_t* right_matrix;
    matrix_t* result_matrix;
    size_t    start_row;
    size_t    rows;
} thread_info;

static void* thread_func(void* arg)
{
    size_t i;
    size_t x;
    size_t y;
    double sum;
    thread_info* info = (thread_info*) arg;

    for (y = info->start_row; y < info->start_row + info->rows; ++y)
    {
        for (x = 0; x < info->right_matrix->m_cols; ++x)
        {
            sum = 0.0;

            for (i = 0; i < info->left_matrix->m_cols; ++i)
            {
                sum += matrix_t_get(info->left_matrix, i, y) *
                       matrix_t_get(info->right_matrix, x, i);
            }

            matrix_t_set(info->result_matrix, x, y, sum);
        }
    }

    return NULL;
}

matrix_t* matrix_t_multiply_parallel(matrix_t* left_matrix,
                                     matrix_t* right_matrix)
{
    size_t i;
    size_t rows_reserved;
    size_t work_load;
    size_t num_threads;
    size_t basic_rows_per_thread;
    thread_info* thread_info_structs;
    matrix_t* result_matrix = malloc(sizeof(*result_matrix));

    matrix_t_init(result_matrix,
                  left_matrix->m_rows,
                  right_matrix->m_cols);

    work_load = left_matrix->m_rows *
                right_matrix->m_cols *
                right_matrix->m_rows;

    num_threads = sysconf(_SC_NPROCESSORS_ONLN);
    num_threads = MIN(num_threads, work_load / MINIMUM_THREAD_LOAD);
    num_threads = MIN(num_threads, left_matrix->m_rows);
    num_threads = MAX(num_threads, 1);

    if (num_threads == 1)
    {
        return matrix_t_multiply(left_matrix, right_matrix);
    }

    basic_rows_per_thread = left_matrix->m_rows / num_threads;
    thread_info_structs = calloc(num_threads, sizeof(thread_info));

    for (i = 0, rows_reserved = 0;
         i != num_threads;
         i++, rows_reserved += basic_rows_per_thread)
    {
        thread_info_structs[i].left_matrix   = left_matrix;
        thread_info_structs[i].right_matrix  = right_matrix;
        thread_info_structs[i].result_matrix = result_matrix;
        thread_info_structs[i].rows          = basic_rows_per_thread;
        thread_info_structs[i].start_row     = rows_reserved;
    }

    thread_info_structs[num_threads - 1].rows +=
        left_matrix->m_rows % basic_rows_per_thread;

    pthread_t* threads = calloc(num_threads, sizeof(pthread_t));

    for (i = 0; i < num_threads - 1; ++i)
    {
        pthread_create(&threads[i],
                       NULL,
                       thread_func,
                       (void*) &thread_info_structs[i]);
    }

    thread_func((void*) &thread_info_structs[num_threads - 1]);

    for (i = 0; i < num_threads - 1; ++i)
    {
        pthread_join(threads[i], NULL);
    }

    return result_matrix;
}

void matrix_t_print(matrix_t* matrix)
{
    for (size_t y = 0; y < matrix->m_rows; ++y)
    {
        for (size_t x = 0; x < matrix->m_cols; ++x)
        {
            printf("%f ", matrix_t_get(matrix, x, y));
        }

        puts("");
    }
}

main.c

#include "matrix.h"
#include "matrix_algorithm.h"
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

static matrix_t* create_random_matrix(size_t rows, size_t cols)
{
    size_t x;
    size_t y;
    matrix_t* m = malloc(sizeof(*m));
    matrix_t_init(m, rows, cols);

    for (x = 0; x < cols; ++x)
    {
        for (y = 0; y < rows; ++y)
        {
            matrix_t_set(m, x, y, ((double) rand()) / RAND_MAX);
        }
    }

    return m;
}

static size_t get_milliseconds()
{
    struct timeval time;
    gettimeofday(&time, NULL);
    return time.tv_sec * 1000 + time.tv_usec / 1000;
}

static int matrix_equals(matrix_t* a, matrix_t* b)
{
    size_t x;
    size_t y;

    if (a->m_cols != b->m_cols || a->m_rows != b->m_rows)
    {
        return 0;
    }

    for (y = 0; y < a->m_rows; ++y)
    {
        for (x = 0; x < a->m_cols; ++x)
        {
            if (matrix_t_get(a, x, y) != matrix_t_get(b, x, y))
            {
                return 0;
            }
        }
    }

    return 1;
}

int main() {
    size_t start_time;
    size_t end_time;
    matrix_t* a;
    matrix_t* b;
    matrix_t* ab1;
    matrix_t* ab2;

    srand((unsigned int) time(NULL));

    a = create_random_matrix(1000, 1000);
    b = matrix_t_copy(a);

    start_time = get_milliseconds();
    ab1 = matrix_t_multiply(a, b);
    end_time = get_milliseconds();

    printf("Single-threaded multiplication in %zu milliseconds.n",
           end_time - start_time);

    start_time = get_milliseconds();
    ab2 = matrix_t_multiply_parallel(a, b);
    end_time = get_milliseconds();

    printf("%zu-threaded multiplication in %zu milliseconds.n",
           sysconf(_SC_NPROCESSORS_ONLN),
           end_time - start_time);

    printf("Algorithms agree: %dn", matrix_equals(ab1, ab2));
    return 0;
}

Possible output on dual-core system:


Single-threaded multiplication in 14139 milliseconds.
4-threaded multiplication in 7105 milliseconds.
Algorithms agree: 1

Critique request

Please tell me anything that comes to mind.

Solution

First impressions

The code is neatly laid out and easy to follow – thank you. It was easy to build and run, thanks to a good test program. About the only thing that could be improved for ease of review would be to record (i.e. print) the value of the random seed used, and allow it to be specified on a subsequent run.

The matrix representation is good (and has good locality), and the use of a function to calculate the index is much clearer than embedding that logic everywhere it’s needed.

Consider a generous helping of const

The comparison function should not be modifying the elements, so let’s make its arguments matrix_t const*:

static int matrix_equals(matrix_t *const a, matrix_t *const b)

That required matrix_t_get() and data_index() to be modified to accept pointer-to-const arguments in their turn.

I also changed matrix_t_print() and the multiply functions, too. The only knock-on was that the threadinfo input matrices had to be declared const*.

Macros

Be very, very careful with macros like this, that expand their arguments more than once:

#define MAX(x,y) (((x) > (y)) ? (x) : (y))
#define MIN(x,y) (((x) < (y)) ? (x) : (y))

As far as I can tell, they are only ever used on size_t values, so they may be better implemented as functions (with internal linkage, obviously).

Test program

Firstly, thanks for providing a good test program; too many review requests omit that, but it really helps to be able to compile and run when reviewing.

Firstly, I’ll note that although the generated matrix is random, we always multiply it by itself, so it’s not completely representative (the result matrix will always be symmetrical, so we’d miss an accidental transposition, for example).

The test program doesn’t free the created matrices. I added:

matrix_t_free(a);
matrix_t_free(b);
matrix_t_free(ab1);
matrix_t_free(ab2);

But even then, the matrix structures weren’t freed, even though their contents were – see ‘Allocation and deallocation‘ below.

Also, the created matrices are never checked against NULL return from allocation.

Finally for the test program, size_t probably isn’t the right type for the time values. I’d simply keep them in struct timeval values, and perform the subtraction as

static long get_milliseconds_difference(struct timeval *a, struct timeval *b)
{
    return (a->tv_sec - b->tv_sec) * 1000 + (a->tv_usec - b->tv_usec) / 1000;
}

I then eliminate the signed/unsigned conversion warnings, at least in my environment.

Declarations

Modern C lets you declare variables at first use. This can help reduce scope (e.g. of index variables for loops) or to add const to prevent accidents and possibly enable additional optimisations.

Eliminate one array

We can avoid allocating two arrays of size num_threads if we embed the thread structure into the struct thread_info. I think it’s also easier reading if we reserve the zeroth element to run in the main thread, rather than the last (so the loops go from 1 to num_threads rather than 0 to num_threads-1 – I find the subtraction ugly).

Allocation and deallocation

Since the functions such as matrix_t_copy() allocate the structure, I expected that the corresponding matrix_t_free() would complement it. However, that would then mean that matrix_t_init() would no longer be symmetric. I suggest having two pairs of functions:

matrix_t* matrix_t_alloc (size_t rows, size_t cols);
void      matrix_t_init (matrix_t* matrix, size_t rows, size_t cols);
void      matrix_t_clear (matrix_t* matrix);
void      matrix_t_free (matrix_t* matrix);

My implementation also checks the result of malloc(), and changes a usage of sizeof (type) that can be replaced with sizeof expr:

static size_t data_index(matrix_t const* matrix, size_t x, size_t y)
{
    return y * matrix->m_cols + x;
}

matrix_t* matrix_t_alloc (size_t rows, size_t cols)
{
    matrix_t *m = malloc(sizeof *m);
    if (!m) {
        return m;
    }
    matrix_t_init(m, rows, cols);
    if (!m->m_data) {
        free(m);
        return NULL;
    }
    return m;
}

void matrix_t_init(matrix_t* matrix, size_t rows, size_t cols)
{
    matrix->m_data = malloc(sizeof *matrix->m_data * rows * cols);
    if (matrix->m_data) {
        matrix->m_rows = rows;
        matrix->m_cols = cols;
    } else {
        matrix->m_rows = 0;
        matrix->m_cols = 0;
    }
}

void matrix_t_clear (matrix_t* matrix)
{
    free(matrix->m_data);
    matrix->m_rows = 0;
    matrix->m_cols = 0;
}

void matrix_t_free(matrix_t* matrix)
{
    matrix_t_clear(matrix);
    free(matrix);
}

Memory leak with small workload

if (num_threads == 1)
{
    return matrix_t_multiply(left_matrix, right_matrix);
}

This happens after we’ve allocated result, which is then leaked. We need to perform the test first, if we’re to manage memory properly.


Modified program

I combined the sources into a single file for my convenience; it should be easy to split them back out again.

//matrix.h

#include <stdlib.h>

typedef struct matrix_t {
    size_t  m_rows;
    size_t  m_cols;
    double* m_data;
} matrix_t;

matrix_t* matrix_t_alloc(size_t rows, size_t cols);
void      matrix_t_init(matrix_t* matrix, size_t rows, size_t cols);
void      matrix_t_clear(matrix_t* matrix);
void      matrix_t_free(matrix_t* matrix);

double    matrix_t_get(matrix_t const* matrix, size_t x, size_t y);
void      matrix_t_set(matrix_t* matrix, size_t x, size_t y, double value);
matrix_t* matrix_t_copy(matrix_t const* matrix);


//matrix_algorithm.h


matrix_t* matrix_t_multiply(matrix_t const* matrix1, matrix_t const* matrix2);
matrix_t* matrix_t_multiply_parallel(matrix_t const* matrix1, matrix_t const* matrix2);
void      matrix_t_print(matrix_t const* matrix);


//matrix.c

#include <stdlib.h>
#include <string.h>

static size_t data_index(matrix_t const* matrix, size_t x, size_t y)
{
    return y * matrix->m_cols + x;
}

matrix_t* matrix_t_alloc(size_t rows, size_t cols)
{
    matrix_t *const m = malloc(sizeof *m);
    if (!m) {
        return m;
    }
    matrix_t_init(m, rows, cols);
    if (!m->m_data) {
        free(m);
        return NULL;
    }
    return m;
}

void matrix_t_init(matrix_t* matrix, size_t rows, size_t cols)
{
    matrix->m_data = malloc(sizeof *matrix->m_data * rows * cols);
    if (matrix->m_data) {
        matrix->m_rows = rows;
        matrix->m_cols = cols;
    } else {
        matrix->m_rows = 0;
        matrix->m_cols = 0;
    }
}

void matrix_t_clear(matrix_t* matrix)
{
    free(matrix->m_data);
    matrix->m_rows = 0;
    matrix->m_cols = 0;
}

void matrix_t_free(matrix_t* matrix)
{
    matrix_t_clear(matrix);
    free(matrix);
}

double matrix_t_get(matrix_t const* matrix, size_t x, size_t y)
{
    return matrix->m_data[data_index(matrix, x, y)];
}

void matrix_t_set(matrix_t* matrix, size_t x, size_t y, double value)
{
    matrix->m_data[data_index(matrix, x, y)] = value;
}

matrix_t* matrix_t_copy(matrix_t const* matrix)
{
    size_t data_len = sizeof *matrix->m_data * matrix->m_rows * matrix->m_cols;
    matrix_t* copy = matrix_t_alloc(matrix->m_rows, matrix->m_cols);
    if (copy) {
        memcpy(copy->m_data, matrix->m_data, data_len);
    }
    return copy;
}

//matrix_algorithm.c

#include <stdio.h>
#include <pthread.h>
#include <unistd.h>

static size_t max_size_t(size_t x, size_t y)
{
    return x > y ? x : y;
}
static size_t min_size_t(size_t x, size_t y)
{
    return x < y ? x : y;
}

matrix_t* matrix_t_multiply(matrix_t const* matrix1, matrix_t const* matrix2)
{
    if (!matrix1 || !matrix2) {
        return NULL;
    }

    if (!matrix1->m_data || !matrix2->m_data) {
        return NULL;
    }

    if (matrix1->m_cols != matrix2->m_rows) {
        return NULL;
    }

    matrix_t *const result = matrix_t_alloc(matrix1->m_rows, matrix2->m_cols);

    for (size_t y = 0; y != matrix1->m_rows; ++y) {
        for (size_t x = 0; x != matrix2->m_cols; ++x) {
            double sum = 0.0;

            for (size_t i = 0; i != matrix1->m_cols; ++i) {
                sum += matrix_t_get(matrix1, i, y) *
                    matrix_t_get(matrix2, x, i);
            }

            matrix_t_set(result, x, y, sum);
        }
    }

    return result;
}

static const size_t MINIMUM_THREAD_LOAD = 10 * 1000;

typedef struct thread_info {
    matrix_t const* left_matrix;
    matrix_t const* right_matrix;
    matrix_t* result_matrix;
    size_t    start_row;
    size_t    rows;
    pthread_t thread;
} thread_info;

static void* thread_func(void* arg)
{
    thread_info *const info = arg;

    for (size_t y = info->start_row; y < info->start_row + info->rows; ++y) {
        for (size_t x = 0; x < info->right_matrix->m_cols; ++x) {
            double sum = 0.0;

            for (size_t i = 0; i < info->left_matrix->m_cols; ++i) {
                sum += matrix_t_get(info->left_matrix, i, y) *
                    matrix_t_get(info->right_matrix, x, i);
            }

            matrix_t_set(info->result_matrix, x, y, sum);
        }
    }

    return NULL;
}

matrix_t* matrix_t_multiply_parallel(matrix_t const* left_matrix,
                                     matrix_t const* right_matrix)
{
    size_t const work_load =
        left_matrix->m_rows *
        right_matrix->m_cols *
        right_matrix->m_rows;

    size_t num_threads = (size_t)sysconf(_SC_NPROCESSORS_ONLN);
    num_threads = min_size_t(num_threads, work_load / MINIMUM_THREAD_LOAD);
    num_threads = min_size_t(num_threads, left_matrix->m_rows);
    num_threads = max_size_t(num_threads, 1);

    if (num_threads == 1) {
        return matrix_t_multiply(left_matrix, right_matrix);
    }

    matrix_t *const result_matrix = matrix_t_alloc(left_matrix->m_rows,
                                                   right_matrix->m_cols);
    if (!result_matrix) {
        return NULL;
    }

    thread_info *threads = calloc(num_threads, sizeof *threads);
    if (!threads) {
        matrix_t_free(result_matrix);
        return NULL;
    }

    const size_t basic_rows_per_thread = left_matrix->m_rows / num_threads;
    for (size_t i = 0, rows_reserved = 0;
         i != num_threads;
         i++, rows_reserved += basic_rows_per_thread)
        {
            threads[i].left_matrix   = left_matrix;
            threads[i].right_matrix  = right_matrix;
            threads[i].result_matrix = result_matrix;
            threads[i].rows          = basic_rows_per_thread;
            threads[i].start_row     = rows_reserved;
        }

    threads[num_threads - 1].rows +=
        left_matrix->m_rows % basic_rows_per_thread;

    /* block 0 will be run in this thread */
    for (size_t i = 1; i < num_threads; ++i) {
        pthread_create(&threads[i].thread,
                       NULL,
                       thread_func,
                       (void*) &threads[i]);
    }

    thread_func((void*) &threads[0]);

    for (size_t i = 1; i < num_threads; ++i) {
        pthread_join(threads[i].thread, NULL);
    }

    free(threads);

    return result_matrix;
}

void matrix_t_print(matrix_t const* matrix)
{
    for (size_t y = 0; y < matrix->m_rows; ++y) {
        for (size_t x = 0; x < matrix->m_cols; ++x) {
            printf("%f ", matrix_t_get(matrix, x, y));
        }

        puts("");
    }
}

//main.c

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

static matrix_t* create_random_matrix(size_t rows, size_t cols)
{
    matrix_t* m = malloc(sizeof(*m));
    matrix_t_init(m, rows, cols);

    for (size_t x = 0; x < cols; ++x) {
        for (size_t y = 0; y < rows; ++y) {
            matrix_t_set(m, x, y, ((double) rand()) / RAND_MAX);
        }
    }

    return m;
}

static long get_milliseconds_difference(struct timeval *a, struct timeval *b)
{
    return(a->tv_sec - b->tv_sec) * 1000 + (a->tv_usec - b->tv_usec) / 1000;
}

static int matrix_equals(matrix_t const *a, matrix_t const *b)
{
    if (!a || !b) {
        /* treat NULL like NaN, not equal to itself */
        return !b;
    }

    if (a->m_cols != b->m_cols || a->m_rows != b->m_rows) {
        return 0;
    }

    for (size_t y = 0; y < a->m_rows; ++y) {
        for (size_t x = 0; x < a->m_cols; ++x) {
            if (matrix_t_get(a, x, y) != matrix_t_get(b, x, y)) {
                return 0;
            }
        }
    }

    return 1;
}

int main() {
    struct timeval start_time, end_time;

    unsigned int seed = (unsigned int)time(NULL);
    srand(seed);
    printf("Using random seed = %un", seed);

    matrix_t *a = create_random_matrix(1000, 1000);
    matrix_t *b = matrix_t_copy(a);
    if (!a || !b)
        return EXIT_FAILURE;

    gettimeofday(&start_time, NULL);
    matrix_t *ab1 = matrix_t_multiply(a, b);
    gettimeofday(&end_time, NULL);

    if (!ab1)
        return EXIT_FAILURE;

    printf("Single-threaded multiplication in %ld milliseconds.n",
           get_milliseconds_difference(&end_time, &start_time));

    gettimeofday(&start_time, NULL);
    matrix_t *ab2 = matrix_t_multiply_parallel(a, b);
    gettimeofday(&end_time, NULL);

    if (!ab2)
        return EXIT_FAILURE;

    printf("%zu-threaded multiplication in %ld milliseconds.n",
           sysconf(_SC_NPROCESSORS_ONLN),
           get_milliseconds_difference(&end_time, &start_time));

    printf("Algorithms agree: %dn", matrix_equals(ab1, ab2));

    matrix_t_free(a);
    matrix_t_free(b);
    matrix_t_free(ab1);
    matrix_t_free(ab2);

    return 0;
}

I’ve added a lot more checking throughout, but didn’t feel the need to mention each instance in the review.

Output:

==13358== Memcheck, a memory error detector
==13358== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==13358== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==13358== Command: ./175113
==13358== 
Using random seed = 1504864590
Single-threaded multiplication in 4670 milliseconds.
8-threaded multiplication in 1054 milliseconds.
Algorithms agree: 1
==13358== 
==13358== HEAP SUMMARY:
==13358==     in use at exit: 0 bytes in 0 blocks
==13358==   total heap usage: 17 allocs, 17 frees, 32,003,408 bytes allocated
==13358== 
==13358== All heap blocks were freed -- no leaks are possible
==13358== 
==13358== For counts of detected and suppressed errors, rerun with: -v
==13358== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

(Okay, I cheated – the times are from a non-Valgrind run; it took roughly 20x longer under Memcheck).

Leave a Reply

Your email address will not be published. Required fields are marked *