[LTP] [PATCH 3/3] realtime/matrix_mult: Fix test optimization

Mon Nov 15 16:22:17 CET 2021

Hi!
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (c) International Business Machines  Corp., 2007, 2008
> + * 
> + * Authors: Darren Hart <dvhltc@us.ibm.com>
> + *          Dinakar Guniguntala <dino@in.ibm.com>
> + */
> +/*\
> + * [Description]
>   *
> - *   Copyright ?? International Business Machines  Corp., 2007, 2008
> - *
> - *   This program is free software;  you can redistribute it and/or modify
> - *   it under the terms of the GNU General Public License as published by
> - *   the Free Software Foundation; either version 2 of the License, or
> - *   (at your option) any later version.
> - *
> - *   This program is distributed in the hope that it will be useful,
> - *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
> - *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
> - *   the GNU General Public License for more details.
> - *
> - *   You should have received a copy of the GNU General Public License
> - *   along with this program;  if not, write to the Free Software
> - *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> - *
> - * NAME
> - *      matrix_mult.c
> - *
> - * DESCRIPTION
> - *      Compare running sequential matrix multiplication routines
> - *      to running them in parallel to judge mutliprocessor
> - *      performance
> - *
> - * USAGE:
> - *      Use run_auto.sh script in current directory to build and run test.
> - *
> - * AUTHOR
> - *      Darren Hart <dvhltc@us.ibm.com>
> - *
> - * HISTORY
> - *      2007-Mar-09:  Initial version by Darren Hart <dvhltc@us.ibm.com>
> - *      2008-Feb-26:  Closely emulate jvm Dinakar Guniguntala <dino@in.ibm.com>
> - *
> - *****************************************************************************/
> + * Compare running sequential matrix multiplication routines
> + * to running them in parallel to judge multiprocessor
> + * performance
> + */

Ideally the comment changes should be in a separate patch...

>  #include <stdio.h>
>  #include <stdlib.h>
> @@ -69,9 +46,14 @@ static int iterations_percpu;
>  stats_container_t sdat, cdat, *curdat;
>  stats_container_t shist, chist;
>  static pthread_barrier_t mult_start;
> -static pthread_mutex_t mutex_cpu;
>  
> -void usage(void)
> +struct matrices {
> +	double A[MATRIX_SIZE][MATRIX_SIZE];
> +	double B[MATRIX_SIZE][MATRIX_SIZE];
> +	double C[MATRIX_SIZE][MATRIX_SIZE];
> +};
> +
> +static void usage(void)
>  {
>  	rt_help();
>  	printf("matrix_mult specific options:\n");
> @@ -80,7 +62,7 @@ void usage(void)
>  	printf("  -i#	   #: number of iterations\n");
>  }
>  
> -int parse_args(int c, char *v)
> +static int parse_args(int c, char *v)
>  {
>  	int handled = 1;
>  	switch (c) {
> @@ -100,7 +82,7 @@ int parse_args(int c, char *v)
>  	return handled;
>  }
>  
> -void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
> +static void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
>  		 double B[MATRIX_SIZE][MATRIX_SIZE])

These static function conversions should ideally be in a separate patch
as well. At least the patch description does not mention either of these
changes.

>  {
>  	int i, j;
> @@ -112,41 +94,39 @@ void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
>  	}
>  }
>  
> -void matrix_mult(int m_size)
> +static void matrix_mult(struct matrices *matrices)
>  {
> -	double A[m_size][m_size];
> -	double B[m_size][m_size];
> -	double C[m_size][m_size];
>  	int i, j, k;
>  
> -	matrix_init(A, B);
> -	for (i = 0; i < m_size; i++) {
> -		int i_m = m_size - i;
> -		for (j = 0; j < m_size; j++) {
> -			double sum = A[i_m][j] * B[j][i];
> -			for (k = 0; k < m_size; k++)
> -				sum += A[i_m][k] * B[k][j];
> -			C[i][j] = sum;
> +	matrix_init(matrices->A, matrices->B);
> +	for (i = 0; i < MATRIX_SIZE; i++) {
> +		int i_m = MATRIX_SIZE - i;
> +		for (j = 0; j < MATRIX_SIZE; j++) {
> +			double sum = matrices->A[i_m][j] *  matrices->B[j][i];
> +			for (k = 0; k < MATRIX_SIZE; k++)
> +				sum +=  matrices->A[i_m][k] *  matrices->B[k][j];
> +			 matrices->C[i][j] = sum;
>  		}
>  	}
>  }
>  
> -void matrix_mult_record(int m_size, int index)
> +static void matrix_mult_record(struct matrices *matrices, int index)
>  {
>  	nsec_t start, end, delta;
>  	int i;
>  
>  	start = rt_gettime();
>  	for (i = 0; i < ops; i++)
> -		matrix_mult(MATRIX_SIZE);
> +		matrix_mult(matrices);
>  	end = rt_gettime();
>  	delta = (long)((end - start) / NS_PER_US);
>  	curdat->records[index].x = index;
>  	curdat->records[index].y = delta;
>  }
>  
> -int set_affinity(void)
> +static int set_affinity(void)
>  {
> +	static pthread_mutex_t mutex_cpu = PTHREAD_MUTEX_INITIALIZER;

As well as this change, it looks okay, but it's not listed in the patch
description.

>  	cpu_set_t mask;
>  	int cpuid;
>  
> @@ -166,9 +146,10 @@ int set_affinity(void)
>  	return -1;
>  }
>  
> -void *concurrent_thread(void *thread)
> +static void *concurrent_thread(void *thread)
>  {
>  	struct thread *t = (struct thread *)thread;
> +	struct matrices *matrices = (struct matrices *) t->arg;
>  	int thread_id = (intptr_t) t->id;
>  	int cpuid;
>  	int i;
> @@ -183,18 +164,23 @@ void *concurrent_thread(void *thread)
>  	index = iterations_percpu * thread_id;	/* To avoid stats overlapping */
>  	pthread_barrier_wait(&mult_start);
>  	for (i = 0; i < iterations_percpu; i++)
> -		matrix_mult_record(MATRIX_SIZE, index++);
> +		matrix_mult_record(matrices, index++);
>  
>  	return NULL;
>  }
>  
> -int main_thread(void)
> +static int main_thread(void)
>  {
>  	int ret, i, j;
>  	nsec_t start, end;
>  	long smin = 0, smax = 0, cmin = 0, cmax = 0, delta = 0;
>  	float savg, cavg;
>  	int cpuid;
> +	struct matrices *matrices[numcpus];
> +
> +	for (i = 0; i < numcpus; ++i) {
> +		matrices[i] = malloc(sizeof(struct matrices));
> +	}
>  
>  	if (stats_container_init(&sdat, iterations) ||
>  	    stats_container_init(&shist, HIST_BUCKETS) ||
> @@ -205,12 +191,11 @@ int main_thread(void)
>  		exit(1);
>  	}
>  
> -	tids = malloc(sizeof(int) * numcpus);
> +	tids = calloc(numcpus, sizeof(int));
>  	if (!tids) {
>  		perror("malloc");
>  		exit(1);
>  	}
> -	memset(tids, 0, numcpus);
>  
>  	cpuid = set_affinity();
>  	if (cpuid == -1) {
> @@ -223,8 +208,9 @@ int main_thread(void)
>  	curdat->index = iterations - 1;
>  	printf("\nRunning sequential operations\n");
>  	start = rt_gettime();
> -	for (i = 0; i < iterations; i++)
> -		matrix_mult_record(MATRIX_SIZE, i);
> +	for (i = 0; i < iterations; i++) {
> +		matrix_mult_record(matrices[0], i);
> +	}

And here LMKL coding style prefers to avoid the curly braces, but that
is very minor.

>  	end = rt_gettime();
>  	delta = (long)((end - start) / NS_PER_US);
>  
> @@ -256,7 +242,7 @@ int main_thread(void)
>  	online_cpu_id = -1;	/* Redispatch cpus */
>  	/* Create numcpus-1 concurrent threads */
>  	for (j = 0; j < numcpus; j++) {
> -		tids[j] = create_fifo_thread(concurrent_thread, NULL, PRIO);
> +		tids[j] = create_fifo_thread(concurrent_thread, matrices[j], PRIO);
>  		if (tids[j] == -1) {
>  			printf
>  			    ("Thread creation failed (max threads exceeded?)\n");
> @@ -308,6 +294,9 @@ int main_thread(void)
>  	     criteria);
>  	printf("Result: %s\n", ret ? "FAIL" : "PASS");
>  
> +	for (i = 0; i < numcpus; i++)
> +		free(matrices[i]);
> +
>  	return ret;
>  }
>  
> -- 
> 2.25.1
> 
> 
> -- 
> Mailing list info: https://lists.linux.it/listinfo/ltp

-- 
Cyril Hrubis
chrubis@suse.cz