HiCMA
Hierarchical Computations on Manycore Architectures
timing.c
Go to the documentation of this file.
1 
16 /*
17  * @copyright (c) 2009-2014 The University of Tennessee and The University
18  * of Tennessee Research Foundation.
19  * All rights reserved.
20  * @copyright (c) 2012-2016 Inria. All rights reserved.
21  * @copyright (c) 2012-2015 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
22  */
23 /*
24  *
25  * file timing.c
26  *
27  * MORSE auxiliary routines
28  * MORSE is a software package provided by Univ. of Tennessee,
29  * Univ. of California Berkeley and Univ. of Colorado Denver
30  *
31  * version 0.9.0
32  * author Mathieu Faverge
33  * author Dulceneia Becker
34  * author Cedric Castagnede
35  * date 2010-11-15
36  *
37  */
38 #if defined( _WIN32 ) || defined( _WIN64 )
39 #define int64_t __int64
40 #endif
41 
42 /* Define these so that the Microsoft VC compiler stops complaining
43  about scanf and friends */
44 #define _CRT_SECURE_NO_DEPRECATE
45 #define _CRT_SECURE_NO_WARNINGS
46 
47 #include "morse.h"
48 #include "starpu.h"
49 #include "hicma_constants.h"
50 
51 #include <math.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 
56 #if defined( _WIN32 ) || defined( _WIN64 )
57 #include <windows.h>
58 #else /* Non-Windows */
59 #include <unistd.h>
60 #include <sys/resource.h>
61 #endif
62 
63 #include "coreblas/lapacke.h"
64 #include "morse.h"
65 #include "coreblas/coreblas.h"
66 //#include "flops.h"
67 #include "timing.h"
68 #include "control/auxiliary.h"
69 
70 #if defined(CHAMELEON_USE_MPI)
71 #include <mpi.h>
72 #endif
73 
74 #if defined (CHAMELEON_SCHED_STARPU)
75 #include <starpu.h>
76 #endif
77 
78 //static int RunTest(int *iparam, _PREC *dparam, double *t_);
79 void* morse_getaddr_null(const MORSE_desc_t *A, int m, int n)
80 {
81  return (void*)( NULL );
82 }
83 
/* Initial seed for zlarnv(). */
int ISEED[4] = { 0, 0, 0, 1 };
85 
86 static int
87 Test(int64_t n, int *iparam,
88  double fixed_rank_decay,
89  double wave_k,
90  char* rankfile) {
91  int i, j, iter;
92  int thrdnbr, niter;
93  int64_t M, N, K, NRHS;
94  double *t;
95 #if defined(CHAMELEON_SIMULATION)
96  _PREC eps = 0.;
97 #else
98  _PREC eps = _LAMCH( 'e' );
99 #endif
100  _PREC dparam[IPARAM_DNBPARAM];
101  double fmuls, fadds, fp_per_mul, fp_per_add;
102  double sumgf, sumgf2, sumt, sd, flops, gflops;
103  char *s;
104  char *env[] = {
105  "OMP_NUM_THREADS",
106  "MKL_NUM_THREADS",
107  "GOTO_NUM_THREADS",
108  "ACML_NUM_THREADS",
109  "ATLAS_NUM_THREADS",
110  "BLAS_NUM_THREADS", ""
111  };
112  int gnuplot = 0;
113 
114  /*
115  * if hres = 0 then the test succeed
116  * if hres = n then the test failed n times
117  */
118  int hres = 0;
119 
120  memset( &dparam, 0, IPARAM_DNBPARAM * sizeof(_PREC) );
121  dparam[IPARAM_THRESHOLD_CHECK] = 100.0;
122 
123  thrdnbr = iparam[IPARAM_THRDNBR];
124  niter = iparam[IPARAM_NITER];
125 
126  M = iparam[IPARAM_M];
127  N = iparam[IPARAM_N];
128  K = iparam[IPARAM_K];
129  NRHS = K;
130  (void)M;(void)N;(void)K;(void)NRHS;
131 
132  if ( (n < 0) || (thrdnbr < 0 ) ) {
133  if (gnuplot && (MORSE_My_Mpi_Rank() == 0) ) {
134  printf( "set title '%d_NUM_THREADS: ", thrdnbr );
135  for (i = 0; env[i][0]; ++i) {
136  s = getenv( env[i] );
137 
138  if (i) printf( " " ); /* separating space */
139 
140  for (j = 0; j < 5 && env[i][j] && env[i][j] != '_'; ++j)
141  printf( "%c", env[i][j] );
142 
143  if (s)
144  printf( "=%s", s );
145  else
146  printf( "->%s", "?" );
147  }
148  printf( "'\n" );
149  printf( "%s\n%s\n%s\n%s\n%s%s%s\n",
150  "set xlabel 'Matrix size'",
151  "set ylabel 'Gflop/s'",
152  "set key bottom",
153  gnuplot > 1 ? "set terminal png giant\nset output 'timeplot.png'" : "",
154  "plot '-' using 1:5 title '", _NAME, "' with linespoints" );
155  }
156  return 0;
157  }
158 
159  /*if ( MORSE_My_Mpi_Rank() == 0)*/
160  /*printf( "%7d %7d %7d ", iparam[IPARAM_M], iparam[IPARAM_N], iparam[IPARAM_K] );*/
161  /*fflush( stdout );*/
162 
163  t = (double*)malloc(niter*sizeof(double));
164  memset(t, 0, niter*sizeof(double));
165 
166  if (sizeof(_TYPE) == sizeof(_PREC)) {
167  fp_per_mul = 1;
168  fp_per_add = 1;
169  } else {
170  fp_per_mul = 6;
171  fp_per_add = 2;
172  }
173 
174  fadds = (double)(_FADDS);
175  fmuls = (double)(_FMULS);
176  flops = 1e-9 * (fmuls * fp_per_mul + fadds * fp_per_add); //FIXME FLOPS FOR GEMM!!!
177  gflops = 0.0;
178 
179  dparam[IPARAM_HICMA_STARSH_DECAY] = fixed_rank_decay;
180  dparam[IPARAM_HICMA_STARSH_WAVE_K] = wave_k;
181  if ( iparam[IPARAM_WARMUP] ) {
182  int status = RunTest( iparam, dparam, &(t[0]), rankfile);
183  if (status != MORSE_SUCCESS) return status;
184  }
185 
186  sumgf = 0.0;
187  double sumgf_upper = 0.0;
188  sumgf2 = 0.0;
189  sumt = 0.0;
190 
191  for (iter = 0; iter < niter; iter++)
192  {
193  if( iter == 0 ) {
194  if ( iparam[IPARAM_TRACE] )
195  iparam[IPARAM_TRACE] = 2;
196  if ( iparam[IPARAM_DAG] )
197  iparam[IPARAM_DAG] = 2;
198  if ( iparam[IPARAM_PROFILE] )
199  iparam[IPARAM_PROFILE] = 2;
200 
201  int status = RunTest( iparam, dparam, &(t[iter]), rankfile);
202  if (status != MORSE_SUCCESS) return status;
203 
204  iparam[IPARAM_TRACE] = 0;
205  iparam[IPARAM_DAG] = 0;
206  iparam[IPARAM_PROFILE] = 0;
207  }
208  else {
209  int status = RunTest( iparam, dparam, &(t[iter]), rankfile);
210  if (status != MORSE_SUCCESS) return status;
211  }
212  gflops = flops / t[iter];
213 
214 #if defined (CHAMELEON_SCHED_STARPU)
215  if (iparam[IPARAM_BOUND])
216  {
217  double upper_gflops = 0.0;
218  double tmin = 0.0;
219  double integer_tmin = 0.0;
220 #if 0
221  if (iparam[IPARAM_BOUNDDEPS]) {
222  FILE *out = fopen("bounddeps.pl", "w");
223  starpu_bound_print_lp(out);
224  fclose(out);
225  out = fopen("bound.dot", "w");
226  starpu_bound_print_dot(out);
227  fclose(out);
228  } else {
229  FILE *out = fopen("bound.pl", "w");
230  starpu_bound_print_lp(out);
231  fclose(out);
232 #else
233  {
234 #endif
235  starpu_bound_compute(&tmin, &integer_tmin, 0);
236  upper_gflops = (flops / (tmin / 1000.0));
237  sumgf_upper += upper_gflops;
238  }
239  }
240 #endif
241  sumt += t[iter];
242  sumgf += gflops;
243  sumgf2 += gflops*gflops;
244  }
245  if ( MORSE_My_Mpi_Rank() == 0)
246  printf( "%7d %7d %7d ", iparam[IPARAM_M], iparam[IPARAM_N], iparam[IPARAM_K] );
247  fflush( stdout );
248 
249  gflops = sumgf / niter;
250  sd = sqrt((sumgf2 - (sumgf*sumgf)/niter)/niter);
251 
252  if ( MORSE_My_Mpi_Rank() == 0) {
253  printf( "%9.3f %9.2f +-%7.2f ", sumt/niter, gflops, sd);
254 
255  if (iparam[IPARAM_BOUND] && !iparam[IPARAM_BOUNDDEPS])
256  printf(" %9.2f", sumgf_upper/niter);
257 
258  if ( iparam[IPARAM_PEAK] )
259  {
260  if (dparam[IPARAM_ESTIMATED_PEAK]<0.0f)
261  printf(" n/a n/a ");
262  else
263  printf(" %5.2f%% %9.2f ", 100.0f*(gflops/dparam[IPARAM_ESTIMATED_PEAK]), dparam[IPARAM_ESTIMATED_PEAK]);
264  }
265 
266  if ( iparam[IPARAM_CHECK] ){
267  hres = (dparam[IPARAM_RES] / dparam[IPARAM_XNORM]) > (eps*10.0);
268  /*hres = ( dparam[IPARAM_RES] / n / eps / (dparam[IPARAM_ANORM] * dparam[IPARAM_XNORM] + dparam[IPARAM_BNORM] ) > dparam[IPARAM_THRESHOLD_CHECK] );*/
269 
270  /*if (hres)*/
271  /*printf( "%8.5e %8.5e %8.5e %8.5e %8.5e FAILURE",*/
272  /*dparam[IPARAM_RES], dparam[IPARAM_ANORM], dparam[IPARAM_XNORM], dparam[IPARAM_BNORM],*/
273  /*dparam[IPARAM_RES] / dparam[IPARAM_XNORM] );*/
274  /*[>dparam[IPARAM_RES] / n / eps / (dparam[IPARAM_ANORM] * dparam[IPARAM_XNORM] + dparam[IPARAM_BNORM] ));<]*/
275  /*else*/
276  printf( "%8.5e %8.5e %8.5e %8.5e %8.5e SUCCESS",
277  dparam[IPARAM_RES], dparam[IPARAM_ANORM], dparam[IPARAM_XNORM], dparam[IPARAM_BNORM],
278  dparam[IPARAM_RES] / dparam[IPARAM_XNORM] );
279  /*dparam[IPARAM_RES] / n / eps / (dparam[IPARAM_ANORM] * dparam[IPARAM_XNORM] + dparam[IPARAM_BNORM] ));*/
280  }
281 
282  if ( iparam[IPARAM_INVERSE] )
283  printf( " %8.5e %8.5e %8.5e %8.5e",
284  dparam[IPARAM_RNORM], dparam[IPARAM_ANORM], dparam[IPARAM_AinvNORM],
285  dparam[IPARAM_RNORM] /((dparam[IPARAM_ANORM] * dparam[IPARAM_AinvNORM])*n*eps));
286 
287  printf("\n");
288 
289  fflush( stdout );
290  }
291  free(t);
292 
293  hres = 0;
294  return hres;
295  }
296 
/**
 * Prefix test for C strings.
 *
 * @param[in] s       string to inspect
 * @param[in] prefix  prefix to look for
 * @return 1 when the first strlen(prefix) characters of s equal prefix
 *         (an empty prefix always matches), 0 otherwise.
 */
static int
startswith(const char *s, const char *prefix) {
    return strncmp( s, prefix, strlen( prefix ) ) == 0;
}
304 
/*
 * Read a decimal int from the first len characters of text. At most 20
 * characters are examined. Returns non-zero when a number was converted.
 */
static int
read_int_field(const char *text, int len, int *value) {
    char field[21];
    int take = (len > 20) ? 20 : len;

    memcpy( field, text, (size_t)take );
    field[take] = '\0';
    return sscanf( field, "%d", value ) >= 1;
}

/**
 * Decode a size range given as "Start", "Start:Stop" or "Start:Stop:Step".
 *
 *  - "Start":            step = max(1, Start/10), stop = Start + 10*step
 *  - "Start:Stop":       step = max(1, (Stop-Start)/10)
 *  - "Start:Stop:Step":  all three taken from the string
 *
 * Start must be >= 1, Stop must be >= Start, and an explicit Step must be
 * >= 1. More than two colons is an error.
 *
 * @param[in]  range    text to decode
 * @param[out] start_p  first size
 * @param[out] stop_p   last size
 * @param[out] step_p   increment
 * @return 0 on success; -1 on malformed input, in which case none of the
 *         output arguments is written.
 */
static int
get_range(char *range, int *start_p, int *stop_p, int *step_p) {
    const char *colon1, *colon2, *p;
    int ncolons = 0;
    int lo = 1000, hi = 10000, inc = 1000;

    for (p = strchr( range, ':' ); p != NULL; p = strchr( p + 1, ':' ))
        ++ncolons;

    colon1 = strchr( range, ':' );

    switch (ncolons) {
    case 0:
        /* Single number: about ten steps starting at that value. */
        if (sscanf( range, "%d", &lo ) < 1 || lo < 1)
            return -1;
        inc = lo / 10;
        if (inc < 1)
            inc = 1;
        hi = lo + 10 * inc;
        break;

    case 1:
        /* Stop value follows the colon ... */
        if (sscanf( colon1 + 1, "%d", &hi ) < 1 || hi < 1)
            return -1;
        /* ... start value precedes it. */
        if (!read_int_field( range, (int)(colon1 - range), &lo ) || lo > hi || lo < 1)
            return -1;
        /* Ten steps or fewer. */
        inc = (hi - lo) / 10;
        if (inc < 1)
            inc = 1;
        break;

    case 2:
        colon2 = strchr( colon1 + 1, ':' );
        if (!read_int_field( range, (int)(colon1 - range), &lo ) || lo < 1)
            return -1;
        if (!read_int_field( colon1 + 1, (int)(colon2 - (colon1 + 1)), &hi ) || hi < lo)
            return -1;
        if (sscanf( colon2 + 1, "%d", &inc ) < 1 || inc < 1)
            return -1;
        break;

    default:
        return -1;
    }

    *start_p = lo;
    *stop_p  = hi;
    *step_p  = inc;

    return 0;
}
372 
/**
 * Print the command-line usage text on stdout.
 *
 * Entries for options that are currently disabled in the parser (--inplace,
 * --outplace, --[no]atun, --ifmt, --ofmt, --thrdbypb) are intentionally not
 * listed.
 *
 * @param[in] prog_name  program name shown on the usage line
 */
static void
show_help(char *prog_name) {
    static const char options[] =
        "Options are:\n"
        " --help Show this help\n"
        "\n"
        " --threads=X Number of CPU workers (default: _SC_NPROCESSORS_ONLN)\n"
        " --gpus=X Number of GPU workers (default: 0)\n"
        "\n"
        " --[a]sync Enable/Disable synchronous calls in wrapper function such as POTRI. (default: async)\n"
        " --[no]bigmat Allocating one big mat or plenty of small (default: bigmat)\n"
        " --[no]check Check result (default: nocheck)\n"
        " --[no]progress Display progress indicator (default: noprogress)\n"
        " --[no]gemm3m Use gemm3m complex method (default: nogemm3m)\n"
        " --[no]inv Check on inverse (default: noinv)\n"
        " --[no]warmup Perform a warmup run to pre-load libraries (default: warmup)\n"
        " --[no]trace Enable/Disable trace generation (default: notrace)\n"
        " --[no]dag Enable/Disable DAG generation (default: nodag)\n"
        " Generates a dot_dag_file.dot.\n"
        " --[no]profile Print profiling informations (default: noprofile)\n"
        " --nocpu All GPU kernels are exclusively executed on GPUs (default: 0)\n"
        "\n"
        " --n_range=R Range of N values\n"
        " with R=Start:Stop:Step (default: 500:5000:500)\n"
        " --m=X dimension (M) of the matrices (default: N)\n"
        " --k=X dimension (K) of the matrices (default: 1)\n"
        " --nrhs=X Number of right-hand size (default: 1)\n"
        " --nb=N Nb size. (default: 128)\n"
        " --ib=N IB size. (default: 32)\n"
        "\n"
        " --niter=N Number of iterations performed for each test (default: 1)\n"
        "\n"
        " --rhblk=N If N > 0, enable Householder mode for QR and LQ factorization\n"
        " N is the size of each subdomain (default: 0)\n"
        " --rk fixed rank\n"
        " --acc fixed accuracy is used if fixed rank is equal to zero. This value is also used by STARSH-H in generation of matrix. Rndtiled also depends on this variable\n"
        " --starshmaxrank buffer sizes in generation of matrices hcore_gytlr and the limit used in QR factorizations and SVD of hcore_gemm to verify that ranks do not excess current allocation.\n"
        " Use one of the flags for selecting problem type:\n"
        " --ss Spatial statistics with square exp kernel\n"
        " --geostat Spatial statistics with Matern kernel\n"
        " --edsin Electro dynamics with Sinus\n"
        " --rnd Random matrix\n"
        "\n";

    printf( "Usage:\n%s [options]\n\n", prog_name );
    /* The option table contains no conversion specifications, so it can be
     * written verbatim. */
    fputs( options, stdout );
}
432 
433 
434  static void
435  print_header(char *prog_name, int * iparam,
436  double fixed_rank_decay, double wave_k) {
437  const char *bound_header = iparam[IPARAM_BOUND] ? " thGflop/s" : "";
438  //const char *check_header = iparam[IPARAM_CHECK] ? " ||Ax-b|| ||A|| ||x|| ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||) RETURN" : "";
439  const char *check_header = iparam[IPARAM_CHECK] ? " ||DC-TLR|| ||init DC|| ||DC|| ||TLR|| ||DC-TLR||/||DC|| RETURN" : "";
440  const char *inverse_header = iparam[IPARAM_INVERSE] ? " ||I-A*Ainv|| ||A|| ||Ainv|| ||Id - A*Ainv||/((||A|| ||Ainv||).N.eps)" : "";
441  const char *peak_header = iparam[IPARAM_PEAK] ? " (% of peak) peak" : "";
442 #if defined(CHAMELEON_SIMULATION)
443  _PREC eps = 0.;
444 #else
445  _PREC eps = _LAMCH( 'e' );
446 #endif
447 
448  printf( "#\n"
449  "# morse %s\n"
450  "# Nb threads: %d\n"
451  "# Nb GPUs: %d\n"
452 #if defined(CHAMELEON_USE_MPI)
453  "# Nb mpi: %d\n"
454  "# PxQ: %dx%d\n"
455 #endif
456  "# MB: %d\n"
457  "# NB: %d\n"
458  "# IB: %d\n"
459  "# eps: %e\n"
460  "# fixed rank: %d\n"
461  "# fixed acc: %.1e\n"
462  "# alwaysfixedrank: %d\n"
463  "# wave_k: %g\n"
464  "# shmaxrk: %d\n"
465  "# shprob: %d\n"
466  "# shdecay: %e\n"
467  "#\n",
468  prog_name,
469  iparam[IPARAM_THRDNBR],
470  iparam[IPARAM_NCUDAS],
471 #if defined(CHAMELEON_USE_MPI)
472  iparam[IPARAM_NMPI],
473  iparam[IPARAM_P], iparam[IPARAM_Q],
474 #endif
475  iparam[IPARAM_MB],
476  iparam[IPARAM_NB],
477  iparam[IPARAM_IB],
478  eps,
479  iparam[IPARAM_RK],
480  //iparam[IPARAM_ACC],
481  pow(10, -1.0*iparam[IPARAM_ACC]),
483  wave_k,
485  //iparam[IPARAM_HICMA_MAXRANK],
486  iparam[IPARAM_HICMA_STARSH_PROB],
487  //iparam[IPARAM_HICMA_STARSH_DECAY],
488  fixed_rank_decay
489  //pow(10, -1.0*iparam[IPARAM_HICMA_STARSH_DECAY])
490  );
491 
492  printf( "# M N K/NRHS seconds Gflop/s Deviation%s%s%s\n",
493  bound_header, peak_header, iparam[IPARAM_INVERSE] ? inverse_header : check_header);
494  return;
495  }
496 
/**
 * Determine the default number of CPU worker threads.
 *
 * On Windows the value is read from the NUMBER_OF_PROCESSORS environment
 * variable; elsewhere it is the number of processors currently online as
 * reported by sysconf(). When the value cannot be obtained, or is not
 * positive, 1 is stored so that callers never receive a negative count.
 *
 * @param[out] thrdnbr  receives the thread count (always >= 1)
 */
static void
get_thread_count(int *thrdnbr) {
    /* Same platform test as the one that selects <windows.h> / <unistd.h>
     * at the top of the file. */
#if defined( _WIN32 ) || defined( _WIN64 )
    const char *nproc = getenv( "NUMBER_OF_PROCESSORS" );
    int count = 0;

    if (nproc == NULL || sscanf( nproc, "%d", &count ) != 1 || count < 1)
        count = 1;
    *thrdnbr = count;
#else
    long online = sysconf( _SC_NPROCESSORS_ONLN );

    /* sysconf() returns -1 when the value is unavailable. */
    *thrdnbr = (online < 1) ? 1 : (int)online;
#endif
}
505 
506  int
507  main(int argc, char *argv[]) {
508  int i, m, mx, nx;
509  int nbnode = 1;
510  int start = 500;
511  int stop = 5000;
512  int step = 500;
513  int iparam[IPARAM_SIZEOF];
514  int success = 0;
515 
516  double fixed_rank_decay = 0.0;
517  double wave_k = 0.0;
518  char* rankfile = calloc(2048, sizeof(char));
519  rankfile[0] = '\0';
520 
521  memset(iparam, 0, IPARAM_SIZEOF*sizeof(int));
522 
523  iparam[IPARAM_THRDNBR ] = -1;
524  iparam[IPARAM_THRDNBR_SUBGRP] = 1;
525  iparam[IPARAM_SCHEDULER ] = 0;
526  iparam[IPARAM_M ] = -1;
527  iparam[IPARAM_N ] = 500;
528  iparam[IPARAM_K ] = 1;
529  iparam[IPARAM_LDA ] = -1;
530  iparam[IPARAM_LDB ] = -1;
531  iparam[IPARAM_LDC ] = -1;
532  iparam[IPARAM_MB ] = 128;
533  iparam[IPARAM_NB ] = 128;
534  iparam[IPARAM_IB ] = 32;
535  iparam[IPARAM_NITER ] = 1;
536  iparam[IPARAM_WARMUP ] = 1;
537  iparam[IPARAM_CHECK ] = 0;
538  iparam[IPARAM_BIGMAT ] = 1;
539  iparam[IPARAM_VERBOSE ] = 0;
540  iparam[IPARAM_AUTOTUNING ] = 0;
541  iparam[IPARAM_INPUTFMT ] = 0;
542  iparam[IPARAM_OUTPUTFMT ] = 0;
543  iparam[IPARAM_TRACE ] = 0;
544  iparam[IPARAM_DAG ] = 0;
545  iparam[IPARAM_ASYNC ] = 1;
546  iparam[IPARAM_MX ] = -1;
547  iparam[IPARAM_NX ] = -1;
548  iparam[IPARAM_RHBLK ] = 0;
549  iparam[IPARAM_MX ] = -1;
550  iparam[IPARAM_NX ] = -1;
551  iparam[IPARAM_RHBLK ] = 0;
552  iparam[IPARAM_INPLACE ] = MORSE_OUTOFPLACE;
553  iparam[IPARAM_MODE ] = 0;
554 
555  iparam[IPARAM_INVERSE ] = 0;
556  iparam[IPARAM_NCUDAS ] = 0;
557  iparam[IPARAM_NMPI ] = 1;
558  iparam[IPARAM_P ] = 1;
559  iparam[IPARAM_Q ] = 1;
560  iparam[IPARAM_GEMM3M ] = 0;
561  iparam[IPARAM_PROGRESS ] = 0;
562  iparam[IPARAM_PROFILE ] = 0;
563  iparam[IPARAM_PRINT_WARNINGS] = 0;
564  iparam[IPARAM_PEAK ] = 0;
565  iparam[IPARAM_PARALLEL_TASKS] = 0;
566  iparam[IPARAM_NO_CPU ] = 0;
567  iparam[IPARAM_BOUND ] = 0;
568  iparam[IPARAM_BOUNDDEPS ] = 0;
569  iparam[IPARAM_BOUNDDEPSPRIO ] = 0;
570  iparam[IPARAM_RK ] = 0;
571  iparam[IPARAM_ACC ] = 1;
572  iparam[IPARAM_HICMA_ALWAYS_FIXED_RANK] = 0;
574  iparam[IPARAM_HICMA_STARSH_MAXRANK] = 10;
575  iparam[IPARAM_HICMA_MAXRANK] = 10;
576  iparam[IPARAM_HICMA_PRINTMAT] = 0;
577  iparam[IPARAM_HICMA_PRINTINDEX] = 0;
578  iparam[IPARAM_HICMA_PRINTINDEXEND] = 0;
579 
580  for (i = 1; i < argc && argv[i]; ++i) {
581  if ( startswith( argv[i], "--help") || startswith( argv[i], "-help") ||
582  startswith( argv[i], "--h") || startswith( argv[i], "-h") ) {
583  show_help( argv[0] );
584  return EXIT_SUCCESS;
585  } else if (startswith( argv[i], "--rk=" )) {
586  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_RK]) );
587  } else if (startswith( argv[i], "--acc=" )) {
588  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_ACC]) );
589  } else if (startswith( argv[i], "--alwaysfixedrank" )) {
590  iparam[IPARAM_HICMA_ALWAYS_FIXED_RANK] = 1;
591  } else if (startswith( argv[i], "--matfile=" )) {
593  sscanf( strchr( argv[i], '=' ) + 1, "%s", strmatfile );
594  } else if (startswith( argv[i], "--rndusr" )) {
596  } else if (startswith( argv[i], "--rnd" )) {
598  } else if (startswith( argv[i], "--ss" )) {
600  } else if (startswith( argv[i], "--geostat" )) {
602  } else if (startswith( argv[i], "--edsin" )) {
604  } else if (startswith( argv[i], "--starshwavek=" )) {
605  sscanf( strchr( argv[i], '=' ) + 1, "%lf", &(wave_k) );
606  } else if (startswith( argv[i], "--starshdecay=" )) {
607  sscanf( strchr( argv[i], '=' ) + 1, "%lf", &(fixed_rank_decay) );
608  } else if (startswith( argv[i], "--starshmaxrank=" )) {
609  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_HICMA_STARSH_MAXRANK]) );
610  } else if (startswith( argv[i], "--maxrank=" )) {
611  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_HICMA_MAXRANK]) );
612  } else if (startswith( argv[i], "--printmat" )) {
613  iparam[IPARAM_HICMA_PRINTMAT] = 1;
614  } else if (startswith( argv[i], "--printindexall" )) {
615  iparam[IPARAM_HICMA_PRINTINDEX] = 1;
616  } else if (startswith( argv[i], "--rankfile" )) {
617  sscanf( strchr( argv[i], '=' ) + 1, "%s", rankfile );
618  } else if (startswith( argv[i], "--printindexend" )) {
619  iparam[IPARAM_HICMA_PRINTINDEXEND] = 1;
620  } else if (startswith( argv[i], "--threads=" )) {
621  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR]) );
622  } else if (startswith( argv[i], "--gpus=" )) {
623  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NCUDAS]) );
624  } else if (startswith( argv[i], "--check" )) {
625  iparam[IPARAM_CHECK] = 1;
626  } else if (startswith( argv[i], "--nocheck" )) {
627  iparam[IPARAM_CHECK] = 0;
628  } else if (startswith( argv[i], "--bigmat" )) {
629  iparam[IPARAM_BIGMAT] = 1;
630  } else if (startswith( argv[i], "--nobigmat" )) {
631  iparam[IPARAM_BIGMAT] = 0;
632  } else if (startswith( argv[i], "--inv" )) {
633  iparam[IPARAM_INVERSE] = 1;
634  } else if (startswith( argv[i], "--noinv" )) {
635  iparam[IPARAM_INVERSE] = 0;
636  } else if (startswith( argv[i], "--warmup" )) {
637  iparam[IPARAM_WARMUP] = 1;
638  } else if (startswith( argv[i], "--nowarmup" )) {
639  iparam[IPARAM_WARMUP] = 0;
640  /* } else if (startswith( argv[i], "--atun" )) { */
641  /* iparam[IPARAM_AUTOTUNING] = 1; */
642  /* } else if (startswith( argv[i], "--noatun" )) { */
643  /* iparam[IPARAM_AUTOTUNING] = 0; */
644  } else if (startswith( argv[i], "--trace" )) {
645  iparam[IPARAM_TRACE] = 1;
646  } else if (startswith( argv[i], "--notrace" )) {
647  iparam[IPARAM_TRACE] = 0;
648  } else if (startswith( argv[i], "--gemm3m" )) {
649  iparam[IPARAM_GEMM3M] = 1;
650  } else if (startswith( argv[i], "--nogemm3m" )) {
651  iparam[IPARAM_GEMM3M] = 0;
652  } else if (startswith( argv[i], "--progress" )) {
653  iparam[IPARAM_PROGRESS] = 1;
654  } else if (startswith( argv[i], "--noprogress" )) {
655  iparam[IPARAM_PROGRESS] = 0;
656  } else if (startswith( argv[i], "--dag" )) {
657  iparam[IPARAM_DAG] = 1;
658  } else if (startswith( argv[i], "--nodag" )) {
659  iparam[IPARAM_DAG] = 0;
660  } else if (startswith( argv[i], "--sync" )) {
661  iparam[IPARAM_ASYNC] = 0;
662  } else if (startswith( argv[i], "--async" )) {
663  iparam[IPARAM_ASYNC] = 1;
664  } else if (startswith( argv[i], "--n_range=" )) {
665  get_range( strchr( argv[i], '=' ) + 1, &start, &stop, &step );
666  } else if (startswith( argv[i], "--m=" )) {
667  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_M]) );
668  } else if (startswith( argv[i], "--nb=" )) {
669  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NB]) );
670  //iparam[IPARAM_MB] = iparam[IPARAM_NB]; WHY THIS ASSUMPTION? @KADIR
671  } else if (startswith( argv[i], "--mb=" )) {
672  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_MB]) );
673  } else if (startswith( argv[i], "--nrhs=" )) {
674  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_K]) );
675  } else if (startswith( argv[i], "--k=" )) {
676  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_K]) );
677  } else if (startswith( argv[i], "--ib=" )) {
678  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_IB]) );
679  } else if (startswith( argv[i], "--niter=" )) {
680  sscanf( strchr( argv[i], '=' ) + 1, "%d", &iparam[IPARAM_NITER] );
681  } else if (startswith( argv[i], "--mx=" )) {
682  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_MX]) );
683  } else if (startswith( argv[i], "--nx=" )) {
684  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NX]) );
685  } else if (startswith( argv[i], "--rhblk=" )) {
686  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_RHBLK]) );
687  /* } else if (startswith( argv[i], "--inplace" )) { */
688  /* iparam[IPARAM_INPLACE] = morse_INPLACE; */
689  /* } else if (startswith( argv[i], "--outplace" )) { */
690  /* iparam[IPARAM_INPLACE] = morse_OUTOFPLACE; */
691  /* } else if (startswith( argv[i], "--ifmt=" )) { */
692  /* sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_INPUTFMT]) ); */
693  /* } else if (startswith( argv[i], "--ofmt=" )) { */
694  /* sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_OUTPUTFMT]) ); */
695  /* } else if (startswith( argv[i], "--thrdbypb=" )) { */
696  /* sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR_SUBGRP]) ); */
697  } else if (startswith( argv[i], "--profile" )) {
698  iparam[IPARAM_PROFILE] = 1;
699  } else if (startswith( argv[i], "--peak" )) {
700  iparam[IPARAM_PEAK] = 1;
701  } else if (startswith( argv[i], "--noprofile" )) {
702  iparam[IPARAM_PROFILE] = 0;
703  } else if (startswith( argv[i], "--printerrors" )) {
704  iparam[IPARAM_PRINT_WARNINGS] = 1;
705  /* } else if (startswith( argv[i], "--parallel=" )) { */
706  /* sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_PARALLEL_TASKS]) ); */
707  /* } else if (startswith( argv[i], "--noparallel" )) { */
708  /* iparam[IPARAM_PARALLEL_TASKS] = 0; */
709  } else if (startswith( argv[i], "--nocpu" )) {
710  iparam[IPARAM_NO_CPU] = 1;
711  } else if (startswith( argv[i], "--bounddepsprio" )) {
712  iparam[IPARAM_BOUND] = 1;
713  iparam[IPARAM_BOUNDDEPS] = 1;
714  iparam[IPARAM_BOUNDDEPSPRIO] = 1;
715  } else if (startswith( argv[i], "--bounddeps" )) {
716  iparam[IPARAM_BOUND] = 1;
717  iparam[IPARAM_BOUNDDEPS] = 1;
718  } else if (startswith( argv[i], "--bound" )) {
719  iparam[IPARAM_BOUND] = 1;
720  } else if (startswith( argv[i], "--p=" )) {
721  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_P]) );
722  } else if (startswith( argv[i], "--mode=" )) {
723  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_MODE]) );
724  if (iparam[IPARAM_MODE] < 0 || iparam[IPARAM_MODE] > 20){
725  fprintf( stderr, "Invalid mode: %s from 0 to 20\n", argv[i] );
726  exit(0);
727  }
728  } else {
729  fprintf( stderr, "Unknown option: %s\n", argv[i] );
730  }
731  }
732 #if !defined(CHAMELEON_USE_CUDA)
733  if (iparam[IPARAM_NCUDAS] != 0){
734  fprintf(stderr, "ERROR: MORSE_USE_CUDA is not defined. "
735  "The number of CUDA devices must be set to 0 (--gpus=0).\n");
736  return EXIT_FAILURE;
737  }
738 #endif
739 
740  if ( iparam[IPARAM_THRDNBR] == -1 ) {
741  get_thread_count( &(iparam[IPARAM_THRDNBR]) );
742  iparam[IPARAM_THRDNBR] -= iparam[IPARAM_NCUDAS];
743  }
744 
745  m = iparam[IPARAM_M];
746  mx = iparam[IPARAM_MX];
747  nx = iparam[IPARAM_NX];
748 
749  /* Initialize morse */
750  MORSE_Init( iparam[IPARAM_THRDNBR],
751  iparam[IPARAM_NCUDAS] );
752 
753  /* Stops profiling here to avoid profiling uninteresting routines.
754  It will be reactivated in the time_*.c routines with the macro START_TIMING() */
755  RUNTIME_stop_profiling();
756 
757  MORSE_Disable(MORSE_AUTOTUNING);
758  MORSE_Set(MORSE_TILE_SIZE, iparam[IPARAM_NB] );
759  MORSE_Set(MORSE_INNER_BLOCK_SIZE, iparam[IPARAM_IB] );
760 
761  /* Householder mode */
762  if (iparam[IPARAM_RHBLK] < 1) {
763  MORSE_Set(MORSE_HOUSEHOLDER_MODE, MORSE_FLAT_HOUSEHOLDER);
764  } else {
765  MORSE_Set(MORSE_HOUSEHOLDER_MODE, MORSE_TREE_HOUSEHOLDER);
766  MORSE_Set(MORSE_HOUSEHOLDER_SIZE, iparam[IPARAM_RHBLK]);
767  }
768 
769  if (iparam[IPARAM_PROFILE] == 1)
770  MORSE_Enable(MORSE_PROFILING_MODE);
771 
772  if (iparam[IPARAM_PRINT_WARNINGS] == 1)
773  MORSE_Enable(MORSE_WARNINGS);
774 
775  if (iparam[IPARAM_PROGRESS] == 1)
776  MORSE_Enable(MORSE_PROGRESS);
777 
778  if (iparam[IPARAM_GEMM3M] == 1)
779  MORSE_Enable(MORSE_GEMM3M);
780 
781 #if defined(CHAMELEON_USE_MPI)
782  nbnode = MORSE_Comm_size( );
783  iparam[IPARAM_NMPI] = nbnode;
784  /* Check P */
785  if ( (iparam[IPARAM_P] > 1) &&
786  (nbnode % iparam[IPARAM_P] != 0) ) {
787  fprintf(stderr, "ERROR: %d doesn't divide the number of node %d\n",
788  iparam[IPARAM_P], nbnode );
789  return EXIT_FAILURE;
790  }
791 #endif
792  iparam[IPARAM_Q] = nbnode / iparam[IPARAM_P];
793 
794  /* Layout conversion */
795  MORSE_Set(MORSE_TRANSLATION_MODE, iparam[IPARAM_INPLACE]);
796 
797  if ( MORSE_My_Mpi_Rank() == 0 )
798  print_header( argv[0], iparam, fixed_rank_decay, wave_k);
799 
800  if (step < 1) step = 1;
801 
802  int status = Test( -1, iparam, fixed_rank_decay, wave_k, rankfile ); /* print header */
803  if (status != MORSE_SUCCESS) return status;
804  for (i = start; i <= stop; i += step)
805  {
806  if ( nx > 0 ) {
807  iparam[IPARAM_M] = i;
808  iparam[IPARAM_N] = chameleon_max(1, i/nx);
809  } else if ( mx > 0 ) {
810  iparam[IPARAM_M] = chameleon_max(1, i/mx);
811  iparam[IPARAM_N] = i;
812  } else {
813  if ( m == -1 )
814  iparam[IPARAM_M] = i;
815  iparam[IPARAM_N] = i;
816  }
817  int status = Test( iparam[IPARAM_N], iparam, fixed_rank_decay, wave_k, rankfile );
818  if (status != MORSE_SUCCESS) return status;
819  success += status;
820  }
821 
822  MORSE_Finalize();
823  starpu_data_display_memory_stats();
824  free(rankfile);
825  return success;
826  }
827 
#define _FMULS
Definition: timing.h:35
int RunTest(int *iparam, double *dparam, morse_time_t *t_, char *rankfile)
#define HICMA_STARSH_PROB_EDSIN
#define A(m, n)
Definition: pzgemm.c:56
#define HICMA_STARSH_PROB_GEOSTAT
#define _FADDS
Definition: timing.h:36
#define HICMA_STARSH_PROB_RND
#define _PREC
Definition: timing.h:32
char strmatfile[LEN_STR_MAT_FILE]
#define HICMA_STARSH_PROB_SS
#define _TYPE
Definition: timing.h:31
int ISEED[4]
Definition: timing.c:84
#define HICMA_STARSH_PROB_FILE
#define HICMA_STARSH_PROB_RNDUSR
#define _LAMCH
Definition: timing.h:33
void * morse_getaddr_null(const MORSE_desc_t *A, int m, int n)
Definition: timing.c:79
#define _NAME
Definition: timing.h:37
int main(int argc, char *argv[])
Definition: timing.c:507