HiCMA
Hierarchical Computations on Manycore Architectures
timing.h
Go to the documentation of this file.
1 
18 /*
19  * @copyright (c) 2009-2014 The University of Tennessee and The University
20  * of Tennessee Research Foundation.
21  * All rights reserved.
22  * @copyright (c) 2012-2016 Inria. All rights reserved.
23  * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
24  */
25 
26 #ifndef TIMING_H
27 #define TIMING_H
28 
29 #include "morse.h"
30 
/*
 * Precision / kernel selection for this timing driver.
 * NOTE(review): _TYPE and _PREC are presumably the matrix element type and
 * the matching real type; both are plain double here - confirm with the
 * drivers that include this header.
 */
#define _TYPE  double
#define _PREC  double
#define _LAMCH LAPACKE_dlamch_work

/*
 * Operation counts of the timed kernel (see LAWN 41, page 120).
 * FIXME: both are hard-wired to 0; the intended values appear to be
 * FMULS_GEMM(M, N, K) and FADDS_GEMM(M, N, K).
 */
#define _FMULS 0
#define _FADDS 0

/* NOTE(review): the name says "zgemm" while _TYPE is double - confirm. */
#define _NAME  "HICMA_zgemm_Tile"
38 
39 
40 
41 
/* Scalar type used to carry a measured time (see the t_ argument of RunTest). */
typedef double morse_time_t;
43 
44 
45 int RunTest(int *iparam, double *dparam, morse_time_t *t_, char* rankfile);
46 void* morse_getaddr_null(const MORSE_desc_t *A, int m, int n);
47 
49  IPARAM_THRDNBR, /* Number of cores */
50  IPARAM_THRDNBR_SUBGRP, /* Number of cores in a subgroup (NUMA node) */
51  IPARAM_SCHEDULER, /* What scheduler do we choose (dyn, stat) */
52  IPARAM_M, /* Number of rows of the matrix */
53  IPARAM_N, /* Number of columns of the matrix */
54  IPARAM_K, /* RHS or K */
55  IPARAM_LDA, /* Leading dimension of A */
56  IPARAM_LDB, /* Leading dimension of B */
57  IPARAM_LDC, /* Leading dimension of C */
58  IPARAM_IB, /* Inner-blocking size */
59  IPARAM_NB, /* Number of columns in a tile */
60  IPARAM_MB, /* Number of rows in a tile */
61  IPARAM_NITER, /* Number of iteration of each test */
62  IPARAM_WARMUP, /* Run one test to load dynamic libraries */
63  IPARAM_BIGMAT, /* Allocating one big mat or plenty of small */
64  IPARAM_CHECK, /* Checking activated or not */
65  IPARAM_VERBOSE, /* How much noise do we want? */
66  IPARAM_AUTOTUNING, /* Disable/enable autotuning */
67  IPARAM_INPUTFMT, /* Input format (Use only for getmi/gecfi) */
68  IPARAM_OUTPUTFMT, /* Output format (Use only for getmi/gecfi) */
69  IPARAM_TRACE, /* Generate trace on the first non warmup run */
70  IPARAM_DAG, /* Do we require to output the DOT file? */
71  IPARAM_ASYNC, /* Asynchronous calls */
72  IPARAM_MX, /* */
73  IPARAM_NX, /* */
74  IPARAM_RHBLK, /* Householder reduction parameter for QR/LQ */
75  IPARAM_INPLACE, /* InPlace/OutOfPlace translation mode */
76  IPARAM_MODE, /* Eigenvalue generation mode */
77 
81  IPARAM_P, /* Parameter for 2D cyclic distribution */
82  IPARAM_Q, /* Parameter for 2D cyclic distribution */
83 
84  IPARAM_PROGRESS, /* Use a progress indicator during computations */
85  IPARAM_GEMM3M, /* Use GEMM3M for complex matrix vector products */
86  /* Added for StarPU version */
104  /* End */
106 };
107 
117  /* Begin section for hydra integration tool */
118  IPARAM_THRESHOLD_CHECK, /* Maximum value accepted for: ||Ax-b||/N/eps/(||A||||x||+||b||) */
121  /* End section for hydra integration tool */
123 };
124 
/*
 * Declares, in the enclosing scope, the locals the other macros of this
 * header rely on (t, MB, NB, P, Q, bigmat, ...) and fills them from iparam.
 *
 *   - t is declared but left unset; START_TIMING() assigns it.
 *   - NRHS is an alias of K.
 *   - LDA, LDB and LDC are raised to at least M, N and K respectively.
 *   - MT / NT are the number of MB-row / NB-column tiles needed to cover
 *     M rows / N columns (division rounded up).
 *   - Every local except t is cast to void so that drivers which ignore
 *     some of them build without unused-variable warnings.
 *
 * MB and NB must be non-zero, otherwise the tile counts divide by zero.
 * The expansion consists of declarations, so it can appear only once per
 * scope. The argument is parenthesized so that any pointer expression,
 * not just a plain identifier, can be passed.
 */
#define PASTE_CODE_IPARAM_LOCALS(iparam)                             \
    double  t;                                                       \
    int64_t M    = (iparam)[IPARAM_M];                               \
    int64_t N    = (iparam)[IPARAM_N];                               \
    int64_t K    = (iparam)[IPARAM_K];                               \
    int64_t NRHS = K;                                                \
    int64_t LDA  = chameleon_max(M, (iparam)[IPARAM_LDA]);           \
    int64_t LDB  = chameleon_max(N, (iparam)[IPARAM_LDB]);           \
    int64_t LDC  = chameleon_max(K, (iparam)[IPARAM_LDC]);           \
    int64_t IB   = (iparam)[IPARAM_IB];                              \
    int64_t MB   = (iparam)[IPARAM_MB];                              \
    int64_t NB   = (iparam)[IPARAM_NB];                              \
    int64_t P    = (iparam)[IPARAM_P];                               \
    int64_t Q    = (iparam)[IPARAM_Q];                               \
    int64_t MT   = (M%MB==0) ? (M/MB) : (M/MB+1);                    \
    int64_t NT   = (N%NB==0) ? (N/NB) : (N/NB+1);                    \
    int bigmat   = (iparam)[IPARAM_BIGMAT];                          \
    int check    = (iparam)[IPARAM_CHECK];                           \
    int loud     = (iparam)[IPARAM_VERBOSE];                         \
    (void)M;(void)N;(void)K;(void)NRHS;                              \
    (void)LDA;(void)LDB;(void)LDC;                                   \
    (void)IB;(void)MB;(void)NB;(void)P;(void)Q;                      \
    (void)MT;(void)NT;(void)check;(void)loud;(void)bigmat;
148 
/*
 * Declares the descriptor pointer _desc_ (initially NULL) and, when _cond_
 * is true, creates the descriptor for an _m_ x _n_ matrix with leading
 * dimension _lda_ and element kind _type2_.
 *
 *   - bigmat == 0: MORSE_Desc_Create_User() is used with morse_getaddr_null
 *     as the tile-address callback.
 *   - bigmat != 0: MORSE_Desc_Create() is used.
 *
 * If creation does not report MORSE_SUCCESS, the enclosing function returns
 * that status. The status is kept in a local named status<_desc_>; it starts
 * as MORSE_SUCCESS so that it is never indeterminate when _cond_ is false.
 *
 * _type_ is accepted but not used by the expansion.
 * Relies on MB, NB, P, Q and bigmat being in scope (PASTE_CODE_IPARAM_LOCALS
 * declares them). Expands to declarations plus an if-block.
 */
#define PASTE_CODE_ALLOCATE_MATRIX_TILE(_desc_, _cond_, _type_, _type2_, _lda_, _m_, _n_) \
    MORSE_desc_t *_desc_ = NULL;                                                  \
    int status ## _desc_ = MORSE_SUCCESS;                                         \
    if ( _cond_ ) {                                                               \
        if ( !bigmat ) {                                                          \
            status ## _desc_ = MORSE_Desc_Create_User(                            \
                &(_desc_), NULL, (_type2_), MB, NB, MB*NB,                        \
                (_lda_), (_n_), 0, 0, (_m_), (_n_),                               \
                P, Q, morse_getaddr_null, NULL, NULL);                            \
        }                                                                         \
        else {                                                                    \
            status ## _desc_ = MORSE_Desc_Create(                                 \
                &(_desc_), NULL, (_type2_), MB, NB, MB*NB,                        \
                (_lda_), (_n_), 0, 0, (_m_), (_n_),                               \
                P, Q);                                                            \
        }                                                                         \
        if ( status ## _desc_ != MORSE_SUCCESS ) return (status ## _desc_);       \
    }
164 
/*
 * Hands the address of the descriptor pointer _desc_ to MORSE_Desc_Destroy().
 * Expands to a single expression; the caller supplies the terminating
 * semicolon, so the macro is safe as the body of an unbraced if/else.
 */
#define PASTE_CODE_FREE_MATRIX(_desc_) \
    MORSE_Desc_Destroy( &(_desc_) )
167 
/*
 * Declares `_type_ *_name_` (initially NULL) and, when _cond_ is true,
 * allocates _lda_ * _n_ elements for it and passes the buffer to
 * MORSE_Tile_to_Lapack() together with _desc_ and _lda_.
 *
 * On allocation failure a message is printed to stderr and the enclosing
 * function returns -1. The buffer belongs to the caller, who must free() it.
 * The value returned by MORSE_Tile_to_Lapack() is not checked.
 *
 * The byte count is computed in size_t so that the product of two int
 * dimensions cannot overflow before it is widened.
 */
#define PASTE_TILE_TO_LAPACK(_desc_, _name_, _cond_, _type_, _lda_, _n_)       \
    _type_ *_name_ = NULL;                                                      \
    if ( _cond_ ) {                                                             \
        _name_ = (_type_*)malloc(                                               \
            (size_t)(_lda_) * (size_t)(_n_) * sizeof(_type_) );                 \
        if ( ! _name_ ) {                                                       \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);                 \
            return -1;                                                          \
        }                                                                       \
        MORSE_Tile_to_Lapack(_desc_, (void*)_name_, _lda_);                     \
    }
178 
/*
 * Declares `_type_ *_name_` (initially NULL) and, when _cond_ is true,
 * allocates room for _lda_ * _n_ elements of _type_. The memory is left
 * uninitialized.
 *
 * On allocation failure a message is printed to stderr and the enclosing
 * function returns -1. The buffer belongs to the caller, who must free() it.
 *
 * The byte count is computed in size_t so that the product of two int
 * dimensions cannot overflow before it is widened.
 */
#define PASTE_CODE_ALLOCATE_MATRIX(_name_, _cond_, _type_, _lda_, _n_)         \
    _type_ *_name_ = NULL;                                                      \
    if ( _cond_ ) {                                                             \
        _name_ = (_type_*)malloc(                                               \
            (size_t)(_lda_) * (size_t)(_n_) * sizeof(_type_) );                 \
        if ( ! _name_ ) {                                                       \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);                 \
            return -1;                                                          \
        }                                                                       \
    }
188 
/*
 * Declares `_type_ *_name_` (initially NULL) and, when _cond_ is true,
 * allocates room for _lda_ * _n_ elements of _type_ and fills it with a
 * byte-wise copy of the same number of elements read from _orig_.
 *
 * On allocation failure a message is printed to stderr and the enclosing
 * function returns -1. The buffer belongs to the caller, who must free() it.
 *
 * The byte count is computed once, in size_t, and stored in a block-local
 * named bytes_<_name_>: the dimension arguments are evaluated a single time
 * and the product of two int dimensions cannot overflow before widening.
 */
#define PASTE_CODE_ALLOCATE_COPY(_name_, _cond_, _type_, _orig_, _lda_, _n_)   \
    _type_ *_name_ = NULL;                                                      \
    if ( _cond_ ) {                                                             \
        const size_t bytes_ ## _name_ =                                         \
            (size_t)(_lda_) * (size_t)(_n_) * sizeof(_type_);                   \
        _name_ = (_type_*)malloc( bytes_ ## _name_ );                           \
        if ( ! _name_ ) {                                                       \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);                 \
            return -1;                                                          \
        }                                                                       \
        memcpy( _name_, (_orig_), bytes_ ## _name_ );                           \
    }
199 
200 /*********************
201  *
202  * Macro for trace generation
203  *
204  */
/*
 * Starts runtime statistics; additionally starts profiling when
 * iparam[IPARAM_TRACE] == 2 and enables MORSE_BOUND when iparam[IPARAM_BOUND]
 * is non-zero. Reads `iparam` from the enclosing scope.
 *
 * Wrapped in do { } while (0) so that the whole sequence behaves as one
 * statement (e.g. under an unbraced if); the caller supplies the semicolon.
 */
#define START_TRACING()                          \
    do {                                         \
        RUNTIME_start_stats();                   \
        if ( iparam[IPARAM_TRACE] == 2 ) {       \
            RUNTIME_start_profiling();           \
        }                                        \
        if ( iparam[IPARAM_BOUND] ) {            \
            MORSE_Enable(MORSE_BOUND);           \
        }                                        \
    } while (0)
213 
/*
 * Stops runtime statistics; additionally stops profiling when
 * iparam[IPARAM_TRACE] == 2 and disables MORSE_BOUND when iparam[IPARAM_BOUND]
 * is non-zero. Reads `iparam` from the enclosing scope.
 *
 * Wrapped in do { } while (0) so that the whole sequence behaves as one
 * statement (e.g. under an unbraced if); the caller supplies the semicolon.
 */
#define STOP_TRACING()                           \
    do {                                         \
        RUNTIME_stop_stats();                    \
        if ( iparam[IPARAM_TRACE] == 2 ) {       \
            RUNTIME_stop_profiling();            \
        }                                        \
        if ( iparam[IPARAM_BOUND] ) {            \
            MORSE_Disable(MORSE_BOUND);          \
        }                                        \
    } while (0)
222 
223 /*********************
224  *
225  * Macro for DAG generation
226  *
227  */
#if 0
/* Disabled variant: toggles MORSE_DAG when iparam[IPARAM_DAG] == 2. */
#define START_DAG()                              \
    do {                                         \
        if ( iparam[IPARAM_DAG] == 2 ) {         \
            MORSE_Enable(MORSE_DAG);             \
        }                                        \
    } while (0)

#define STOP_DAG()                               \
    do {                                         \
        if ( iparam[IPARAM_DAG] == 2 ) {         \
            MORSE_Disable(MORSE_DAG);            \
        }                                        \
    } while (0)
#else
/*
 * Active variant: the DAG hooks compile to an empty statement.
 * No semicolon is baked into the expansion - the caller supplies it - so
 * `if (c) START_DAG(); else ...` remains valid.
 */
#define START_DAG() do {} while (0)
#define STOP_DAG()  do {} while (0)
#endif
240 
241 /*********************
242  *
243  * Synchro for distributed computations
244  *
245  */
/*
 * With CHAMELEON_USE_MPI defined these call MORSE_Distributed_start() /
 * MORSE_Distributed_stop(); otherwise they compile to an empty statement.
 * No semicolon is baked into the expansion - the caller supplies it - so
 * the macros remain valid as the body of an unbraced if/else.
 */
#if defined(CHAMELEON_USE_MPI)
#define START_DISTRIBUTED() MORSE_Distributed_start()
#define STOP_DISTRIBUTED()  MORSE_Distributed_stop()
#else
#define START_DISTRIBUTED() do {} while (0)
#define STOP_DISTRIBUTED()  do {} while (0)
#endif
253 
254 /*********************
255  *
256  * General Macros for timing
257  *
258  */
/*
 * Opens a timed region: stores the negated current RUNTIME_get_time() in the
 * local `t` (so that adding the time at the end yields the elapsed amount),
 * then runs the DAG, tracing and distributed start hooks in that order.
 * Relies on `t` and `iparam` being in scope.
 *
 * Wrapped in do { } while (0) so that the whole sequence behaves as one
 * statement; the caller supplies the semicolon.
 */
#define START_TIMING()                   \
    do {                                 \
        t = -RUNTIME_get_time();         \
        START_DAG();                     \
        START_TRACING();                 \
        START_DISTRIBUTED();             \
    } while (0)
264 
/*
 * Closes a timed region: adds the current RUNTIME_get_time() to `t`, shows
 * the kernel and scheduler profiles when iparam[IPARAM_PROFILE] == 2, stores
 * the result through `t_`, then runs the distributed, tracing and DAG stop
 * hooks in that order. The time is sampled before any of the stop hooks run.
 * Relies on `t`, `t_` and `iparam` being in scope.
 *
 * Wrapped in do { } while (0) so that the whole sequence behaves as one
 * statement; the caller supplies the semicolon.
 */
#define STOP_TIMING()                            \
    do {                                         \
        t += RUNTIME_get_time();                 \
        if ( iparam[IPARAM_PROFILE] == 2 ) {     \
            RUNTIME_kernelprofile_display();     \
            RUNTIME_schedprofile_display();      \
        }                                        \
        *t_ = t;                                 \
        STOP_DISTRIBUTED();                      \
        STOP_TRACING();                          \
        STOP_DAG();                              \
    } while (0)
275 
276 #endif /* TIMING_H */
iparam_timing
Definition: timing.h:48
double morse_time_t
Definition: timing.h:42
void * morse_getaddr_null(const MORSE_desc_t *A, int m, int n)
Definition: timing.c:79
dparam_timing
Definition: timing.h:108
#define A(m, n)
Definition: pzgemm.c:56
int RunTest(int *iparam, double *dparam, morse_time_t *t_, char *rankfile)