HiCMA
Hierarchical Computations on Manycore Architectures
timing.h
Go to the documentation of this file.
1
18
/*
19
* @copyright (c) 2009-2014 The University of Tennessee and The University
20
* of Tennessee Research Foundation.
21
* All rights reserved.
22
* @copyright (c) 2012-2016 Inria. All rights reserved.
23
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
24
*/
25
26
#ifndef TIMING_H
27
#define TIMING_H
28
29
#include "morse.h"
30
31
/* Scalar and precision types this driver is compiled for. */
#define _TYPE  double
#define _PREC  double
/* LAPACKE machine-parameter routine paired with the types above. */
#define _LAMCH LAPACKE_dlamch_work
/*
 * See Lawn 41 page 120.
 * FIXME: both flop counts are hard-wired to 0; the GEMM formulas named in the
 * trailing comments are not used.
 */
#define _FMULS 0 /* FMULS_GEMM(M, N, K) */
#define _FADDS 0 /* FADDS_GEMM(M, N, K) */
/* Name string of the routine being timed. */
#define _NAME  "HICMA_zgemm_Tile"
38
39
40
41
42
/* Time value as handled by the timing macros in this header (a plain double). */
typedef double morse_time_t;
43
44
45
/*
 * Entry point of one timing test. Only the declaration lives here.
 *
 *   iparam   - integer parameters; the macros in this header index it with
 *              enum iparam_timing values.
 *   dparam   - double parameters; presumably indexed with enum dparam_timing
 *              values (TODO confirm against the drivers).
 *   t_       - STOP_TIMING() stores the measured time through this pointer.
 *   rankfile - not used by anything in this header; meaning is driver-defined.
 */
int RunTest(int *iparam, double *dparam, morse_time_t *t_, char *rankfile);
46
/*
 * Tile-address callback that PASTE_CODE_ALLOCATE_MATRIX_TILE hands to
 * MORSE_Desc_Create_User() when bigmat is 0.
 * NOTE(review): the name suggests it returns NULL for every (m, n) tile;
 * confirm against the definition.
 */
void *morse_getaddr_null(const MORSE_desc_t *A, int m, int n);
47
48
/*
 * Indices into the integer parameter array (iparam) of the timing drivers.
 * Values are consecutive starting at 0; IPARAM_SIZEOF is the entry count.
 */
enum iparam_timing {
    IPARAM_THRDNBR = 0,            /* Number of cores                              */
    IPARAM_THRDNBR_SUBGRP,         /* Number of cores in a subgroup (NUMA node)    */
    IPARAM_SCHEDULER,              /* What scheduler do we choose (dyn, stat)      */
    IPARAM_M,                      /* Number of rows of the matrix                 */
    IPARAM_N,                      /* Number of columns of the matrix              */
    IPARAM_K,                      /* RHS or K                                     */
    IPARAM_LDA,                    /* Leading dimension of A                       */
    IPARAM_LDB,                    /* Leading dimension of B                       */
    IPARAM_LDC,                    /* Leading dimension of C                       */
    IPARAM_IB,                     /* Inner-blocking size                          */
    IPARAM_NB,                     /* Number of columns in a tile                  */
    IPARAM_MB,                     /* Number of rows in a tile                     */
    IPARAM_NITER,                  /* Number of iterations of each test            */
    IPARAM_WARMUP,                 /* Run one test to load dynamic libraries       */
    IPARAM_BIGMAT,                 /* Allocating one big mat or plenty of small    */
    IPARAM_CHECK,                  /* Checking activated or not                    */
    IPARAM_VERBOSE,                /* How much noise do we want?                   */
    IPARAM_AUTOTUNING,             /* Disable/enable autotuning                    */
    IPARAM_INPUTFMT,               /* Input format (Use only for getmi/gecfi)      */
    IPARAM_OUTPUTFMT,              /* Output format (Use only for getmi/gecfi)     */
    IPARAM_TRACE,                  /* Generate trace on the first non warmup run   */
    IPARAM_DAG,                    /* Do we require to output the DOT file?        */
    IPARAM_ASYNC,                  /* Asynchronous calls                           */
    IPARAM_MX,                     /* (undocumented)                               */
    IPARAM_NX,                     /* (undocumented)                               */
    IPARAM_RHBLK,                  /* Householder reduction parameter for QR/LQ    */
    IPARAM_INPLACE,                /* InPlace/OutOfPlace translation mode          */
    IPARAM_MODE,                   /* Eigenvalue generation mode                   */

    IPARAM_INVERSE,
    IPARAM_NCUDAS,
    IPARAM_NMPI,
    IPARAM_P,                      /* Parameter for 2D cyclic distribution         */
    IPARAM_Q,                      /* Parameter for 2D cyclic distribution         */

    IPARAM_PROGRESS,               /* Use a progress indicator during computations */
    IPARAM_GEMM3M,                 /* Use GEMM3M for complex matrix vector products */
    /* Added for StarPU version */
    IPARAM_PROFILE,
    IPARAM_PRINT_WARNINGS,
    IPARAM_PEAK,
    IPARAM_PARALLEL_TASKS,
    IPARAM_NO_CPU,
    IPARAM_BOUND,
    IPARAM_BOUNDDEPS,
    IPARAM_BOUNDDEPSPRIO,
    IPARAM_RK,
    IPARAM_ACC,
    IPARAM_HICMA_MAXRANK,
    IPARAM_HICMA_STARSH_PROB,
    IPARAM_HICMA_STARSH_MAXRANK,
    IPARAM_HICMA_PRINTMAT,
    IPARAM_HICMA_PRINTINDEX,
    IPARAM_HICMA_PRINTINDEXEND,
    IPARAM_HICMA_ALWAYS_FIXED_RANK,
    /* End */
    IPARAM_SIZEOF
};
107
108
/*
 * Indices of the double-valued timing parameters. Values are consecutive
 * starting at 0; IPARAM_DNBPARAM is the entry count. The IPARAM_ prefix is
 * historical and kept for compatibility.
 */
enum dparam_timing {
    IPARAM_TIME = 0,
    IPARAM_ANORM,
    IPARAM_BNORM,
    IPARAM_XNORM,
    IPARAM_RNORM,
    IPARAM_AinvNORM,
    IPARAM_ESTIMATED_PEAK,
    IPARAM_RES,
    /* Begin section for hydra integration tool */
    IPARAM_THRESHOLD_CHECK,     /* Maximum value accepted for: ||Ax-b||/N/eps/(||A||||x||+||b||) */
    IPARAM_HICMA_STARSH_DECAY,
    IPARAM_HICMA_STARSH_WAVE_K,
    /* End section for hydra integration tool */
    IPARAM_DNBPARAM
};
124
125
/*
 * Declare, in the enclosing scope, the locals the other PASTE_ and timing
 * macros rely on, initialised from the iparam array:
 *   t                 - uninitialised accumulator used by START/STOP_TIMING
 *   M, N, K, NRHS     - problem sizes (NRHS is a copy of K)
 *   LDA, LDB, LDC     - leading dimensions, raised to at least M, N, K
 *   IB, MB, NB, P, Q  - blocking and distribution parameters
 *   MT, NT            - number of tiles: M/MB and N/NB rounded up
 *   bigmat, check, loud
 * Every local is cast to void so an unused one does not trigger a warning.
 * Expands to declarations, so it must sit where declarations are allowed.
 */
#define PASTE_CODE_IPARAM_LOCALS(iparam)                               \
    double  t;                                                         \
    int64_t M    = (iparam)[IPARAM_M];                                 \
    int64_t N    = (iparam)[IPARAM_N];                                 \
    int64_t K    = (iparam)[IPARAM_K];                                 \
    int64_t NRHS = K;                                                  \
    int64_t LDA  = chameleon_max(M, (iparam)[IPARAM_LDA]);             \
    int64_t LDB  = chameleon_max(N, (iparam)[IPARAM_LDB]);             \
    int64_t LDC  = chameleon_max(K, (iparam)[IPARAM_LDC]);             \
    int64_t IB   = (iparam)[IPARAM_IB];                                \
    int64_t MB   = (iparam)[IPARAM_MB];                                \
    int64_t NB   = (iparam)[IPARAM_NB];                                \
    int64_t P    = (iparam)[IPARAM_P];                                 \
    int64_t Q    = (iparam)[IPARAM_Q];                                 \
    int64_t MT   = (M / MB) + ((M % MB != 0) ? 1 : 0);                 \
    int64_t NT   = (N / NB) + ((N % NB != 0) ? 1 : 0);                 \
    int bigmat   = (iparam)[IPARAM_BIGMAT];                            \
    int check    = (iparam)[IPARAM_CHECK];                             \
    int loud     = (iparam)[IPARAM_VERBOSE];                           \
    (void)M;   (void)N;   (void)K;   (void)NRHS;                       \
    (void)LDA; (void)LDB; (void)LDC;                                   \
    (void)IB;  (void)MB;  (void)NB;  (void)P; (void)Q;                 \
    (void)MT;  (void)NT;  (void)check; (void)loud; (void)bigmat;
148
149
/* Paste code to declare the descriptor _desc_ and create it if _cond_ is true */
150
/*
 * Declare `MORSE_desc_t *_desc_` (NULL) and `int status<_desc_>`, then, when
 * _cond_ holds, create the descriptor:
 *   - bigmat != 0: MORSE_Desc_Create()
 *   - bigmat == 0: MORSE_Desc_Create_User() with morse_getaddr_null
 * If creation does not return MORSE_SUCCESS the enclosing function returns
 * that status. Needs bigmat, MB, NB, P and Q in scope (see
 * PASTE_CODE_IPARAM_LOCALS). _type_ is accepted but unused in the expansion.
 * The status variable is left uninitialised when _cond_ is false.
 */
#define PASTE_CODE_ALLOCATE_MATRIX_TILE(_desc_, _cond_, _type_, _type2_, _lda_, _m_, _n_) \
    MORSE_desc_t *_desc_ = NULL;                                                        \
    int status ## _desc_ ;                                                              \
    if ( _cond_ ) {                                                                     \
        if ( bigmat ) {                                                                 \
            status ## _desc_ = MORSE_Desc_Create(&(_desc_), NULL, _type2_, MB, NB, MB*NB, _lda_, _n_, 0, 0, _m_, _n_, \
                                                 P, Q);                                 \
        }                                                                               \
        else {                                                                          \
            status ## _desc_ = MORSE_Desc_Create_User(&(_desc_), NULL, _type2_, MB, NB, MB*NB, _lda_, _n_, 0, 0, _m_, _n_, \
                                                      P, Q, morse_getaddr_null, NULL, NULL); \
        }                                                                               \
        if ( MORSE_SUCCESS != status ## _desc_ ) return (status ## _desc_);             \
    }
164
165
/* Destroy the descriptor _desc_ by passing its address to MORSE_Desc_Destroy(). */
#define PASTE_CODE_FREE_MATRIX(_desc_) \
    MORSE_Desc_Destroy( &(_desc_) );
167
168
/*
 * Declare `_type_ *_name_` (NULL) and, when _cond_ holds, malloc an
 * (_lda_) x (_n_) buffer of _type_ and fill it with
 * MORSE_Tile_to_Lapack(_desc_, _name_, _lda_).
 *
 * On allocation failure a message goes to stderr and the enclosing function
 * returns -1. The buffer is never freed here; the caller owns it.
 *
 * The byte count starts with sizeof(_type_), so the whole product is evaluated
 * in size_t rather than in the (possibly narrower, signed) type of _lda_ and
 * _n_, where it could overflow.
 * NOTE(review): the return value of MORSE_Tile_to_Lapack() is ignored.
 */
#define PASTE_TILE_TO_LAPACK(_desc_, _name_, _cond_, _type_, _lda_, _n_)     \
    _type_ *_name_ = NULL;                                                   \
    if ( _cond_ ) {                                                          \
        _name_ = (_type_*)malloc( sizeof(_type_) * (_lda_) * (_n_) );        \
        if ( ! _name_ ) {                                                    \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);              \
            return -1;                                                       \
        }                                                                    \
        MORSE_Tile_to_Lapack(_desc_, (void*)_name_, _lda_);                  \
    }
178
179
/*
 * Declare `_type_ *_name_` (NULL) and, when _cond_ holds, malloc an
 * uninitialised (_lda_) x (_n_) buffer of _type_.
 *
 * On allocation failure a message goes to stderr and the enclosing function
 * returns -1. The caller owns the buffer and must free() it.
 *
 * The byte count starts with sizeof(_type_), so the whole product is evaluated
 * in size_t rather than in the (possibly narrower, signed) type of _lda_ and
 * _n_, where it could overflow.
 */
#define PASTE_CODE_ALLOCATE_MATRIX(_name_, _cond_, _type_, _lda_, _n_)       \
    _type_ *_name_ = NULL;                                                   \
    if ( _cond_ ) {                                                          \
        _name_ = (_type_*)malloc( sizeof(_type_) * (_lda_) * (_n_) );        \
        if ( ! _name_ ) {                                                    \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);              \
            return -1;                                                       \
        }                                                                    \
    }
188
189
/*
 * Declare `_type_ *_name_` (NULL) and, when _cond_ holds, malloc an
 * (_lda_) x (_n_) buffer of _type_ and memcpy the same number of bytes into
 * it from _orig_.
 *
 * On allocation failure a message goes to stderr and the enclosing function
 * returns -1. The caller owns the buffer and must free() it.
 *
 * Both byte counts start with sizeof(_type_), so the product is evaluated in
 * size_t rather than in the (possibly narrower, signed) type of _lda_ and
 * _n_, where it could overflow.
 */
#define PASTE_CODE_ALLOCATE_COPY(_name_, _cond_, _type_, _orig_, _lda_, _n_) \
    _type_ *_name_ = NULL;                                                   \
    if ( _cond_ ) {                                                          \
        _name_ = (_type_*)malloc( sizeof(_type_) * (_lda_) * (_n_) );        \
        if ( ! _name_ ) {                                                    \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);              \
            return -1;                                                       \
        }                                                                    \
        memcpy(_name_, (_orig_), sizeof(_type_) * (_lda_) * (_n_) );         \
    }
199
200
/*********************
201
*
202
* Macro for trace generation
203
*
204
*/
205
/*
 * Start runtime statistics; additionally start profiling when
 * iparam[IPARAM_TRACE] == 2 and enable MORSE_BOUND when iparam[IPARAM_BOUND]
 * is non-zero. Requires `iparam` in the calling scope.
 *
 * Wrapped in do { } while (0) so the macro is a single statement and stays
 * correct under an unbraced if/else; invoke it with a trailing semicolon.
 */
#define START_TRACING()                         \
    do {                                        \
        RUNTIME_start_stats();                  \
        if (iparam[IPARAM_TRACE] == 2) {        \
            RUNTIME_start_profiling();          \
        }                                       \
        if (iparam[IPARAM_BOUND]) {             \
            MORSE_Enable(MORSE_BOUND);          \
        }                                       \
    } while (0)
213
214
/*
 * Counterpart of START_TRACING(): stop runtime statistics, stop profiling
 * when iparam[IPARAM_TRACE] == 2 and disable MORSE_BOUND when
 * iparam[IPARAM_BOUND] is non-zero. Requires `iparam` in the calling scope.
 *
 * Wrapped in do { } while (0) so the macro is a single statement and stays
 * correct under an unbraced if/else; invoke it with a trailing semicolon.
 */
#define STOP_TRACING()                          \
    do {                                        \
        RUNTIME_stop_stats();                   \
        if (iparam[IPARAM_TRACE] == 2) {        \
            RUNTIME_stop_profiling();           \
        }                                       \
        if (iparam[IPARAM_BOUND]) {             \
            MORSE_Disable(MORSE_BOUND);         \
        }                                       \
    } while (0)
222
223
/*********************
224
*
225
* Macro for DAG generation
226
*
227
*/
228
/*
 * DAG generation hooks. The real implementation is compiled out (#if 0), so
 * both macros currently expand to an empty statement.
 *
 * Neither definition ends with ';': the caller's semicolon completes the
 * do { } while (0), which keeps `if (c) START_DAG(); else ...` valid.
 */
#if 0
#define START_DAG()                             \
    do {                                        \
        if ( iparam[IPARAM_DAG] == 2 ) {        \
            MORSE_Enable(MORSE_DAG);            \
        }                                       \
    } while (0)

#define STOP_DAG()                              \
    do {                                        \
        if ( iparam[IPARAM_DAG] == 2 ) {        \
            MORSE_Disable(MORSE_DAG);           \
        }                                       \
    } while (0)
#else
#define START_DAG() do {} while (0)
#define STOP_DAG()  do {} while (0)
#endif
240
241
/*********************
242
*
243
* Synchro for distributed computations
244
*
245
*/
246
/*
 * Hooks around a distributed run: with CHAMELEON_USE_MPI they call
 * MORSE_Distributed_start() / MORSE_Distributed_stop(), otherwise they expand
 * to an empty statement.
 *
 * The definitions do not end with ';' so each macro expands to exactly one
 * statement once the caller adds the semicolon (safe under unbraced if/else).
 */
#if defined(CHAMELEON_USE_MPI)
#define START_DISTRIBUTED() MORSE_Distributed_start()
#define STOP_DISTRIBUTED()  MORSE_Distributed_stop()
#else
#define START_DISTRIBUTED() do {} while (0)
#define STOP_DISTRIBUTED()  do {} while (0)
#endif
253
254
/*********************
255
*
256
* General Macros for timing
257
*
258
*/
259
/*
 * Begin a timed region: store the negated current runtime clock in `t`, then
 * run the DAG, tracing and distributed start hooks in that order. Requires
 * `t` in the calling scope (declared by PASTE_CODE_IPARAM_LOCALS), plus
 * whatever the hooks need.
 *
 * Wrapped in do { } while (0) so the whole sequence is one statement and is
 * not split by an unbraced if/else; invoke it with a trailing semicolon.
 */
#define START_TIMING()                          \
    do {                                        \
        t = -RUNTIME_get_time();                \
        START_DAG();                            \
        START_TRACING();                        \
        START_DISTRIBUTED();                    \
    } while (0)
264
265
/*
 * End a timed region opened by START_TIMING(): add the current runtime clock
 * to `t` (giving the elapsed time), display the kernel and scheduler profiles
 * when iparam[IPARAM_PROFILE] == 2, store the result through `t_`, then run
 * the distributed, tracing and DAG stop hooks in that order. Requires `t`,
 * `t_` and `iparam` in the calling scope.
 *
 * Wrapped in do { } while (0) so the whole sequence is one statement and is
 * not split by an unbraced if/else; invoke it with a trailing semicolon.
 */
#define STOP_TIMING()                           \
    do {                                        \
        t += RUNTIME_get_time();                \
        if (iparam[IPARAM_PROFILE] == 2) {      \
            RUNTIME_kernelprofile_display();    \
            RUNTIME_schedprofile_display();     \
        }                                       \
        *t_ = t;                                \
        STOP_DISTRIBUTED();                     \
        STOP_TRACING();                         \
        STOP_DAG();                             \
    } while (0)
275
276
#endif
/* TIMING_H */
iparam_timing
iparam_timing
Definition:
timing.h:48
IPARAM_P
Definition:
timing.h:81
IPARAM_NCUDAS
Definition:
timing.h:79
IPARAM_M
Definition:
timing.h:52
IPARAM_AUTOTUNING
Definition:
timing.h:66
IPARAM_NX
Definition:
timing.h:73
IPARAM_HICMA_STARSH_PROB
Definition:
timing.h:98
IPARAM_LDB
Definition:
timing.h:56
morse_time_t
double morse_time_t
Definition:
timing.h:42
IPARAM_TIME
Definition:
timing.h:109
IPARAM_BOUNDDEPS
Definition:
timing.h:93
morse_getaddr_null
void * morse_getaddr_null(const MORSE_desc_t *A, int m, int n)
Definition:
timing.c:79
IPARAM_HICMA_PRINTINDEXEND
Definition:
timing.h:102
IPARAM_HICMA_MAXRANK
Definition:
timing.h:97
IPARAM_RNORM
Definition:
timing.h:113
dparam_timing
dparam_timing
Definition:
timing.h:108
A
#define A(m, n)
Definition:
pzgemm.c:56
IPARAM_INPLACE
Definition:
timing.h:75
IPARAM_INVERSE
Definition:
timing.h:78
IPARAM_RES
Definition:
timing.h:116
IPARAM_MX
Definition:
timing.h:72
IPARAM_PROGRESS
Definition:
timing.h:84
RunTest
int RunTest(int *iparam, double *dparam, morse_time_t *t_, char *rankfile)
Definition:
time_zgemm_tile.c:153
IPARAM_BIGMAT
Definition:
timing.h:63
IPARAM_DAG
Definition:
timing.h:70
IPARAM_ASYNC
Definition:
timing.h:71
IPARAM_TRACE
Definition:
timing.h:69
IPARAM_CHECK
Definition:
timing.h:64
IPARAM_THRDNBR_SUBGRP
Definition:
timing.h:50
IPARAM_HICMA_STARSH_DECAY
Definition:
timing.h:119
IPARAM_HICMA_PRINTMAT
Definition:
timing.h:100
IPARAM_NITER
Definition:
timing.h:61
IPARAM_Q
Definition:
timing.h:82
IPARAM_WARMUP
Definition:
timing.h:62
IPARAM_SIZEOF
Definition:
timing.h:105
IPARAM_AinvNORM
Definition:
timing.h:114
IPARAM_PARALLEL_TASKS
Definition:
timing.h:90
IPARAM_THRDNBR
Definition:
timing.h:49
IPARAM_LDA
Definition:
timing.h:55
IPARAM_SCHEDULER
Definition:
timing.h:51
IPARAM_K
Definition:
timing.h:54
IPARAM_VERBOSE
Definition:
timing.h:65
IPARAM_THRESHOLD_CHECK
Definition:
timing.h:118
IPARAM_BNORM
Definition:
timing.h:111
IPARAM_NO_CPU
Definition:
timing.h:91
IPARAM_BOUNDDEPSPRIO
Definition:
timing.h:94
IPARAM_ACC
Definition:
timing.h:96
IPARAM_OUTPUTFMT
Definition:
timing.h:68
IPARAM_LDC
Definition:
timing.h:57
IPARAM_ESTIMATED_PEAK
Definition:
timing.h:115
IPARAM_XNORM
Definition:
timing.h:112
IPARAM_HICMA_ALWAYS_FIXED_RANK
Definition:
timing.h:103
IPARAM_MODE
Definition:
timing.h:76
IPARAM_HICMA_STARSH_WAVE_K
Definition:
timing.h:120
IPARAM_DNBPARAM
Definition:
timing.h:122
IPARAM_IB
Definition:
timing.h:58
IPARAM_RHBLK
Definition:
timing.h:74
IPARAM_RK
Definition:
timing.h:95
IPARAM_ANORM
Definition:
timing.h:110
IPARAM_GEMM3M
Definition:
timing.h:85
IPARAM_HICMA_PRINTINDEX
Definition:
timing.h:101
IPARAM_BOUND
Definition:
timing.h:92
IPARAM_INPUTFMT
Definition:
timing.h:67
IPARAM_PEAK
Definition:
timing.h:89
IPARAM_PROFILE
Definition:
timing.h:87
IPARAM_N
Definition:
timing.h:53
IPARAM_MB
Definition:
timing.h:60
IPARAM_HICMA_STARSH_MAXRANK
Definition:
timing.h:99
IPARAM_NB
Definition:
timing.h:59
IPARAM_PRINT_WARNINGS
Definition:
timing.h:88
IPARAM_NMPI
Definition:
timing.h:80
timing
timing.h
Generated by
1.8.14