HiCMA
Hierarchical Computations on Manycore Architectures
time_zgemm_tile.c
Go to the documentation of this file.
1 
17 /*
18  * @copyright (c) 2009-2014 The University of Tennessee and The University
19  * of Tennessee Research Foundation.
20  * All rights reserved.
21  * @copyright (c) 2012-2016 Inria. All rights reserved.
22  * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
23  */
24 
35 #include "morse.h"
36 #include "timing.h"
37 #include "hicma_constants.h"
38 #include "hicma_struct.h"
39 #include "hicma_z.h"
40 #include <stdio.h>
41 #include <time.h>
42 #include <sys/time.h>
43 
44 #include "starpu.h"
45 
46 
47 #include <assert.h>
48 #include "hicma_z.h"
49 #include "auxcompute_z.h"
50 #include "auxdescutil.h"
51 
52 #include "hicma.h"
53 #include "timing_zauxiliary.h"
54 
55 #include <math.h>
56 #include <time.h>
57 
58 #undef CBLAS_SADDR
59 #define CBLAS_SADDR(_val) (_val)
60 
61 // zgytlr uses starsh in MPI mode.
62 STARSH_blrf *mpiF;
63 
64 int print_progress = 1; // Print progress about the execution
65 char datebuf[128];
66 time_t timer;
67 struct tm* tm_info;
68 #define PROGRESS(str) \
69  if(print_progress){ \
70  int myrank = MORSE_My_Mpi_Rank();\
71  time(&timer); \
72  tm_info = localtime(&timer); \
73  strftime(datebuf, 26, "%Y-%m-%d %H:%M:%S",tm_info); \
74  fprintf(stdout, "%d:%s\t%d\t%s\t%s\n", myrank, datebuf, __LINE__, __func__, str);\
75  fflush(stdout);\
76  }
77 #undef PROGRESS
78 #define PROGRESS(str)
79 
81 int global_check = 0;
85 int diag_nrows = 0;
87 int print_index = 0;
89 int print_mat = 0;
90 int use_scratch = 1; // Use scratch memory provided by starpu
91 
92 double timediff(struct timeval begin, struct timeval end){
93  double elapsed = (end.tv_sec - begin.tv_sec) +
94  ((end.tv_usec - begin.tv_usec)/1000000.0);
95  return elapsed;
96 }
97 // This function isused to compare to descriptors in terms of size and numerical values.
98 // Do not use this function with MPI
99 // FIXME: This function will be moved to aux/ folder
100 void check_same(MORSE_desc_t *descL, MORSE_desc_t *descR, char diag, char lower_triangle){
101  double *MATL = descL->mat;
102  double *MATR = descR->mat;
103  if(descL->mb != descR->mb){
104  printf("mb: %d %d\n", descL->mb, descR->mb);
105  }
106  assert(descL->mb == descR->mb);
107  if(descL->nb != descR->nb){
108  printf("nb: %d %d\n", descL->nb, descR->nb);
109  }
110  assert(descL->nb == descR->nb);
111  if(descL->mt != descR->mt){
112  printf("mt: %d %d\n", descL->mt, descR->mt);
113  }
114  assert(descL->mt == descR->mt);
115  if(descL->nt != descR->nt){
116  printf("nt: %d %d\n", descL->nt, descR->nt);
117  }
118  assert(descL->nt == descR->nt);
119  int64_t i, j, imt, jnt;
120  for(imt=0;imt<descL->mt;imt++){
121  for(jnt=0;jnt<descL->nt;jnt++){
122  if(diag == 'D' && imt != jnt){
123  continue;
124  }
125  if(lower_triangle == 'L' && imt < jnt){
126  continue;
127  }
128  double *L = &MATL[tsa(descL, imt, jnt)];
129  double *R = &MATR[tsa(descR, imt, jnt)];
130  for(i=0;i<descL->nb;i++){
131  for(j=0;j<descL->nb;j++){
132  double valL = L[j*tld(descL)+i];
133  double valR = R[j*tld(descR)+i];
134  double diff = fabs(valL - valR);
135  double thresh = 1e-14;
136  if(diff > thresh ){
137  printf("Tile:%d,%d. Elm:%d,%d val:%.2e %.2e\n", imt, jnt, i, j, valL, valR);
138  exit(1);
139  }
140  //printf("%g ", A[j*tld(descZ)+i]);
141  //printf("%g\t", A[j*descZ->n+i]);
142  //printf("(%d,%d,%d) %g\t", i,j,descZ->mb,A[j*descZ->mb+i]);
143  //printf("(%d,%d,%d) %g\t", i,j,descZ->n,A[j*descZ->n+i]);
144  //printf("(%d,%d,%d) %g [%d %d %d]\t", i,j,descZ->n,A[j*descZ->n+i], descZ->m, descZ->lm, descZ->ln);
145  }
146  }
147  }
148  }
149 }
150 
151 
152  int
153 RunTest(int *iparam, double *dparam, morse_time_t *t_, char* rankfile)
154 {
155  // print progress info only on ROOT process
156  if(MORSE_My_Mpi_Rank() != 0)
157  print_progress = 0;
158  PROGRESS("RunTest started");
159  // set alpha and beta which will be used in gemm
160  double alpha = 1.0, beta = 1.0;
161  // this paramater enables storing only diagonal tiles in a tall and skinny matrix
163 
165 
166  // Gemm requires more descriptors
167  //chameleon/runtime/starpu/control/runtime_descriptor.c
168  MORSE_user_tag_size(31,26);
169  //MORSE_user_tag_size(31,28);
170 
172 
173  // get parameters coming from command line
174  PASTE_CODE_IPARAM_LOCALS( iparam );
175  // set global variable so that p.. files can fill dense matrix
177  // calculate total number of mpi processes (it is not used for now)
178  num_mpi_ranks = P*Q;
181  int64_t _nb = iparam[IPARAM_NB];
182  LDB = chameleon_max(K, iparam[IPARAM_LDB]);
183  LDC = chameleon_max(M, iparam[IPARAM_LDC]);
184  int hicma_maxrank = iparam[IPARAM_HICMA_MAXRANK];
185 
186  // Idealy, in gemm C=AxB,
187  // A is M by K
188  // B is K by N
189  // C is M by N
190  // However work with square matrices for now. FIXME
191  assert(M==K);
192  // Maxrank is N for now FIXME
193  assert(M>=N);
194  // As a result
195  // Dense initial matrices:
196  // A is M by M
197  // B is M by M
198  // C is M by M
199  //
200  // Low rank matrices:
201  // A is M by N
202  // B is M by N
203  // C is M by N
204 
205  int saveN, saveNB;
206 
207  saveNB = NB;
208  NB = MB;
209  /* Allocate Data */
210  // initial matrices which will be used for accuracy checking
211  // TODO: since HICMA_zgytlr_Tile does not handle NULL dense pointer, have to create them, better condition it with 'check' variable
212  PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, double, MorseRealDouble, LDA, M, M );
213  PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, double, MorseRealDouble, LDB, M, M );
214  PASTE_CODE_ALLOCATE_MATRIX_TILE( descC, 1, double, MorseRealDouble, LDC, M, M );
215  PROGRESS("desc..'s are allocated");
216 
217  // gemm will not have dense diagonals in RND TILED. So remove desc..D's in the future. TODO
218  // TODO: since HICMA_zgytlr_Tile does not handle NULL dense pointer, have to create them, better condition it with 'check' variable
219  PASTE_CODE_ALLOCATE_MATRIX_TILE( descAD, 1, double, MorseRealDouble, LDA, M, M );
220  PASTE_CODE_ALLOCATE_MATRIX_TILE( descBD, 1, double, MorseRealDouble, LDB, M, M );
221  PASTE_CODE_ALLOCATE_MATRIX_TILE( descCD, 1, double, MorseRealDouble, LDC, M, M );
222  NB = saveNB;
223 
224 
225  //U and V's
226  saveN = N;
227  N = N * 2;
228  saveNB = NB;
229  NB = NB * 2;
230  PASTE_CODE_ALLOCATE_MATRIX_TILE( descAUV, 1, double, MorseRealDouble, LDC, M, N );
231  PASTE_CODE_ALLOCATE_MATRIX_TILE( descBUV, 1, double, MorseRealDouble, LDC, M, N );
232  PASTE_CODE_ALLOCATE_MATRIX_TILE( descCUV, 1, double, MorseRealDouble, LDC, M, N );
233  N = saveN;
234  NB = saveNB;
235  PROGRESS("desc..UV's are allocated");
236 
237  int mt = descAUV->mt;
238  int nt = descAUV->nt;
239  assert(mt == nt);
240 
241  // rank matrices
242  /* tile dimension of rank descriptor must be 1 */
243  /* when LD for rk matrices is 1, program exits*/
244  int bigMB = MB;
245  int bigNB = NB;
246  MB = NB = 1;
247  int ldrk = mt;//when I use fix number e.g. 2, program exited unexpectedly without any error message
248  PASTE_CODE_ALLOCATE_MATRIX_TILE( descArk, 1, double, MorseRealDouble, ldrk, mt, mt);
249  PASTE_CODE_ALLOCATE_MATRIX_TILE( descBrk, 1, double, MorseRealDouble, ldrk, mt, mt);
250  PASTE_CODE_ALLOCATE_MATRIX_TILE( descCrk, 1, double, MorseRealDouble, ldrk, mt, mt);
251  MB = bigMB;
252  NB = bigNB;
253  PROGRESS("desc..rk's are allocated");
254 
255  int _k = iparam[IPARAM_RK]; //genargs->k
256  double _acc = pow(10, -1.0*iparam[IPARAM_ACC]);
257 
258  char sym;
259  sym = 'N';
260  int probtype = iparam[IPARAM_HICMA_STARSH_PROB];
261  int maxrank = iparam[IPARAM_HICMA_STARSH_MAXRANK];
262  double ddecay = dparam[IPARAM_HICMA_STARSH_DECAY];
263  int initial_maxrank, final_maxrank;
264  double initial_avgrank, final_avgrank;
265  HICMA_problem_t hicma_problem;
266 
267  //BEGIN: rndtiled
269  hicma_problem.noise = 0.0;
270  }
271  //END: rndtiled
272 
273  //BEGIN: geostat
274  double theta[3] = {1, 0.1, 0.5};
276  hicma_problem.theta = theta;
277  hicma_problem.noise = 0.0;
278  }
279  //END: geostat
280 
281  //BEGIN: ss
283  // Correlation length
284  hicma_problem.beta = 0.1;
285  //If fixed rank is required set beta=1 and a sample case will be like this nb=25 maxrank=10 m=2500 So ranks will decrease.
286 
287  // Smoothing parameter for Matern kernel
288  hicma_problem.nu = 0.5;
289  // Shift added to diagonal elements
290  hicma_problem.noise = 1.e-4; //not enough for matrices larger than 600K
291  hicma_problem.noise = 5.e-4; //works for 640K
292  }
293  //END: ss
294  PROGRESS("generating coordinates started");
295  HICMA_zgenerate_problem(probtype, sym, ddecay, M, MB, mt, nt, &hicma_problem);
296  PROGRESS("generating coordinates ended");
297  mpiF = hicma_problem.starsh_format; // This is assignment will be hidden from user in release
298 
299  // enforce compression of diagonal tiles
300  int compress_diag = 1;
301  PROGRESS("zgytlr starting");
302  //desc[A,B,C] are original problem if global check is enabled
303  //desc[A,B,C]D are original problem. Temporary buffers should be used in hcore_zgytlr TODO Do not use desc..D in this gemm code
304  HICMA_zgytlr_Tile(MorseUpperLower, descAUV, descAD, descArk, 0, maxrank, _acc, compress_diag, descA);
305  HICMA_zgytlr_Tile(MorseUpperLower, descBUV, descBD, descBrk, 0, maxrank, _acc, compress_diag, descB);
306  HICMA_zgytlr_Tile(MorseUpperLower, descCUV, descCD, descCrk, 0, maxrank, _acc, compress_diag, descC);
307  PROGRESS("zgytlr finished");
308 
309  int set_diag = 0;
310  double diagVal = M;
311 
312  /* Save A for check */
313  PROGRESS("pasting original dense descC into C2 started");
314  // Adense: original dense problem.
315  PASTE_TILE_TO_LAPACK( descC, C2, check, double, LDC, M );
316  PROGRESS("pasting original dense descC into C2 finished");
317 
318  PROGRESS("gemm started");
319  START_TIMING();
320  HICMA_zgemm_Tile( MorseNoTrans, MorseNoTrans, alpha, //TODO
321  descAUV, descArk,
322  descBUV, descBrk,
323  beta,
324  descCUV, descCrk, _k, maxrank, _acc
325  );
326  STOP_TIMING();
327  fflush(stderr);
328  fflush(stdout);
329  PROGRESS("gemm finished");
330 
331  if(check){
332 
333  PASTE_TILE_TO_LAPACK( descA, A, check, double, LDA, M );
334  PASTE_TILE_TO_LAPACK( descB, B, check, double, LDB, M );
335  HICMA_zuncompress(MorseUpperLower, descCUV, descC, descCrk);
336 
337  PASTE_TILE_TO_LAPACK( descC, C, check, double, LDC, M );
338 
339 
340  dparam[IPARAM_RES] = hicma_z_check_gemm( MorseNoTrans, MorseNoTrans, M, M, M,
341  alpha, A, LDA, B, LDB, beta, C, C2, LDC,
342  &(dparam[IPARAM_ANORM]),
343  &(dparam[IPARAM_BNORM]),
344  &(dparam[IPARAM_XNORM]));
345  free(A); free(B); free(C); free(C2);
346  }
347 
348  PASTE_CODE_FREE_MATRIX( descAUV );
349  PASTE_CODE_FREE_MATRIX( descBUV );
350  PASTE_CODE_FREE_MATRIX( descCUV );
351  PROGRESS("desc..UV's are freed");
352 
353  PASTE_CODE_FREE_MATRIX( descAD );
354  PASTE_CODE_FREE_MATRIX( descBD );
355  PASTE_CODE_FREE_MATRIX( descCD );
356  PROGRESS("desc..D's are freed");
357 
358  PASTE_CODE_FREE_MATRIX( descArk );
359  PASTE_CODE_FREE_MATRIX( descBrk );
360  PASTE_CODE_FREE_MATRIX( descCrk );
361  PROGRESS("desc..rk's are freed");
362 
363  PASTE_CODE_FREE_MATRIX( descA );
364  PASTE_CODE_FREE_MATRIX( descB );
365  PASTE_CODE_FREE_MATRIX( descC );
366  PROGRESS("desc..'s are freed");
367  PROGRESS("freed descs");
368  return 0;
369 }
int RunTest(int *iparam, double *dparam, morse_time_t *t_, char *rankfile)
int main_print_mat
double timediff(struct timeval begin, struct timeval end)
double morse_time_t
Definition: timing.h:42
#define PASTE_CODE_ALLOCATE_MATRIX_TILE(_desc_, _cond_, _type_, _type2_, _lda_, _m_, _n_)
Definition: timing.h:150
#define A(m, n)
Definition: pzgemm.c:56
#define PASTE_TILE_TO_LAPACK(_desc_, _name_, _cond_, _type_, _lda_, _n_)
Definition: timing.h:168
int num_mpi_ranks
#define HICMA_STARSH_PROB_GEOSTAT
int main_print_index
#define PASTE_CODE_FREE_MATRIX(_desc_)
Definition: timing.h:165
int HICMA_zgemm_Tile(MORSE_enum transA, MORSE_enum transB, double alpha, MORSE_desc_t *AUV, MORSE_desc_t *Ark, MORSE_desc_t *BUV, MORSE_desc_t *Brk, double beta, MORSE_desc_t *CUV, MORSE_desc_t *Crk, int rk, int maxrk, double acc)
Definition: zgemm.c:109
time_t timer
struct tm * tm_info
int store_only_diagonal_tiles
#define HICMA_STARSH_PROB_RND
int print_mat
int print_index
int check
Definition: testing_zposv.c:76
STARSH_blrf * mpiF
double hicma_z_check_gemm(MORSE_enum transA, MORSE_enum transB, int M, int N, int K, double alpha, double *A, int LDA, double *B, int LDB, double beta, double *Cmorse, double *Cref, int LDC, double *Cinitnorm, double *Cmorsenorm, double *Clapacknorm)
int global_fixed_rank
int diag[2]
STARSH_blrf * starsh_format
Definition: hicma_struct.h:22
int use_scratch
#define C(m, n)
Definition: pzgemm.c:58
int HICMA_set_use_fast_hcore_zgemm()
Definition: hicma_init.c:22
int HICMA_zgytlr_Tile(MORSE_enum uplo, MORSE_desc_t *AUV, MORSE_desc_t *AD, MORSE_desc_t *Ark, unsigned long long int seed, int maxrank, double tol, int compress_diag, MORSE_desc_t *Dense)
Definition: zgytlr.c:93
#define HICMA_STARSH_PROB_SS
#define START_TIMING()
Definition: timing.h:259
int diag_nrows
char datebuf[128]
#define PROGRESS(str)
int print_progress
int global_always_fixed_rank
int global_check
#define STOP_TIMING()
Definition: timing.h:265
#define B(m, n)
Definition: pzgemm.c:57
#define PASTE_CODE_IPARAM_LOCALS(iparam)
Definition: timing.h:125
void check_same(MORSE_desc_t *descL, MORSE_desc_t *descR, char diag, char lower_triangle)
double * theta
Definition: hicma_struct.h:25