HiCMA
Hierarchical Computations on Manycore Architectures
testing_zposv.c
Go to the documentation of this file.
1 
22 /*
23  *
24  * file testing_zposv.c
25  *
26  * MORSE testing routines
27  * MORSE is a software package provided by Univ. of Tennessee,
28  * Univ. of California Berkeley and Univ. of Colorado Denver
29  *
30  * version 2.5.0
31  * comment This file has been automatically generated
32  * from Plasma 2.5.0 for MORSE 1.0.0
33  * author Bilel Hadri, Hatem Ltaief
34  * author Mathieu Faverge
35  * author Emmanuel Agullo
36  * author Cedric Castagnede
37  * date 2010-11-15
38  * precisions normal z -> c d s
39  *
40  */
41 
42 
43 
44 #include "timing.h"
45 #include "hicma_constants.h"
46 #include "hicma_struct.h"
47 #include "hicma_z.h"
48 #include "hicma.h"
49 #include "hicma_common.h"
50 
51 #include "auxcompute_z.h"
52 #include "auxdescutil.h"
53 
54 // zgytlr uses starsh in MPI mode.
55 STARSH_blrf *mpiF;
56 
57 char datebuf[128];
58 time_t timer;
59 struct tm* tm_info;
60 
62 int global_check = 0;
67 int run_potrf = 1;
68 int diag_nrows = 0;
70 int print_index = 0;
73 int print_mat = 0;
74 int use_scratch = 1; // Use scratch memory provided by starpu
76 int check = 0;
77 
78 
79 #include <time.h>
80 #include <sys/time.h>
81 
82 #include <stdlib.h>
83 #include <stdio.h>
84 #include <string.h>
85 #include <math.h>
86 
87 #include <morse.h>
88 #include "testing_zauxiliary.h"
89 
92  blas_colmajor = 102 };
93 
95  blas_base = 151,
96  blas_t = 152,
97  blas_rnd = 153,
98  blas_ieee = 154,
99  blas_emin = 155,
100  blas_emax = 156,
101  blas_eps = 157,
102  blas_prec = 158,
105  blas_sfmin = 161};
106 
116 
117 static void
118 BLAS_error(char *rname, int err, int val, int x) {
119  fprintf( stderr, "%s %d %d %d\n", rname, err, val, x );
120  abort();
121 }
122 
123 static
124 void
125 BLAS_dge_norm(enum blas_order_type order, enum blas_norm_type norm,
126  int m, int n, const double *a, int lda, double *res) {
127  int i, j; float anorm, v;
128  char rname[] = "BLAS_dge_norm";
129 
130  if (order != blas_colmajor) BLAS_error( rname, -1, order, 0 );
131 
132  if (norm == blas_frobenius_norm) {
133  anorm = 0.0f;
134  for (j = n; j; --j) {
135  for (i = m; i; --i) {
136  v = a[0];
137  anorm += v * v;
138  a++;
139  }
140  a += lda - m;
141  }
142  anorm = sqrt( anorm );
143  } else if (norm == blas_inf_norm) {
144  anorm = 0.0f;
145  for (i = 0; i < m; ++i) {
146  v = 0.0f;
147  for (j = 0; j < n; ++j) {
148  v += cabs( a[i + j * lda] );
149  }
150  if (v > anorm)
151  anorm = v;
152  }
153  } else {
154  BLAS_error( rname, -2, norm, 0 );
155  return;
156  }
157 
158  if (res) *res = anorm;
159 }
160 
161 static
162 double
163 BLAS_dpow_di(double x, int n) {
164  double rv = 1.0;
165 
166  if (n < 0) {
167  n = -n;
168  x = 1.0 / x;
169  }
170 
171  for (; n; n >>= 1, x *= x) {
172  if (n & 1)
173  rv *= x;
174  }
175 
176  return rv;
177 }
178 
179 static
180 double
181 BLAS_dfpinfo(enum blas_cmach_type cmach) {
182  double eps = 1.0, r = 1.0, o = 1.0, b = 2.0;
183  int t = 53, l = 1024, m = -1021;
184  char rname[] = "BLAS_dfpinfo";
185 
186  if ((sizeof eps) == sizeof(float)) {
187  t = 24;
188  l = 128;
189  m = -125;
190  } else {
191  t = 53;
192  l = 1024;
193  m = -1021;
194  }
195 
196  /* for (i = 0; i < t; ++i) eps *= half; */
197  eps = BLAS_dpow_di( b, -t );
198  /* for (i = 0; i >= m; --i) r *= half; */
199  r = BLAS_dpow_di( b, m-1 );
200 
201  o -= eps;
202  /* for (i = 0; i < l; ++i) o *= b; */
203  o = (o * BLAS_dpow_di( b, l-1 )) * b;
204 
205  switch (cmach) {
206  case blas_eps: return eps;
207  case blas_sfmin: return r;
208  default:
209  BLAS_error( rname, -1, cmach, 0 );
210  break;
211  }
212  return 0.0;
213 }
214 
215 int testing_dtrsmd(int argc, char** argv){}//FIXME Use CMakeLists from Chameleon
216 
217 static int check_factorization(int, double*, double*, int, int , double);
218 static int check_solution(int, int, double*, int, double*, double*, int, double);
228 int testing_dposv(int argc, char **argv)
229 {
230  HICMA_init();
231  //HICMA_set_print_index();
232  //HICMA_set_print_index_end();
233  //HICMA_unset_print_index_end();
234  /*HICMA_set_use_fast_hcore_zgemm();*/
235 
236  int hres = 0;
237 
238  /*int nbnode = 0;*/
239  /*MORSE_Comm_size( &nbnode );*/
240  /*printf("Myrank:%d nbnode:%d\n", MORSE_My_Mpi_Rank(), nbnode);*/
241  /*fflush( stdout );*/
242 
243  /* Check for number of arguments*/
244  if (argc != 12){
245  USAGE("POSV", "N LDA NRHS LDB",
246  " - N : the size of the matrix\n"
247  " - LDA : leading dimension of the matrix A\n"
248  " - NRHS : number of RHS\n"
249  " - LDB : leading dimension of the RHS B\n"
250  " - NB : Block size\n"
251  " - acc : Fixed accuracy threshold\n"
252  " - rk : Fixed rank threshold\n"
253  " - maxrank : maxrank for U and V storage\n"
254  " - compmaxrank : maxrank for starsh and buffers\n"
255  " - P : num procs in row dim\n"
256  " - Q : num procs in col dim\n"
257  " - check : {0,1}\n"
258  );
259  return -1;
260  }
261 
262  int N = atoi(argv[0]);
263  int LDA = atoi(argv[1]);
264  int NRHS = atoi(argv[2]);
265  int LDB = atoi(argv[3]);
266  int NB = atoi(argv[4]);
267  double fixedacc = atof(argv[5]);
268  int fixedrank = atoi(argv[6]);
269  int maxrank = atoi(argv[7]);
270  int comp_maxrank = atoi(argv[8]);
271  int P = atoi(argv[9]);
272  int Q = atoi(argv[10]);
273  check = atoi(argv[11]);
274  double eps;
275  int uplo;
276  int info_solution, info_factorization;
277  int trans1, trans2;
278  if(check) {
279  printf("A: %d-by-%d LD:%d\n", N, N, LDA);
280  printf("B: %d-by-%d LD:%d\n", N, NRHS, LDB);
281  printf("NB: %d fixedrank: %d fixedacc:%.1e\n", NB ,fixedrank, fixedacc);
282  printf("MaxrankUV: %d MaxrankBuffers: %d\n", maxrank, comp_maxrank);
283  }
284 
285  double *A1 = NULL;
286  double *A2 = NULL;
287  double *B1 = NULL;
288  double *B2 = NULL;
289 
290  if(check) {
291  A1 = (double *)malloc(LDA*N*sizeof(double));
292  A2 = (double *)malloc(LDA*N*sizeof(double));
293  B1 = (double *)malloc(LDB*NRHS*sizeof(double));
294  B2 = (double *)malloc(LDB*NRHS*sizeof(double));
295 
296  /* Check if unable to allocate memory */
297  if ((!A1)||(!A2)||(!B1)||(!B2)){
298  printf("Out of Memory \n ");
299  return -2;
300  }
301  }
302  eps = BLAS_dfpinfo( blas_eps );
303 
304  uplo = MorseUpper;
305  uplo = MorseLower; //to comply with current HICMA_dpotrf
306  trans1 = uplo == MorseUpper ? MorseTrans : MorseNoTrans;
307  trans2 = uplo == MorseUpper ? MorseNoTrans : MorseTrans;
308  /*-------------------------------------------------------------
309  * TESTING ZPOTRF + ZPTRSM + ZTRSM
310  */
311 
312  /* Initialize A1 and A2 for Symmetric Positif Matrix */
313  /*MORSE_dplgsy( N, MorseUpperLower, N, A1, LDA, 51 );*/
314  /*MORSE_dlacpy( MorseUpperLower, N, N, A1, LDA, A2, LDA );*/
315 
316  /* Initialize B1 and B2 */
317  /*MORSE_dplrnt( N, NRHS, B1, LDB, 371 );*/
318  /*MORSE_dlacpy( MorseUpperLower, N, NRHS, B1, LDB, B2, LDB );*/
319 
320  /*MORSE_dpotrf(uplo, N, A2, LDA);*/
321  /*MORSE_dtrsm(MorseLeft, uplo, trans1, MorseNonUnit,*/
322  /*N, NRHS, 1.0, A2, LDA, B2, LDB);*/
323  /*MORSE_dtrsm(MorseLeft, uplo, trans2, MorseNonUnit,*/
324  /*N, NRHS, 1.0, A2, LDA, B2, LDB);*/
325 
326  // this paramater enables storing only diagonal tiles in a tall and skinny matrix
328  //eps = fixedacc/10;
329  int NBxMaxrank = NB * maxrank;
330  int NT = N/NB;
331  int NTxMaxrank = NT * maxrank;
332  int NBxNB = NB * NB;
333  int NBxNRHS = NB * NRHS;
334  int status;
335  HICMA_problem_t hicma_problem;
336  int probtype;
337  char sym;
338  double ddecay;
339  /*BEGIN AAAAAAAAAAAAAAAAAAAAAAAA */
341  MORSE_desc_t *descAUV = NULL;
342  status = MORSE_Desc_Create(&descAUV, NULL, MorseRealDouble, NB, maxrank*2, NBxMaxrank*2, LDA, NTxMaxrank*2, 0, 0, N, NTxMaxrank*2, P, Q);
343  if(status != MORSE_SUCCESS){ return status; }
344  MORSE_desc_t *descAD = NULL;
345  status = MORSE_Desc_Create(&descAD, NULL, MorseRealDouble, NB, NB, NBxNB, LDA, NB, 0, 0, N, NB, P, Q);
346  if(status != MORSE_SUCCESS){ return status; }
347 
348  MORSE_desc_t *descArk = NULL;
349  status = MORSE_Desc_Create(&descArk, NULL, MorseRealDouble, 1, 1, 1, NT, NT, 0, 0, NT, NT, P, Q);
350  if(status != MORSE_SUCCESS){ return status; }
352  MORSE_dlaset_Tile(MorseLower, 0.0, NB, descArk);
353 
354  MORSE_desc_t *descAdense = NULL;
355  if(check) {
356  status = MORSE_Desc_Create(&descAdense, NULL, MorseRealDouble, NB, NB, NBxNB, LDA, N, 0, 0, N, N, P, Q);
357  if(status != MORSE_SUCCESS){ return status; }
358  }
359  probtype = HICMA_STARSH_PROB_SS;
360  sym = 'S';
361  hicma_problem.ndim = 2;
362  // Correlation length
363  hicma_problem.beta = 0.1;
364  // Smoothing parameter for Matern kernel
365  hicma_problem.nu = 0.5;
366  // Shift added to diagonal elements
367  hicma_problem.noise = 1.e-4; //not enough for matrices larger than 600K
368  hicma_problem.noise = 5.e-4; //works for 640K but does not work for 10M
369  hicma_problem.noise = 1.e-2; //
370 
371  // probtype = HICMA_STARSH_PROB_EDSIN;
372  hicma_problem.wave_k = 20;
373  hicma_problem.diag = N;
374 
375  HICMA_zgenerate_problem(probtype, sym, ddecay, N, NB, NT, NT, &hicma_problem);
376 
377  if(check) {
379  HICMA_zhagdm_Tile(MorseLower, descAdense);
380  }
381  struct timeval tvalBefore, tvalAfter;
382  gettimeofday (&tvalBefore, NULL);
384  HICMA_zhagdmdiag_Tile(MorseUpperLower, descAD);
385  gettimeofday (&tvalAfter, NULL);
386  double tgad = (tvalAfter.tv_sec - tvalBefore.tv_sec)
387  +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0;
388  gettimeofday (&tvalBefore, NULL);
390  HICMA_zhagcm_Tile(MorseLower, descAUV, descArk, N, N, NB, NB, maxrank, fixedacc);
391  gettimeofday (&tvalAfter, NULL);
392  double tgauv = (tvalAfter.tv_sec - tvalBefore.tv_sec)
393  +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0; //FIXME implement the function
395 
396  if(calc_rank_stat){ // print initial ranks of A
397  PASTE_TILE_TO_LAPACK( descArk, Ark_initial, 1, double, NT, NT );
398  if(MORSE_My_Mpi_Rank()==0){
399  HICMA_stat_t hicma_statrk_initial;
400  zget_stat(MorseLower, Ark_initial, NT, NT, NT, &hicma_statrk_initial);
401  printf("------------------------------A initial_ranks:");
402  zprint_stat(hicma_statrk_initial);
403  fflush(stderr);
404  fflush(stdout);
405  }
406  }
407  double *Aorg = NULL;
408  if(check) {
409  PASTE_TILE_TO_LAPACK( descAdense, _Aorg, 1, double, LDA, N );
410  Aorg = _Aorg;
411  { // fill upper part of Aorg so the rest of code taken from Chameleon works correctly.
412  int i, j;
413  for(j = 0; j < N; j++){
414  for(i = 0; i < j; i++){
415  //Aorg[j*LDA+i] = 0.0;
416  Aorg[j*LDA+i] = Aorg[i*LDA+j];
417  // Acham[j*LDA+i] = 0.0;
418  }
419  }
420  }
421  }
422  /*END AAAAAAAAAAAAAAAAAAAAAAAA */
423  /*BEGIN BBBBBBBBBBBBBBBBBBBBBBBB */
425  int NTB = ceil(NRHS*1.0/NB);
426  int NTBxMaxrank = NTB * maxrank;
427  MORSE_desc_t *descBUV = NULL;
428  status = MORSE_Desc_Create(&descBUV, NULL, MorseRealDouble, NB, maxrank*2, NBxMaxrank*2, LDA, NTBxMaxrank*2, 0, 0, N, NTBxMaxrank*2, P, Q);
429  if(status != MORSE_SUCCESS){ return status; }
430  MORSE_desc_t *descBrk = NULL;
431  status = MORSE_Desc_Create(&descBrk, NULL, MorseRealDouble, 1, 1, 1, NT, NTB, 0, 0, NT, NTB, P, Q);
432  if(status != MORSE_SUCCESS){ return status; }
433  MORSE_desc_t *descBdense = NULL;
434  if(check) {
435  status = MORSE_Desc_Create(&descBdense, NULL, MorseRealDouble, NB, NB, NBxNB, LDA, NRHS, 0, 0, N, NRHS, P, Q);
436  if(status != MORSE_SUCCESS){ return status; }
437  }
438  /*if randtlr*/
439  //probtype = HICMA_STARSH_PROB_RND;
440  sym = 'N';
441  hicma_problem.noise = 0.0; //value added to diagonal
442  //hicma_problem.wave_k = 20;
443  hicma_problem.diag = hicma_problem.wave_k;
444 
445  ddecay = 1e-2;
446  double fixedacc_B = 1e-12;//fixedacc;
447  fixedacc_B = fixedacc;
448  HICMA_zgenerate_problem(probtype, sym, ddecay, N, NB, NT, NT, &hicma_problem);
449  mpiF = hicma_problem.starsh_format; // This is assignment will be hidden from user in release
450 
451  if(check) {
453  HICMA_zhagdm_Tile(MorseUpperLower, descBdense);
454  }
456  gettimeofday (&tvalBefore, NULL);
457  HICMA_zhagcm_Tile(MorseUpperLower, descBUV, descBrk, N, NRHS, NB, NB, maxrank, fixedacc_B);
458  gettimeofday (&tvalAfter, NULL);
459  double tgbuv = (tvalAfter.tv_sec - tvalBefore.tv_sec)
460  +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0;
461  //check_same(descBrk, descBrk2, 'A', 'A'); //FIXME implement the function
463 
464  if(calc_rank_stat){ // print initial ranks
465  PASTE_TILE_TO_LAPACK( descBrk, Brk_initial, 1, double, NT, NTB);
466  if(MORSE_My_Mpi_Rank()==0){
467  HICMA_stat_t hicma_statrk_initial;
468  zget_stat(MorseLower, Brk_initial, NT, NTB, NT, &hicma_statrk_initial);
469  printf("------------------------------B initial_ranks:");
470  zprint_stat(hicma_statrk_initial);
471  fflush(stderr);
472  fflush(stdout);
473  }
474  }
475  double *Borg = NULL;
476  if(check){
477  PASTE_TILE_TO_LAPACK( descBdense, _Borg, 1, double, LDA, NRHS );
478  Borg = _Borg;
479  /*END BBBBBBBBBBBBBBBBBBBBBBBB */
480  // Save A
481  MORSE_dlacpy( MorseUpperLower, N, N, Aorg, LDA, A1, LDA );
482  MORSE_dlacpy( MorseUpperLower, N, N, Aorg, LDA, A2, LDA );
483  if(main_print_mat ){printf("Aorg\n");printmat_format(Aorg,N,N,LDA,1000, 1000, 0);}
484  // Save B
485  MORSE_dlacpy( MorseUpperLower, N, NRHS, Borg, LDA, B1, LDA );
486  MORSE_dlacpy( MorseUpperLower, N, NRHS, Borg, LDA, B2, LDA );
487  if(main_print_mat ){printf("Borg\n");printmat_format(Borg,N,NRHS,LDB,1000, 1000, 0);}
488 
489 
490  MORSE_dpotrf(uplo, N, A2, LDA);
491  MORSE_dtrsm(MorseLeft, uplo, trans1, MorseNonUnit,
492  N, NRHS, 1.0, A2, LDA, B2, LDB);
493  if(0)cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, 1.0, A1, LDA, B2, LDB, 0.0, B1, LDB);
494  MORSE_dtrsm(MorseLeft, uplo, trans2, MorseNonUnit,
495  N, NRHS, 1.0, A2, LDA, B2, LDB);
496  }
498  gettimeofday (&tvalBefore, NULL);
499  HICMA_zpotrf_Tile(MorseLower, descAUV, descAD, descArk, fixedrank, comp_maxrank, fixedacc );
500  gettimeofday (&tvalAfter, NULL);
501  double tpotrf = (tvalAfter.tv_sec - tvalBefore.tv_sec)
502  +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0;
503  double *Ahicma = NULL;
504  if(check) {
506  HICMA_zuncompress(MorseLower, descAUV, descAdense, descArk);
508  HICMA_zdiag_vec2mat(descAD, descAdense);
509  PASTE_TILE_TO_LAPACK( descAdense, _Ahicma, 1, double, LDA, N );
510  Ahicma = _Ahicma;
511  }
512  if(calc_rank_stat){ // print final ranks of L
513  PASTE_TILE_TO_LAPACK( descArk, Ark_initial, 1, double, NT, NT );
514  if(MORSE_My_Mpi_Rank()==0){
515  HICMA_stat_t hicma_statrk_initial;
516  zget_stat(MorseLower, Ark_initial, NT, NT, NT, &hicma_statrk_initial);
517  printf("------------------------------A final_ranks after HICMA_potrf():");
518  zprint_stat(hicma_statrk_initial);
519  fflush(stderr);
520  fflush(stdout);
521  }
522  }
523 
524  if(check) {
525  //Check Chameleon. A1 is original matrix, A2 is L coming from MORSE_dpotrf
526  //info_factorization = check_factorization( N, A1, A2, LDA, uplo, eps);
527  //Check Hicma. Aorg is original matrix, Ahicma is L coming from HICMA_dpotrf
528  info_factorization = check_factorization( N, Aorg, Ahicma, LDA, uplo, fixedacc);
529  //return info_factorization;
530 
531  //check if uncompress works well for B
532  /*HICMA_zuncompress(MorseUpperLower, descBUV, descBdense2, descBrk); */
533  /*PASTE_TILE_TO_LAPACK( descBdense2, Bhicma2, 1, double, LDA, N );*/
534  /*MORSE_dtrsm(MorseLeft, uplo, trans1, MorseNonUnit,*/
535  /*N, NRHS, 1.0, A2, LDA, Bhicma2, LDB);*/
536  /*MORSE_dtrsm(MorseLeft, uplo, trans2, MorseNonUnit,*/
537  /*N, NRHS, 1.0, A2, LDA, Bhicma2, LDB);*/
538  /*info_solution = check_solution(N, NRHS, Aorg, LDA, Borg, Bhicma2, LDB, eps);*/
539  /*return 0;*/
540 
541  printf("\n");
542  printf("------ TESTS FOR CHAMELEON ZPOTRF + ZTRSM + ZTRSM ROUTINE ------- \n");
543  printf(" Size of the Matrix %d by %d\n", N, N);
544  printf("\n");
545  printf(" The matrix A is randomly generated for each test.\n");
546  printf("============\n");
547  printf(" The relative machine precision (eps) is to be %e \n", eps);
548  printf(" Computational tests pass if scaled residuals are less than 60.\n");
549  }
550  if(0)MORSE_dtrsm_Tile(MorseLeft, uplo, trans1, MorseNonUnit, 1.0,
551  descAdense, descBdense);
552  if(0)MORSE_dtrsm_Tile(MorseLeft, uplo, trans2, MorseNonUnit, 1.0,
553  descAdense, descBdense);
554  gettimeofday (&tvalBefore, NULL);
555  if(1)HICMA_ztrsm_Tile(MorseLeft, uplo, trans1, MorseNonUnit, 1.0,
556  descAUV,
557  descAD,
558  descArk,
559  descBUV,
560  descBrk,
561  fixedrank, comp_maxrank, fixedacc);
562  gettimeofday (&tvalAfter, NULL);
563  double ttrsm1 = (tvalAfter.tv_sec - tvalBefore.tv_sec)
564  +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0;
565 
566  if(calc_rank_stat){ // print ranks for B after 1st trsm
567  PASTE_TILE_TO_LAPACK( descBrk, Brk_initial, 1, double, NT, NTB );
568  if(MORSE_My_Mpi_Rank()==0){
569  HICMA_stat_t hicma_statrk_initial;
570  zget_stat(MorseUpperLower, Brk_initial, NT, NTB, NT, &hicma_statrk_initial);
571  printf("------------------------------B ranks after 1st trsm:");
572  zprint_stat(hicma_statrk_initial);
573  fflush(stderr);
574  fflush(stdout);
575  }
576  }
577  gettimeofday (&tvalBefore, NULL);
578  if(1)HICMA_ztrsm_Tile(MorseLeft, uplo, trans2, MorseNonUnit, 1.0,
579  descAUV,
580  descAD,
581  descArk,
582  descBUV,
583  descBrk,
584  fixedrank, comp_maxrank, fixedacc);
585  gettimeofday (&tvalAfter, NULL);
586  double ttrsm2 = (tvalAfter.tv_sec - tvalBefore.tv_sec)
587  +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0;
588  if(calc_rank_stat){ // print initial ranks
589  PASTE_TILE_TO_LAPACK( descBrk, Brk_initial, 1, double, NT, NTB );
590  if(MORSE_My_Mpi_Rank()==0){
591  HICMA_stat_t hicma_statrk_initial;
592  zget_stat(MorseUpperLower, Brk_initial, NT, 1, NT, &hicma_statrk_initial);
593  printf("------------------------------B ranks after 2nd trsm:");
594  zprint_stat(hicma_statrk_initial);
595  fflush(stderr);
596  fflush(stdout);
597  }
598  }
599 
600 
601  if(MORSE_My_Mpi_Rank()==0){
602  printf("%d %d %d %d %d %d %d %d %d %.1e %d %d ", P*Q, P, Q, N, NRHS, LDA, LDB, NB, fixedrank, fixedacc, maxrank, comp_maxrank);
603  printf("%g %g %g %g %g %g\n", tgad, tgauv, tgbuv, tpotrf, ttrsm1, ttrsm2);
604  fflush(stderr);
605  fflush(stdout);
606  }
607 
608  if(check) {
609  //printf("%s %d %s EXITING %e\n", __FILE__, __LINE__, __func__, fixedacc);exit(0);
610  HICMA_zuncompress_custom_size(MorseUpperLower, descBUV, descBdense, descBrk, N, NRHS, NB, NB);
611  PASTE_TILE_TO_LAPACK( descBdense, Bhicma, 1, double, LDA, NRHS );
612  info_solution = check_solution(N, NRHS, Aorg, LDA, Borg, Bhicma, LDB, fixedacc);
613  /* Check the factorization and the solution */
614  //info_factorization = check_factorization( N, A1, A2, LDA, uplo, eps);
615  info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps);
616 
617  if ((info_solution == 0)&(info_factorization == 0)){
618  printf("***************************************************\n");
619  printf(" ---- TESTING ZPOTRF + ZTRSM + ZTRSM ..... PASSED !\n");
620  printf("***************************************************\n");
621  }
622  else{
623  printf("***************************************************\n");
624  printf(" - TESTING ZPOTRF + ZTRSM + ZTRSM ... FAILED !\n");
625  printf("***************************************************\n");
626  }
627 
628  free(A1); free(A2); free(B1); free(B2);
629  }
630  if(check){
631  PASTE_CODE_FREE_MATRIX( descAdense );
632  PASTE_CODE_FREE_MATRIX( descBdense );
633  }
634  PASTE_CODE_FREE_MATRIX( descAD );
635  PASTE_CODE_FREE_MATRIX( descAUV );
636  PASTE_CODE_FREE_MATRIX( descArk );
637 
638  PASTE_CODE_FREE_MATRIX( descBUV );
639  PASTE_CODE_FREE_MATRIX( descBrk );
640 
641 
642  return hres;
643 }
644 
645 
646 /*------------------------------------------------------------------------
647  * Check the factorization of the matrix A2
648  */
649 static int check_factorization(int N, double *A1, double *A2, int LDA, int uplo, double eps)
650 {
651  double Anorm, Rnorm;
652  double alpha;
653  int info_factorization;
654  int i,j;
655 
656  double *Residual = (double *)malloc(N*N*sizeof(double));
657  double *L1 = (double *)malloc(N*N*sizeof(double));
658  double *L2 = (double *)malloc(N*N*sizeof(double));
659  double *work = (double *)malloc(N*sizeof(double));
660 
661  memset((void*)L1, 0, N*N*sizeof(double));
662  memset((void*)L2, 0, N*N*sizeof(double));
663 
664  alpha= 1.0;
665 
666  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);
667 
668  /* Dealing with L'L or U'U */
669  if (uplo == MorseUpper){
670  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N);
671  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N);
672  cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
673  }
674  else{
675  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N);
676  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N);
677  cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
678  }
679 
680  /* Compute the Residual || A -L'L|| */
681  for (i = 0; i < N; i++)
682  for (j = 0; j < N; j++)
683  Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];
684 
685  BLAS_dge_norm( blas_colmajor, blas_inf_norm, N, N, Residual, N, &Rnorm );
686  BLAS_dge_norm( blas_colmajor, blas_inf_norm, N, N, A1, LDA, &Anorm );
687 
688  printf("============\n");
689  printf("Checking the Cholesky Factorization \n");
690  printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e eps:%.1e\n",Rnorm/(Anorm*N*eps), eps);
691 
692  if ( isnan(Rnorm/(Anorm*N*eps)) || isinf(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 60.0) ){
693  printf("-- Factorization is suspicious ! \n");
694  info_factorization = 1;
695  }
696  else{
697  printf("-- Factorization is CORRECT ! \n");
698  info_factorization = 0;
699  }
700 
701  free(Residual); free(L1); free(L2); free(work);
702 
703  return info_factorization;
704 }
705 
706 
707 /*------------------------------------------------------------------------
708  * Check the accuracy of the solution of the linear system
709  */
710 
711 static int check_solution(int N, int NRHS, double *A1, int LDA, double *B1, double *B2, int LDB, double eps )
712 {
713  int info_solution;
714  double Rnorm, Anorm, Xnorm, Bnorm, result;
715  double alpha, beta;
716  double *work = (double *)malloc(N*sizeof(double));
717 
718  alpha = 1.0;
719  beta = -1.0;
720 
721  BLAS_dge_norm( blas_colmajor, blas_inf_norm, N, NRHS, B2, LDB, &Xnorm );
722  BLAS_dge_norm( blas_colmajor, blas_inf_norm, N, N, A1, LDA, &Anorm );
723  BLAS_dge_norm( blas_colmajor, blas_inf_norm, N, NRHS, B1, LDB, &Bnorm );
724 
725  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, (alpha), A1, LDA, B2, LDB, (beta), B1, LDB);
726  BLAS_dge_norm( blas_colmajor, blas_inf_norm, N, NRHS, B1, LDB, &Rnorm );
727 
728  if (getenv("MORSE_TESTING_VERBOSE")) {
729  printf( "||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e eps:%.2e\n", Anorm, Xnorm, Bnorm, Rnorm, eps );
730  }
731  result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ;
732  printf("============\n");
733  printf("Checking the Residual of the solution \n");
734  printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result);
735 
736  if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
737  printf("-- The solution is suspicious ! \n");
738  info_solution = 1;
739  }
740  else{
741  printf("-- The solution is CORRECT ! \n");
742  info_solution = 0;
743  }
744 
745  free(work);
746 
747  return info_solution;
748 }
blas_order_type
Definition: testing_zposv.c:90
blas_norm_type
#define USAGE(name, args, details)
struct tm * tm_info
Definition: testing_zposv.c:59
int print_index_end
Definition: testing_zposv.c:71
int norm[4]
int global_check
Definition: testing_zposv.c:62
#define PASTE_TILE_TO_LAPACK(_desc_, _name_, _cond_, _type_, _lda_, _n_)
Definition: timing.h:168
STARSH_blrf * mpiF
Definition: testing_zposv.c:55
int print_mat
Definition: testing_zposv.c:73
#define PASTE_CODE_FREE_MATRIX(_desc_)
Definition: timing.h:165
int global_omit_computation
Definition: testing_zposv.c:65
char datebuf[128]
Definition: testing_zposv.c:57
int main_print_mat
Definition: testing_zposv.c:72
time_t timer
Definition: testing_zposv.c:58
int use_scratch
Definition: testing_zposv.c:74
int global_fixed_rank
Definition: testing_zposv.c:64
int store_only_diagonal_tiles
Definition: testing_zposv.c:61
int diag_nrows
Definition: testing_zposv.c:68
int check
Definition: testing_zposv.c:76
int main_print_index
Definition: testing_zposv.c:69
int testing_dposv(int argc, char **argv)
int HICMA_init()
Definition: hicma_init.c:2
STARSH_blrf * starsh_format
Definition: hicma_struct.h:22
int num_mpi_ranks
Definition: testing_zposv.c:66
#define HICMA_STARSH_PROB_SS
int HICMA_zhagdm_Tile(MORSE_enum uplo, MORSE_desc_t *Dense)
Definition: zhagdm.c:57
int HICMA_zhagdmdiag_Tile(MORSE_enum uplo, MORSE_desc_t *Dense)
Definition: zhagdm.c:162
int global_always_fixed_rank
Definition: testing_zposv.c:63
int testing_dtrsmd(int argc, char **argv)
int HICMA_ztrsm_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, double alpha, MORSE_desc_t *AUV, MORSE_desc_t *AD, MORSE_desc_t *Ark, MORSE_desc_t *BUV, MORSE_desc_t *Brk, int rk, int maxrk, double acc)
Definition: ztrsm.c:104
int HICMA_zpotrf_Tile(MORSE_enum uplo, MORSE_desc_t *AUV, MORSE_desc_t *AD, MORSE_desc_t *Ark, int rk, int maxrk, double acc)
Definition: zpotrf.c:79
int run_potrf
Definition: testing_zposv.c:67
int uplo[2]
int calc_rank_stat
Definition: testing_zposv.c:75
int print_index
Definition: testing_zposv.c:70
blas_cmach_type
Definition: testing_zposv.c:94
int HICMA_zhagcm_Tile(MORSE_enum uplo, MORSE_desc_t *AUV, MORSE_desc_t *Ark, int numrows_matrix, int numcols_matrix, int numrows_block, int numcols_block, int maxrank, double tol)
Definition: zhagcm.c:92