49 #include "hicma_common.h" 51 #include "auxcompute_z.h" 52 #include "auxdescutil.h" 118 BLAS_error(
char *rname,
int err,
int val,
int x) {
119 fprintf( stderr,
"%s %d %d %d\n", rname, err, val, x );
126 int m,
int n,
const double *a,
int lda,
double *res) {
127 int i, j;
float anorm, v;
128 char rname[] =
"BLAS_dge_norm";
130 if (order !=
blas_colmajor) BLAS_error( rname, -1, order, 0 );
134 for (j = n; j; --j) {
135 for (i = m; i; --i) {
142 anorm = sqrt( anorm );
145 for (i = 0; i < m; ++i) {
147 for (j = 0; j < n; ++j) {
148 v += cabs( a[i + j * lda] );
154 BLAS_error( rname, -2,
norm, 0 );
158 if (res) *res = anorm;
163 BLAS_dpow_di(
double x,
int n) {
171 for (; n; n >>= 1, x *= x) {
182 double eps = 1.0, r = 1.0, o = 1.0, b = 2.0;
183 int t = 53, l = 1024, m = -1021;
184 char rname[] =
"BLAS_dfpinfo";
186 if ((
sizeof eps) ==
sizeof(
float)) {
197 eps = BLAS_dpow_di( b, -t );
199 r = BLAS_dpow_di( b, m-1 );
203 o = (o * BLAS_dpow_di( b, l-1 )) * b;
209 BLAS_error( rname, -1, cmach, 0 );
217 static int check_factorization(
int,
double*,
double*,
int,
int ,
double);
218 static int check_solution(
int,
int,
double*,
int,
double*,
double*,
int,
double);
245 USAGE(
"POSV",
"N LDA NRHS LDB",
246 " - N : the size of the matrix\n" 247 " - LDA : leading dimension of the matrix A\n" 248 " - NRHS : number of RHS\n" 249 " - LDB : leading dimension of the RHS B\n" 250 " - NB : Block size\n" 251 " - acc : Fixed accuracy threshold\n" 252 " - rk : Fixed rank threshold\n" 253 " - maxrank : maxrank for U and V storage\n" 254 " - compmaxrank : maxrank for starsh and buffers\n" 255 " - P : num procs in row dim\n" 256 " - Q : num procs in col dim\n" 262 int N = atoi(argv[0]);
263 int LDA = atoi(argv[1]);
264 int NRHS = atoi(argv[2]);
265 int LDB = atoi(argv[3]);
266 int NB = atoi(argv[4]);
267 double fixedacc = atof(argv[5]);
268 int fixedrank = atoi(argv[6]);
269 int maxrank = atoi(argv[7]);
270 int comp_maxrank = atoi(argv[8]);
271 int P = atoi(argv[9]);
272 int Q = atoi(argv[10]);
273 check = atoi(argv[11]);
276 int info_solution, info_factorization;
279 printf(
"A: %d-by-%d LD:%d\n", N, N, LDA);
280 printf(
"B: %d-by-%d LD:%d\n", N, NRHS, LDB);
281 printf(
"NB: %d fixedrank: %d fixedacc:%.1e\n", NB ,fixedrank, fixedacc);
282 printf(
"MaxrankUV: %d MaxrankBuffers: %d\n", maxrank, comp_maxrank);
291 A1 = (
double *)malloc(LDA*N*
sizeof(
double));
292 A2 = (
double *)malloc(LDA*N*
sizeof(
double));
293 B1 = (
double *)malloc(LDB*NRHS*
sizeof(
double));
294 B2 = (
double *)malloc(LDB*NRHS*
sizeof(
double));
297 if ((!A1)||(!A2)||(!B1)||(!B2)){
298 printf(
"Out of Memory \n ");
306 trans1 =
uplo == MorseUpper ? MorseTrans : MorseNoTrans;
307 trans2 =
uplo == MorseUpper ? MorseNoTrans : MorseTrans;
329 int NBxMaxrank = NB * maxrank;
331 int NTxMaxrank = NT * maxrank;
333 int NBxNRHS = NB * NRHS;
341 MORSE_desc_t *descAUV = NULL;
342 status = MORSE_Desc_Create(&descAUV, NULL, MorseRealDouble, NB, maxrank*2, NBxMaxrank*2, LDA, NTxMaxrank*2, 0, 0, N, NTxMaxrank*2, P, Q);
343 if(status != MORSE_SUCCESS){
return status; }
344 MORSE_desc_t *descAD = NULL;
345 status = MORSE_Desc_Create(&descAD, NULL, MorseRealDouble, NB, NB, NBxNB, LDA, NB, 0, 0, N, NB, P, Q);
346 if(status != MORSE_SUCCESS){
return status; }
348 MORSE_desc_t *descArk = NULL;
349 status = MORSE_Desc_Create(&descArk, NULL, MorseRealDouble, 1, 1, 1, NT, NT, 0, 0, NT, NT, P, Q);
350 if(status != MORSE_SUCCESS){
return status; }
352 MORSE_dlaset_Tile(MorseLower, 0.0, NB, descArk);
354 MORSE_desc_t *descAdense = NULL;
356 status = MORSE_Desc_Create(&descAdense, NULL, MorseRealDouble, NB, NB, NBxNB, LDA, N, 0, 0, N, N, P, Q);
357 if(status != MORSE_SUCCESS){
return status; }
361 hicma_problem.
ndim = 2;
363 hicma_problem.
beta = 0.1;
365 hicma_problem.
nu = 0.5;
367 hicma_problem.
noise = 1.e-4;
368 hicma_problem.
noise = 5.e-4;
369 hicma_problem.
noise = 1.e-2;
372 hicma_problem.
wave_k = 20;
373 hicma_problem.
diag = N;
375 HICMA_zgenerate_problem(probtype, sym, ddecay, N, NB, NT, NT, &hicma_problem);
381 struct timeval tvalBefore, tvalAfter;
382 gettimeofday (&tvalBefore, NULL);
385 gettimeofday (&tvalAfter, NULL);
386 double tgad = (tvalAfter.tv_sec - tvalBefore.tv_sec)
387 +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0;
388 gettimeofday (&tvalBefore, NULL);
390 HICMA_zhagcm_Tile(MorseLower, descAUV, descArk, N, N, NB, NB, maxrank, fixedacc);
391 gettimeofday (&tvalAfter, NULL);
392 double tgauv = (tvalAfter.tv_sec - tvalBefore.tv_sec)
393 +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0;
398 if(MORSE_My_Mpi_Rank()==0){
400 zget_stat(MorseLower, Ark_initial, NT, NT, NT, &hicma_statrk_initial);
401 printf(
"------------------------------A initial_ranks:");
402 zprint_stat(hicma_statrk_initial);
413 for(j = 0; j < N; j++){
414 for(i = 0; i < j; i++){
416 Aorg[j*LDA+i] = Aorg[i*LDA+j];
425 int NTB = ceil(NRHS*1.0/NB);
426 int NTBxMaxrank = NTB * maxrank;
427 MORSE_desc_t *descBUV = NULL;
428 status = MORSE_Desc_Create(&descBUV, NULL, MorseRealDouble, NB, maxrank*2, NBxMaxrank*2, LDA, NTBxMaxrank*2, 0, 0, N, NTBxMaxrank*2, P, Q);
429 if(status != MORSE_SUCCESS){
return status; }
430 MORSE_desc_t *descBrk = NULL;
431 status = MORSE_Desc_Create(&descBrk, NULL, MorseRealDouble, 1, 1, 1, NT, NTB, 0, 0, NT, NTB, P, Q);
432 if(status != MORSE_SUCCESS){
return status; }
433 MORSE_desc_t *descBdense = NULL;
435 status = MORSE_Desc_Create(&descBdense, NULL, MorseRealDouble, NB, NB, NBxNB, LDA, NRHS, 0, 0, N, NRHS, P, Q);
436 if(status != MORSE_SUCCESS){
return status; }
441 hicma_problem.
noise = 0.0;
446 double fixedacc_B = 1e-12;
447 fixedacc_B = fixedacc;
448 HICMA_zgenerate_problem(probtype, sym, ddecay, N, NB, NT, NT, &hicma_problem);
456 gettimeofday (&tvalBefore, NULL);
457 HICMA_zhagcm_Tile(MorseUpperLower, descBUV, descBrk, N, NRHS, NB, NB, maxrank, fixedacc_B);
458 gettimeofday (&tvalAfter, NULL);
459 double tgbuv = (tvalAfter.tv_sec - tvalBefore.tv_sec)
460 +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0;
466 if(MORSE_My_Mpi_Rank()==0){
468 zget_stat(MorseLower, Brk_initial, NT, NTB, NT, &hicma_statrk_initial);
469 printf(
"------------------------------B initial_ranks:");
470 zprint_stat(hicma_statrk_initial);
481 MORSE_dlacpy( MorseUpperLower, N, N, Aorg, LDA, A1, LDA );
482 MORSE_dlacpy( MorseUpperLower, N, N, Aorg, LDA, A2, LDA );
483 if(
main_print_mat ){printf(
"Aorg\n");printmat_format(Aorg,N,N,LDA,1000, 1000, 0);}
485 MORSE_dlacpy( MorseUpperLower, N, NRHS, Borg, LDA, B1, LDA );
486 MORSE_dlacpy( MorseUpperLower, N, NRHS, Borg, LDA, B2, LDA );
487 if(
main_print_mat ){printf(
"Borg\n");printmat_format(Borg,N,NRHS,LDB,1000, 1000, 0);}
490 MORSE_dpotrf(
uplo, N, A2, LDA);
491 MORSE_dtrsm(MorseLeft,
uplo, trans1, MorseNonUnit,
492 N, NRHS, 1.0, A2, LDA, B2, LDB);
493 if(0)cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, 1.0, A1, LDA, B2, LDB, 0.0, B1, LDB);
494 MORSE_dtrsm(MorseLeft,
uplo, trans2, MorseNonUnit,
495 N, NRHS, 1.0, A2, LDA, B2, LDB);
498 gettimeofday (&tvalBefore, NULL);
499 HICMA_zpotrf_Tile(MorseLower, descAUV, descAD, descArk, fixedrank, comp_maxrank, fixedacc );
500 gettimeofday (&tvalAfter, NULL);
501 double tpotrf = (tvalAfter.tv_sec - tvalBefore.tv_sec)
502 +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0;
503 double *Ahicma = NULL;
506 HICMA_zuncompress(MorseLower, descAUV, descAdense, descArk);
508 HICMA_zdiag_vec2mat(descAD, descAdense);
514 if(MORSE_My_Mpi_Rank()==0){
516 zget_stat(MorseLower, Ark_initial, NT, NT, NT, &hicma_statrk_initial);
517 printf(
"------------------------------A final_ranks after HICMA_potrf():");
518 zprint_stat(hicma_statrk_initial);
528 info_factorization = check_factorization( N, Aorg, Ahicma, LDA,
uplo, fixedacc);
542 printf(
"------ TESTS FOR CHAMELEON ZPOTRF + ZTRSM + ZTRSM ROUTINE ------- \n");
543 printf(
" Size of the Matrix %d by %d\n", N, N);
545 printf(
" The matrix A is randomly generated for each test.\n");
546 printf(
"============\n");
547 printf(
" The relative machine precision (eps) is to be %e \n", eps);
548 printf(
" Computational tests pass if scaled residuals are less than 60.\n");
550 if(0)MORSE_dtrsm_Tile(MorseLeft,
uplo, trans1, MorseNonUnit, 1.0,
551 descAdense, descBdense);
552 if(0)MORSE_dtrsm_Tile(MorseLeft,
uplo, trans2, MorseNonUnit, 1.0,
553 descAdense, descBdense);
554 gettimeofday (&tvalBefore, NULL);
561 fixedrank, comp_maxrank, fixedacc);
562 gettimeofday (&tvalAfter, NULL);
563 double ttrsm1 = (tvalAfter.tv_sec - tvalBefore.tv_sec)
564 +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0;
568 if(MORSE_My_Mpi_Rank()==0){
570 zget_stat(MorseUpperLower, Brk_initial, NT, NTB, NT, &hicma_statrk_initial);
571 printf(
"------------------------------B ranks after 1st trsm:");
572 zprint_stat(hicma_statrk_initial);
577 gettimeofday (&tvalBefore, NULL);
584 fixedrank, comp_maxrank, fixedacc);
585 gettimeofday (&tvalAfter, NULL);
586 double ttrsm2 = (tvalAfter.tv_sec - tvalBefore.tv_sec)
587 +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0;
590 if(MORSE_My_Mpi_Rank()==0){
592 zget_stat(MorseUpperLower, Brk_initial, NT, 1, NT, &hicma_statrk_initial);
593 printf(
"------------------------------B ranks after 2nd trsm:");
594 zprint_stat(hicma_statrk_initial);
601 if(MORSE_My_Mpi_Rank()==0){
602 printf(
"%d %d %d %d %d %d %d %d %d %.1e %d %d ", P*Q, P, Q, N, NRHS, LDA, LDB, NB, fixedrank, fixedacc, maxrank, comp_maxrank);
603 printf(
"%g %g %g %g %g %g\n", tgad, tgauv, tgbuv, tpotrf, ttrsm1, ttrsm2);
610 HICMA_zuncompress_custom_size(MorseUpperLower, descBUV, descBdense, descBrk, N, NRHS, NB, NB);
612 info_solution = check_solution(N, NRHS, Aorg, LDA, Borg, Bhicma, LDB, fixedacc);
615 info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps);
617 if ((info_solution == 0)&(info_factorization == 0)){
618 printf(
"***************************************************\n");
619 printf(
" ---- TESTING ZPOTRF + ZTRSM + ZTRSM ..... PASSED !\n");
620 printf(
"***************************************************\n");
623 printf(
"***************************************************\n");
624 printf(
" - TESTING ZPOTRF + ZTRSM + ZTRSM ... FAILED !\n");
625 printf(
"***************************************************\n");
628 free(A1); free(A2); free(B1); free(B2);
649 static int check_factorization(
int N,
double *A1,
double *A2,
int LDA,
int uplo,
double eps)
653 int info_factorization;
656 double *Residual = (
double *)malloc(N*N*
sizeof(
double));
657 double *L1 = (
double *)malloc(N*N*
sizeof(
double));
658 double *L2 = (
double *)malloc(N*N*
sizeof(
double));
659 double *work = (
double *)malloc(N*
sizeof(
double));
661 memset((
void*)L1, 0, N*N*
sizeof(
double));
662 memset((
void*)L2, 0, N*N*
sizeof(
double));
666 LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
' ', N, N, A1, LDA, Residual, N);
669 if (
uplo == MorseUpper){
670 LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
'u', N, N, A2, LDA, L1, N);
671 LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
'u', N, N, A2, LDA, L2, N);
672 cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
675 LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
'l', N, N, A2, LDA, L1, N);
676 LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
'l', N, N, A2, LDA, L2, N);
677 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
681 for (i = 0; i < N; i++)
682 for (j = 0; j < N; j++)
683 Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];
688 printf(
"============\n");
689 printf(
"Checking the Cholesky Factorization \n");
690 printf(
"-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e eps:%.1e\n",Rnorm/(Anorm*N*eps), eps);
692 if ( isnan(Rnorm/(Anorm*N*eps)) || isinf(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 60.0) ){
693 printf(
"-- Factorization is suspicious ! \n");
694 info_factorization = 1;
697 printf(
"-- Factorization is CORRECT ! \n");
698 info_factorization = 0;
701 free(Residual); free(L1); free(L2); free(work);
703 return info_factorization;
711 static int check_solution(
int N,
int NRHS,
double *A1,
int LDA,
double *B1,
double *B2,
int LDB,
double eps )
714 double Rnorm, Anorm, Xnorm, Bnorm, result;
716 double *work = (
double *)malloc(N*
sizeof(
double));
725 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, (alpha), A1, LDA, B2, LDB, (beta), B1, LDB);
728 if (getenv(
"MORSE_TESTING_VERBOSE")) {
729 printf(
"||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e eps:%.2e\n", Anorm, Xnorm, Bnorm, Rnorm, eps );
731 result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ;
732 printf(
"============\n");
733 printf(
"Checking the Residual of the solution \n");
734 printf(
"-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result);
736 if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
737 printf(
"-- The solution is suspicious ! \n");
741 printf(
"-- The solution is CORRECT ! \n");
747 return info_solution;
#define USAGE(name, args, details)
#define PASTE_TILE_TO_LAPACK(_desc_, _name_, _cond_, _type_, _lda_, _n_)
#define PASTE_CODE_FREE_MATRIX(_desc_)
int global_omit_computation
int store_only_diagonal_tiles
int testing_dposv(int argc, char **argv)
STARSH_blrf * starsh_format
#define HICMA_STARSH_PROB_SS
int HICMA_zhagdm_Tile(MORSE_enum uplo, MORSE_desc_t *Dense)
int HICMA_zhagdmdiag_Tile(MORSE_enum uplo, MORSE_desc_t *Dense)
int global_always_fixed_rank
int testing_dtrsmd(int argc, char **argv)
int HICMA_ztrsm_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, double alpha, MORSE_desc_t *AUV, MORSE_desc_t *AD, MORSE_desc_t *Ark, MORSE_desc_t *BUV, MORSE_desc_t *Brk, int rk, int maxrk, double acc)
int HICMA_zpotrf_Tile(MORSE_enum uplo, MORSE_desc_t *AUV, MORSE_desc_t *AD, MORSE_desc_t *Ark, int rk, int maxrk, double acc)
int HICMA_zhagcm_Tile(MORSE_enum uplo, MORSE_desc_t *AUV, MORSE_desc_t *Ark, int numrows_matrix, int numcols_matrix, int numrows_block, int numcols_block, int maxrank, double tol)