49 #include "hicma_common.h" 52 #include "auxcompute_z.h" 53 #include "auxdescutil.h" 116 BLAS_error(
char *rname,
int err,
int val,
int x) {
117 fprintf( stderr,
"%s %d %d %d\n", rname, err, val, x );
124 int m,
int n,
const double *a,
int lda,
double *res) {
125 int i, j;
float anorm, v;
126 char rname[] =
"BLAS_dge_norm";
128 if (order !=
blas_colmajor) BLAS_error( rname, -1, order, 0 );
132 for (j = n; j; --j) {
133 for (i = m; i; --i) {
140 anorm = sqrt( anorm );
143 for (i = 0; i < m; ++i) {
145 for (j = 0; j < n; ++j) {
146 v += cabs( a[i + j * lda] );
152 BLAS_error( rname, -2,
norm, 0 );
156 if (res) *res = anorm;
161 BLAS_dpow_di(
double x,
int n) {
169 for (; n; n >>= 1, x *= x) {
180 double eps = 1.0, r = 1.0, o = 1.0, b = 2.0;
181 int t = 53, l = 1024, m = -1021;
182 char rname[] =
"BLAS_dfpinfo";
184 if ((
sizeof eps) ==
sizeof(
float)) {
195 eps = BLAS_dpow_di( b, -t );
197 r = BLAS_dpow_di( b, m-1 );
201 o = (o * BLAS_dpow_di( b, l-1 )) * b;
207 BLAS_error( rname, -1, cmach, 0 );
215 static int check_factorization(
int,
double*,
double*,
int,
int ,
double);
216 static int check_solution(
int,
int,
double*,
int,
double*,
double*,
int,
double);
240 USAGE(
"TRSMD",
"N LDA NRHS LDB",
241 " - N : the size of the matrix\n" 242 " - LDA : leading dimension of the matrix A\n" 243 " - NRHS : number of RHS\n" 244 " - LDB : leading dimension of the RHS B\n" 245 " - NB : Block size\n" 246 " - acc : Fixed accuracy threshold\n" 247 " - rk : Fixed rank threshold\n" 252 int N = atoi(argv[0]);
253 int LDA = atoi(argv[1]);
254 int NRHS = atoi(argv[2]);
255 int LDB = atoi(argv[3]);
256 int NB = atoi(argv[4]);
257 double fixedacc = atof(argv[5]);
258 int fixedrank = atoi(argv[6]);
261 int info_solution, info_factorization;
263 printf(
"A: %d-by-%d LD:%d\n", N, N, LDA);
264 printf(
"B: %d-by-%d LD:%d\n", N, NRHS, LDB);
265 printf(
"NB: %d fixedrank: %d fixedacc:%.1e\n", NB ,fixedrank, fixedacc);
267 double *A1 = (
double *)malloc(LDA*N*
sizeof(
double));
268 double *A2 = (
double *)malloc(LDA*N*
sizeof(
double));
269 double *B1 = (
double *)malloc(LDB*NRHS*
sizeof(
double));
270 double *B2 = (
double *)malloc(LDB*NRHS*
sizeof(
double));
273 if ((!A1)||(!A2)||(!B1)||(!B2)){
274 printf(
"Out of Memory \n ");
282 trans1 =
uplo == MorseUpper ? MorseTrans : MorseNoTrans;
283 trans2 =
uplo == MorseUpper ? MorseNoTrans : MorseTrans;
303 int compress_diag = 0;
310 int comp_maxrank = NB;
312 int NBxMaxrank = NB * maxrank;
318 int NTxMaxrank = NT * maxrank;
320 int NBxNRHS = NB * NRHS;
329 MORSE_desc_t *descAUV = NULL;
330 status = MORSE_Desc_Create(&descAUV, NULL, MorseRealDouble, NB, maxrank*2, NBxMaxrank*2, NTxNB, NTxMaxrank*2, 0, 0, NTxNB, NTxMaxrank*2, P, Q);
331 printf(
"AUV: m:%d n:%d mb:%d nb:%d mt:%d nt:%d\n",
339 if(status != MORSE_SUCCESS){
return status; }
340 MORSE_desc_t *descAD = NULL;
341 status = MORSE_Desc_Create(&descAD, NULL, MorseRealDouble, NB, NB, NBxNB, LDA, NB, 0, 0, N, NB, P, Q);
342 if(status != MORSE_SUCCESS){
return status; }
344 MORSE_desc_t *descArk = NULL;
345 status = MORSE_Desc_Create(&descArk, NULL, MorseRealDouble, 1, 1, 1, NT, NT, 0, 0, NT, NT, P, Q);
346 if(status != MORSE_SUCCESS){
return status; }
348 MORSE_dlaset_Tile(MorseLower, 0.0, NB, descArk);
350 MORSE_desc_t *descAdense = NULL;
351 status = MORSE_Desc_Create(&descAdense, NULL, MorseRealDouble, NB, NB, NBxNB, LDA, N, 0, 0, N, N, P, Q);
352 if(status != MORSE_SUCCESS){
return status; }
354 MORSE_desc_t *descAdense2 = NULL;
355 status = MORSE_Desc_Create(&descAdense2, NULL, MorseRealDouble, NB, NB, NBxNB, LDA, N, 0, 0, N, N, P, Q);
356 if(status != MORSE_SUCCESS){
return status; }
359 hicma_problem.
ndim = 2;
361 hicma_problem.
beta = 0.1;
363 hicma_problem.
nu = 0.5;
365 hicma_problem.
noise = 1.e-4;
366 hicma_problem.
noise = 5.e-4;
367 hicma_problem.
noise = 1.e-2;
369 HICMA_zgenerate_problem(probtype, sym, ddecay, N, NB, NT, NT, &hicma_problem);
371 printf(
"Generate dense A\n");
374 printf(
"Generate diagonals of A\n");
377 printf(
"Generate low rank off-diagonals of A\n");
379 status =
HICMA_zhagcm_Tile(MorseLower, descAUV, descArk, N, N, NB, NB, maxrank, fixedacc);
380 if(status != MORSE_SUCCESS){
return status; }
386 zget_stat(MorseLower, Ark_initial, NT, NT, NT, &hicma_statrk_initial);
387 if(MORSE_My_Mpi_Rank()==0){
388 printf(
"------------------------------A initial_ranks:");
389 zprint_stat(hicma_statrk_initial);
397 for(j = 0; j < N; j++){
398 for(i = 0; i < j; i++){
400 Aorg[j*LDA+i] = Aorg[i*LDA+j];
408 int NTB = ceil(NRHS*1.0/NB);
409 MORSE_desc_t *descBdense = NULL;
410 status = MORSE_Desc_Create(&descBdense, NULL, MorseRealDouble, NB, NB, NBxNB, LDA, NRHS, 0, 0, N, NRHS, P, Q);
411 if(status != MORSE_SUCCESS){
return status; }
414 MORSE_desc_t *descBdense2 = NULL;
415 status = MORSE_Desc_Create(&descBdense2, NULL, MorseRealDouble, NB, NB, NBxNB, LDA, NRHS, 0, 0, N, NRHS, P, Q);
416 if(status != MORSE_SUCCESS){
return status; }
420 hicma_problem.
noise = 0.0;
424 double fixedacc_B = fixedacc;
429 printf(
"Generate dense B\n");
430 MORSE_dplrnt_Tile(descBdense, 371 );
431 MORSE_dlacpy_Tile( MorseUpperLower, descBdense, descBdense2 );
438 MORSE_dlacpy( MorseUpperLower, N, N, Aorg, LDA, A1, LDA );
439 MORSE_dlacpy( MorseUpperLower, N, N, Aorg, LDA, A2, LDA );
440 if(
main_print_mat ){printf(
"Aorg\n");printmat_format(Aorg,N,N,LDA,1000, 1000, 0);}
442 MORSE_dlacpy( MorseUpperLower, N, NRHS, Borg, LDA, B1, LDA );
443 MORSE_dlacpy( MorseUpperLower, N, NRHS, Borg, LDA, B2, LDA );
444 if(
main_print_mat ){printf(
"Borg\n");printmat_format(Borg,N,NRHS,LDB,1000, 1000, 0);}
447 MORSE_dpotrf(
uplo, N, A2, LDA);
448 MORSE_dtrsm(MorseLeft,
uplo, trans1, MorseNonUnit,
449 N, NRHS, 1.0, A2, LDA, B2, LDB);
450 if(0)cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, 1.0, A1, LDA, B2, LDB, 0.0, B1, LDB);
451 MORSE_dtrsm(MorseLeft,
uplo, trans2, MorseNonUnit,
452 N, NRHS, 1.0, A2, LDA, B2, LDB);
454 printf(
"Potrf on A\n");
456 HICMA_zpotrf_Tile(MorseLower, descAUV, descAD, descArk, fixedrank, comp_maxrank, fixedacc );
458 HICMA_zuncompress(MorseLower, descAUV, descAdense, descArk);
460 HICMA_zdiag_vec2mat(descAD, descAdense);
465 zget_stat(MorseLower, Ark_initial, NT, NT, NT, &hicma_statrk_initial);
466 if(MORSE_My_Mpi_Rank()==0){
467 printf(
"------------------------------A final_ranks after HICMA_potrf():");
468 zprint_stat(hicma_statrk_initial);
477 info_factorization = check_factorization( N, Aorg, Ahicma, LDA,
uplo, fixedacc);
491 printf(
"------ TESTS FOR CHAMELEON ZPOTRF + ZTRSM + ZTRSM ROUTINE ------- \n");
492 printf(
" Size of the Matrix %d by %d\n", N, N);
494 printf(
" The matrix A is randomly generated for each test.\n");
495 printf(
"============\n");
496 printf(
" The relative machine precision (eps) is to be %e \n", eps);
497 printf(
" Computational tests pass if scaled residuals are less than 60.\n");
501 check_same_array(arrayB_1, arrayB2_1, N, __LINE__, __FILE__);
503 printf(
"First trsm\n");
504 if(1)MORSE_dtrsm_Tile(MorseLeft,
uplo, trans1, MorseNonUnit, 1.0,
505 descAdense, descBdense2);
510 descBdense, comp_maxrank);
513 check_same_array(arrayB_2, arrayB2_2, N, __LINE__, __FILE__);
515 printf(
"Second trsm\n");
516 if(1)MORSE_dtrsm_Tile(MorseLeft,
uplo, trans2, MorseNonUnit, 1.0,
517 descAdense, descBdense2);
522 descBdense, comp_maxrank);
525 check_same_array(arrayB_3, arrayB2_3, N, __LINE__, __FILE__);
529 int info_solution_hicma = check_solution(N, NRHS, Aorg, LDA, Borg, Bhicma, LDB, fixedacc);
533 info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps);
535 if ((info_solution_hicma == 0)&(info_solution == 0)&(info_factorization == 0)){
536 printf(
"***************************************************\n");
537 printf(
" ---- TESTING ZPOTRF + ZTRSM + ZTRSM ..... PASSED !\n");
538 printf(
"***************************************************\n");
541 printf(
"***************************************************\n");
542 printf(
" - TESTING ZPOTRF + ZTRSM + ZTRSM ... FAILED !\n");
543 printf(
"***************************************************\n");
546 free(A1); free(A2); free(B1); free(B2);
562 static int check_factorization(
int N,
double *A1,
double *A2,
int LDA,
int uplo,
double eps)
566 int info_factorization;
569 double *Residual = (
double *)malloc(N*N*
sizeof(
double));
570 double *L1 = (
double *)malloc(N*N*
sizeof(
double));
571 double *L2 = (
double *)malloc(N*N*
sizeof(
double));
572 double *work = (
double *)malloc(N*
sizeof(
double));
574 memset((
void*)L1, 0, N*N*
sizeof(
double));
575 memset((
void*)L2, 0, N*N*
sizeof(
double));
579 LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
' ', N, N, A1, LDA, Residual, N);
582 if (
uplo == MorseUpper){
583 LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
'u', N, N, A2, LDA, L1, N);
584 LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
'u', N, N, A2, LDA, L2, N);
585 cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
588 LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
'l', N, N, A2, LDA, L1, N);
589 LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
'l', N, N, A2, LDA, L2, N);
590 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
594 for (i = 0; i < N; i++)
595 for (j = 0; j < N; j++)
596 Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];
601 printf(
"============\n");
602 printf(
"Checking the Cholesky Factorization \n");
603 printf(
"-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e eps:%.1e\n",Rnorm/(Anorm*N*eps), eps);
605 if ( isnan(Rnorm/(Anorm*N*eps)) || isinf(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 60.0) ){
606 printf(
"-- Factorization is suspicious ! \n");
607 info_factorization = 1;
610 printf(
"-- Factorization is CORRECT ! \n");
611 info_factorization = 0;
614 free(Residual); free(L1); free(L2); free(work);
616 return info_factorization;
624 static int check_solution(
int N,
int NRHS,
double *A1,
int LDA,
double *B1,
double *B2,
int LDB,
double eps )
627 double Rnorm, Anorm, Xnorm, Bnorm, result;
629 double *work = (
double *)malloc(N*
sizeof(
double));
638 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, (alpha), A1, LDA, B2, LDB, (beta), B1, LDB);
641 if (getenv(
"MORSE_TESTING_VERBOSE")) {
642 printf(
"||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e eps:%.2e\n", Anorm, Xnorm, Bnorm, Rnorm, eps );
644 result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ;
645 printf(
"============\n");
646 printf(
"Checking the Residual of the solution \n");
647 printf(
"-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result);
649 if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
650 printf(
"-- The solution is suspicious ! \n");
654 printf(
"-- The solution is CORRECT ! \n");
660 return info_solution;
#define USAGE(name, args, details)
#define PASTE_TILE_TO_LAPACK(_desc_, _name_, _cond_, _type_, _lda_, _n_)
#define PASTE_CODE_FREE_MATRIX(_desc_)
#define HICMA_STARSH_PROB_RND
int global_always_fixed_rank
int testing_dposv(int argc, char **argv)
int store_only_diagonal_tiles
#define HICMA_STARSH_PROB_SS
int HICMA_zhagdm_Tile(MORSE_enum uplo, MORSE_desc_t *Dense)
int HICMA_zhagdmdiag_Tile(MORSE_enum uplo, MORSE_desc_t *Dense)
int HICMA_ztrsmd_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, double alpha, MORSE_desc_t *AUV, MORSE_desc_t *AD, MORSE_desc_t *Ark, MORSE_desc_t *Bdense, int maxrk)
int testing_dtrsmd(int argc, char **argv)
int HICMA_zpotrf_Tile(MORSE_enum uplo, MORSE_desc_t *AUV, MORSE_desc_t *AD, MORSE_desc_t *Ark, int rk, int maxrk, double acc)
int global_omit_computation
int HICMA_zhagcm_Tile(MORSE_enum uplo, MORSE_desc_t *AUV, MORSE_desc_t *Ark, int numrows_matrix, int numcols_matrix, int numrows_block, int numcols_block, int maxrank, double tol)