16 #include "coreblas/coreblas.h" 17 #include "coreblas/lapacke.h" 20 #include <lapacke_utils.h> 25 #define CBLAS_SADDR(_val) (_val) 33 printf(
"M:%d N:%d LD:%d\n[", m, n, ld);
38 printf(
"%+.2e",
A[j*ld+i]);
64 double* _CU,
int ld_CU,
int _Crk,
66 double* _AU,
int ld_AU,
int _Ark,
67 double alpha,
double beta,
double* qrtauA){
72 if ((_Ark + _Crk) > 2*maxrank){
73 fprintf(stderr,
"%s %s %d: Sum of ranks (%d) is too big! _Ark:%d _Crk:%d maxrank:%d (x2: %d)\n",
74 __FILE__, __func__, __LINE__, (_Ark + _Crk), _Ark, _Crk, maxrank, 2*maxrank);
77 int nelm_AU = AU_nrows * AU_ncols;
83 printf(
" QRA\t|%d\t|nelm_AU:%d alpha:%g CU_ncols:%d ld_CU:%d CU_ncols*ld_CU:%d\n", __LINE__, nelm_AU, alpha, CU_ncols, ld_CU, CU_ncols*ld_CU);
85 cblas_dcopy(nelm_AU, _AU, incOne, &_CU[CU_ncols*ld_CU], incOne);
87 double d_one = (double)1.0;
89 cblas_dscal(nelm_AU,
CBLAS_SADDR(alpha) , &_CU[CU_ncols*ld_CU], incOne);
92 cblas_dscal(_M * _Crk,
CBLAS_SADDR(beta) , _CU, incOne);
98 *pnew_CU_ncols = CU_ncols;
100 printf(
" QRA\t|%d\t|CU_nrows:%d CU_ncols:%d ld_CU:%d QRA:%p\n", __LINE__, CU_nrows, CU_ncols, ld_CU, _CU);
102 info = LAPACKE_dgeqrf(
103 LAPACK_COL_MAJOR, CU_nrows, CU_ncols, _CU, ld_CU, qrtauA);
109 "%s %d ERROR in LAPACKE_dgeqrf(1:CU_nrows:%d 2:CU_ncols:%d 3:_CU:%p 4:ld_CU:%d 5:qrtauA:%p) info=%d maxrank:%d\n",
110 __FILE__, __LINE__, CU_nrows, CU_ncols, _CU, ld_CU, qrtauA, info, maxrank);
126 double* _CV,
int ld_CV,
int _Crk,
128 double* _AV,
int ld_AV,
int _Ark,
129 double* _BU,
int ld_BU,
130 double* _BV,
int ld_BV,
int _Brk,
131 double* qrtauB,
double* AcolBcolT){
134 assert(AcolBcolT != NULL);
135 int ld_AcolBcolT = maxrank;
144 printf(
" QRB\t|%d\t| (AV*BV^T) M,N,K:%d,%d,%d LDA,LDB,LDC:%d,%d,%d alpha:%g beta:%g\n", __LINE__,AV_ncols, BV_ncols, BV_nrows,ld_AV,ld_BV,ld_AcolBcolT, alpha, beta);
147 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans,
150 AcolBcolT, ld_AcolBcolT);
151 int AcolBcolT_nrows = AV_ncols;
152 int AcolBcolT_ncols = BV_ncols;
157 if ((AcolBcolT_nrows + _Crk) > 2*maxrank){
158 fprintf(stderr,
"%s %s %d: Sum of two ranks (%d) is too big! \ 159 AcolBcolT:%d _Crk:%d maxrank:%d (x2: %d)\n",
160 __FILE__, __func__, __LINE__, (AcolBcolT_nrows + _Crk), AcolBcolT, _Crk, maxrank, 2*maxrank);
165 printf(
" QRB\t|%d\t| (AV*BV^T) * BU^T M,N,K:%d,%d,%d LDA,LDB,LDC:%d,%d,%d alpha:%g beta:%g CV_ncols:%d AcolBcolT_nrows:%d ldcB:%d\n",
166 __LINE__, BU_nrows,AcolBcolT_nrows, BU_ncols,ld_BU,ld_AcolBcolT,ld_CV, alpha, beta, CV_ncols, AcolBcolT_nrows, ld_CV);
170 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,
171 BU_nrows, AcolBcolT_nrows, BU_ncols,
174 AcolBcolT, ld_AcolBcolT,
176 &_CV[CV_ncols*ld_CV], ld_CV);
177 CV_ncols += AcolBcolT_nrows;
178 *pnew_CV_ncols = CV_ncols;
182 printf(
" QRB\t|%d\t|CV_nrows:%d CV_ncols:%d ld_CV:%d QRB:%p\n",
183 __LINE__, CV_nrows, CV_ncols, ld_CV, _CV);
186 info = LAPACKE_dgeqrf(LAPACK_COL_MAJOR, CV_nrows, CV_ncols,
190 "%s %d ERROR in LAPACKE_dgeqrf(1:CV_nrows:%d 2:CV_ncols:%d 3:_CV:%p 4:ld_CV:%d 5:qrtauB:%p) info=%d maxrank:%d\n",
191 __FILE__, __LINE__, CV_nrows, CV_ncols, _CV, ld_CV, qrtauB,info, maxrank);
201 double* _CU,
int ld_CU,
202 double* _CV,
int ld_CV,
int _Crk,
203 double* _U,
int ld_U,
204 double* _V,
int ld_V,
int* pnew_UVrk,
207 double* _rA,
double* _rB,
double* _T,
double* sigma,
double* svdsuperb
214 int rA_nrows = chameleon_min(CU_nrows, CU_ncols);
215 int rA_ncols = CU_ncols;
216 int maxncolsR = 2*_Crk;
217 int ld_rA = rA_nrows;
219 if(rA_nrows != rA_ncols){
220 printf(
"TRMM cannot be used because R coming from QR factorization of A is not square nrows: %d ncols:%d \n", rA_nrows, rA_ncols);
226 printf(
" SVD\t|%d\t| copy rA rA_nrows:%d rA_ncols:%d ld_CU:%d ld_rA:%d CU:%p rA:%p\n",
227 __LINE__, rA_nrows, rA_ncols, ld_CU, ld_rA, _CU, _rA);
231 LAPACK_dlaset(&chlow,
232 &rA_nrows, &rA_ncols, &zero, &zero, _rA, &ld_rA);
235 &rA_nrows, &rA_ncols,
239 printf(
"%d\t|_CU and _rA\n", __LINE__);
247 int rB_nrows = chameleon_min(CV_nrows, CV_ncols);
248 int rB_ncols = CV_ncols;
249 int ld_rB = rB_nrows;
250 assert(rA_ncols == rB_ncols);
254 printf(
" SVD\t|%d\t| copy rB rB_nrows:%d rB_ncols:%d ld_rB:%d ld_CV:%d rB:%p CV:%p\n",
255 __LINE__, rB_nrows, rB_ncols, ld_rB, ld_CV, _rB, _CV);
259 LAPACK_dlaset(&chlow,
260 &rB_nrows, &rB_ncols, &zero, &zero, _rB, &ld_rB);
262 &rB_nrows, &rB_ncols, _CV, &ld_CV, _rB, &ld_rB);
268 printf(
"%d\t|_CV and _rB\n", __LINE__);
274 int T_nrows = rA_ncols;
275 int T_ncols = rA_ncols;
280 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, rA_nrows, rA_ncols, alpha, _rB, ld_rB, _rA, ld_rA);
284 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,
285 rA_nrows, rB_nrows, rB_ncols,
CBLAS_SADDR(alpha), _rA, ld_rA,
289 printf(
" SVD\t|%d\t| T=rA*rB^T rA_nrows:%d rB_nrows:%d rB_ncols:%d ld_rA:%d ld_rB:%d ld_T:%d alpha:%g beta:%g\n",
290 __LINE__, rA_nrows, rB_nrows, rB_ncols, ld_rA, ld_rB, ld_T, alpha, beta);
296 assert(sigma != NULL);
297 int size_sigma = T_nrows;
298 assert(svdsuperb != NULL);
300 printf(
" SVD\t|%d\t| svd(T) (3.m)T_nrows:%d (4.n)T_ncols:%d ld_T:%d ld_U:%d (11.ldvt)ld_V:%d _T:%p (zero based parameter indices)\n",
301 __LINE__, T_nrows, T_ncols, ld_T, ld_U, ld_V, _T);
303 info = LAPACKE_dgesvd(LAPACK_COL_MAJOR,
'A',
'A',
304 T_nrows, T_ncols, _T, ld_T, sigma,
309 "%s %d ERROR in LAPACKE_dgesvd() info=%d" 310 "1:T_nrows=%d, 2:T_ncols=%d, 3:_T=%p, 4:ld_T=:%d, 5:sigma=%p," 311 "6:_U=%p, 7:ld_U=%d, 8:_V=%p, 9:ld_V:%d," 314 __FILE__, __LINE__, info,
315 T_nrows, T_ncols, _T, ld_T, sigma,
320 int U_nrows, U_ncols, V_nrows, V_ncols;
321 U_nrows = U_ncols = V_nrows = V_ncols = T_nrows;
327 printf(
"%d %e\n", rank, acc);
331 double relacc = (acc);
335 if(rank > size_sigma)
336 finalrank = size_sigma;
339 int newrank = size_sigma;
341 for(i=2;i<size_sigma;i++){
342 if(sigma[i] < relacc)
350 if(finalrank > maxrank){
351 fprintf(stderr,
"%s %s %d: Rank after truncation is too big! finalrank:%d maxrank:%d\n", __FILE__, __func__, __LINE__, finalrank, maxrank);
356 printf(
"rank:%d acc:%.2e relac:%.2e size_sigma:%d final_rank:%d: ",
357 rank, acc, relacc, size_sigma, finalrank);
358 for(i=0;i<size_sigma;i++){
359 printf(
"%d:%.2e ", i,sigma[i]);
364 printf(
"size_sigma:%d finalrank:%d %.2e\n", size_sigma, finalrank, acc);
369 int rank_V = finalrank;
371 for(k = 0; k < rank_V; k++){
372 double diagval = sigma[k];
373 cblas_dscal(V_ncols,
CBLAS_SADDR(diagval), &_V[k], ld_V);
376 printf(
" SVD\t|%d\t| S*V V_ncols:%d ld_V:%d _V:%p\n",
377 __LINE__, V_ncols, ld_V, _V);
382 *pnew_UVrk = finalrank;
388 double* _CU,
int ld_CU,
int _Crk,
389 double* _U,
int ld_U,
int _Urk,
394 int CU_ncols = ncols_qA;
395 int U_nrows = ncols_qA;
397 int nrows = CU_nrows - U_nrows;
400 printf(
" NEWU\t|%d\t| zero nrows:%d U_ncols:%d ld_U:%d _Crk:%d CU_ncols:%d _Urk:%d CU_nrows:%d U_nrows:%d diff:%d\n",
401 __LINE__, nrows, U_ncols, ld_U, _Crk, CU_ncols, _Urk, CU_nrows, U_nrows, nrows);
407 LAPACK_dlaset( &
uplo, &nrows, &U_ncols, &zero, &zero, &_U[U_nrows], &ld_U );
410 "%s %d ERROR in LAPACKE_dlaset() info=%d\n",
411 __FILE__, __LINE__, info);
418 info = LAPACKE_dormqr(LAPACK_COL_MAJOR,
'L',
'N',
419 CU_nrows, U_ncols, ncols_qA, _CU, ld_CU, qrtauA, _U, ld_U);
421 printf(
" NEWU\t|%d\t| ormqr CU_nrows (new U_nrows):%d U_ncols:%d ncols_qA:%d ld_CU:%d ld_U:%d\n",
422 __LINE__, CU_nrows, U_ncols, ncols_qA, ld_CU, ld_U);
429 "%s %d ERROR in LAPACKE_dormqr() info=%d\n",
430 __FILE__, __LINE__, info);
431 printf(
" NEWU\t|%d\t| ormqr CU_nrows (new U_nrows):%d U_ncols:%d ncols_qA:%d ld_CU:%d ld_U:%d U_nrows:%d\n",
432 __LINE__, CU_nrows, U_ncols, ncols_qA, ld_CU, ld_U, U_nrows);
434 int i, j, ssend, ldarr;
436 arr = _CU; ldarr = ld_CU; ssend = ncols_qA;
439 printf(
"%.3e ", arr[j*ldarr+i]);
442 for(j=ssend-4;j<ssend;j++){
443 printf(
"%.3e ", arr[j*ldarr+i]);
448 arr = _U; ldarr = ld_U; ssend = U_ncols;
451 printf(
"%.3e ", arr[j*ldarr+i]);
454 for(j=ssend-4;j<ssend;j++){
455 printf(
"%.3e ", arr[j*ldarr+i]);
459 for(j=0;j<U_ncols;j++){
460 for(i=0; i < CU_nrows; i++){
461 double val = _U[j*ld_U+i];
463 printf(
"%d,%d is nan (%g) CU_nrows:%d U_ncols:%d ld_U:%d\n", i, j, val, CU_nrows, U_ncols, ld_U);
471 LAPACKE_dlacpy(LAPACK_COL_MAJOR,
'A', U_nrows, U_ncols,
472 _U, ld_U, _CU, ld_CU);
474 printf(
" NEWU\t|%d\t| copy U_nrows:%d U_ncols:%d ld_CU:%d ld_U:%d\n",
475 __LINE__, U_nrows, U_ncols, ld_CU, ld_U);
480 printf(
"%.3e ", _CU[j*ld_CU+i]);
483 for(j=U_ncols-4;j<U_ncols;j++){
484 printf(
"%.3e ", _CU[j*ld_CU+i]);
496 double* _CV,
int ld_CV,
int _Crk,
497 double* _V,
int ld_V,
int _Vrk,
502 int CV_ncols = ncols_qB;
504 int V_ncols = ncols_qB;
505 int ncols = CV_nrows - V_ncols;
508 printf(
" NEWV\t|%d\t| zero V_nrows:%d ncols:%d ld_V:%d _Crk:%d CV_ncols:%d _Vrk:%d CV_nrows:%d V_ncols:%d diff:%d\n",
509 __LINE__, V_nrows, ncols, ld_V, _Crk, CV_ncols, _Vrk, CV_nrows, V_ncols, ncols);
514 size_t iv = V_ncols*ld_V;
515 LAPACK_dlaset( &
uplo, &V_nrows, &ncols, &zero, &zero, &(_V[iv]), &ld_V );
520 printf(
" NEWV\t|%d\t| ormqr V_nrows:%d CV_nrows:%d ncols_qB:%d ld_CV:%d ld_V:%d\n",
521 __LINE__, V_nrows, CV_nrows, ncols_qB, ld_CV, ld_V);
524 info = LAPACKE_dormqr(LAPACK_COL_MAJOR,
'R',
'T',
525 V_nrows, CV_nrows, ncols_qB, _CV, ld_CV, qrtauB, _V, ld_V);
531 "%s %d ERROR in LAPACKE_dormqr() info=%d\n",
532 __FILE__, __LINE__, info);
538 printf(
" NEWV\t|%d\t| trans V_nrows:%d V_ncols:%d ld_V:%d ld_CV%d\n",
539 __LINE__, V_nrows, V_ncols, ld_V, ld_CV);
541 LAPACKE_dge_trans(LAPACK_COL_MAJOR, V_nrows, V_ncols,
542 _V, ld_V, _CV, ld_CV);
544 printf(
" NEWV\t|%d\t| copy V_nrows:%d V_ncols:%d ld_CV:%d ld_V:%d\n",
545 __LINE__, V_nrows, V_ncols, ld_CV, ld_V);
550 printf(
"%.3e ", _CV[j*ld_CV+i]);
553 for(j=V_ncols-4;j<V_ncols;j++){
554 printf(
"%.3e ", _CV[j*ld_CV+i]);
591 printf(
"%d:%s work:%p ", __LINE__, __func__, work);
592 printf(
"M:%d N:%d LDA:%d LDB:%d LDC:%d rk:%d maxrk:%d acc:%e a:%e b:%e\n",
593 M, N, LDA, LDB, LDC, rk, maxrk, acc, alpha, beta);
605 int _Ark = (int)(Ark[0]);
606 int _Brk = (int)(Brk[0]);
607 int _Crk = (int)(Crk[0]);
608 if(_Ark == 0 || _Brk == 0 || _Crk == 0){
609 fprintf(stderr,
"%s %d: _Ark=%d _Brk=%d _Crk=%d. These rank values should not be zero.\n", __FILE__, __LINE__, _Ark, _Brk, _Crk);
614 int _M = M;
int _N = N;
615 double* _CU = CU;
int ld_CU = LDC;
616 double* _CV = CV;
int ld_CV = LDC;
int* pnew_Crk = &new_Crk;
617 double* _AU = AU;
int ld_AU = LDA;
618 double* _AV = AV;
int ld_AV = LDA;
619 double* _BU = BU;
int ld_BU = LDB;
620 double* _BV = BV;
int ld_BV = LDB;
627 double* CUclone = NULL;
628 size_t CUclone_nelm = _M * 2 * maxrk;
630 int use_CUV_clone = 1;
631 if(use_CUV_clone == 1) {
634 work += CUclone_nelm;
636 CUclone = malloc(CUclone_nelm *
sizeof(
double));
638 LAPACK_dlacpy(&chall,
641 CUclone, &ld_CUclone);
644 double* CVclone = NULL;
645 size_t CVclone_nelm = _M * 2 * maxrk;
647 if(use_CUV_clone == 1) {
650 work += CVclone_nelm;
652 CVclone = malloc(CVclone_nelm *
sizeof(
double));
654 LAPACK_dlacpy(&chall,
657 CVclone, &ld_CVclone);
659 double* _CU_save = _CU;
660 double* _CV_save = _CV;
662 if(use_CUV_clone == 1) {
667 double* qrtauA = NULL;
668 size_t qrtauA_nelm = nb;
672 qrtauA = malloc(qrtauA_nelm *
sizeof(
double));
674 assert(qrtauA != NULL);
675 double* qrtauB = NULL;
676 size_t qrtauB_nelm = nb;
678 qrtauB = work + qrtauA_nelm;
680 qrtauB = malloc(qrtauB_nelm *
sizeof(
double));
682 assert(qrtauB != NULL);
689 __qra(_M, maxrk, _CU, ld_CU, _Crk, &CU_ncols, _AU, ld_AU, _Ark, alpha, beta, qrtauA);
691 assert(CU_ncols == (_Crk + _Ark));
696 double* qrb_aubut = NULL;
697 size_t qrb_aubut_nelm = maxrk * maxrk;
699 qrb_aubut = work + qrtauA_nelm + qrtauB_nelm;
701 qrb_aubut = malloc(qrb_aubut_nelm *
sizeof(
double));
703 __qrb(_M, maxrk, _CV, ld_CV, _Crk, &CV_ncols,
704 _AV, ld_AV, _Ark, _BU, ld_BU, _BV, ld_BV, _Brk, qrtauB, qrb_aubut);
705 if(CU_ncols == 0 || CV_ncols == 0){
706 fprintf(stderr,
"%s %d: CU_ncols=%d CV_ncols=%d. These values should not be zero.\n", __FILE__, __LINE__, CU_ncols, CV_ncols);
713 assert(CU_ncols == CV_ncols);
720 size_t newU_nelm = nb * maxrk;
722 newU = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm;
724 newU = malloc(newU_nelm *
sizeof(
double));
727 size_t newV_nelm = nb * maxrk;
730 newV = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm;
732 newV = malloc(newV_nelm *
sizeof(
double));
734 assert(newU != NULL);
735 assert(newV != NULL);
736 double *svd_rA = NULL;
737 int svd_rA_nrows = chameleon_min(_M, CU_ncols);
738 int svd_rA_ncols = CU_ncols;
741 svd_rA_nelm = svd_rA_nrows * svd_rA_nrows;
743 svd_rA_nelm = svd_rA_nrows * svd_rA_ncols;
746 svd_rA = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm + newV_nelm;
748 svd_rA = malloc(svd_rA_nelm *
sizeof(
double));
750 double *svd_rB = NULL;
751 int svd_rB_nrows = chameleon_min(_M, CV_ncols);
752 int svd_rB_ncols = CV_ncols;
755 svd_rB_nelm = svd_rB_nrows * svd_rB_ncols;
757 svd_rB = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm + newV_nelm + svd_rA_nelm;
759 svd_rB = malloc(svd_rB_nelm *
sizeof(
double));
764 double *svd_T = NULL;
765 int svd_T_nrows = svd_rA_ncols;
766 int svd_T_ncols = svd_rA_ncols;
769 svd_T_nelm = svd_T_nrows * svd_T_ncols;
771 svd_T = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm + newV_nelm + svd_rA_nelm + svd_rB_nelm;
773 svd_T = malloc(svd_T_nelm *
sizeof(
double));
779 double *svd_sigma = NULL;
780 double *svd_superb = NULL;
781 size_t svd_sigma_nelm = svd_T_nrows;
782 size_t svd_superb_nelm = svd_T_nrows;
784 svd_sigma = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm + newV_nelm + svd_rA_nelm + svd_rB_nelm + svd_T_nelm;
785 svd_superb = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm + newV_nelm + svd_rA_nelm + svd_rB_nelm + svd_T_nelm + svd_sigma_nelm;
787 svd_sigma = malloc(svd_sigma_nelm *
sizeof(
double));
788 svd_superb = malloc(svd_superb_nelm *
sizeof(
double));
790 if(ld_newV < CU_ncols){
791 fprintf(stderr,
"%s %d: Increase maxrank. %d is not enough ld_newV:%d CU_ncols:%d\n", __FILE__, __LINE__, maxrk, ld_newV, CU_ncols);
799 _CV, ld_CV, CU_ncols,
801 newV, ld_newV, &new_UVrk ,
803 svd_rA, svd_rB, svd_T, svd_sigma, svd_superb
812 int ncols_qA = CU_ncols;
817 newU, ld_newU, new_UVrk,
822 int ncols_qB = CV_ncols;
827 newV, ld_newV, new_UVrk,
830 *pnew_Crk = new_UVrk;
835 if(use_CUV_clone == 1) {
836 LAPACK_dlacpy(&chall,
838 CUclone, &ld_CUclone,
842 LAPACK_dlacpy(&chall,
844 CVclone, &ld_CVclone,
865 int old_Crk = Crk[0];
867 printf(
"Ark:%d Brk:%d Crk[0]:%d %g RANK CHANGE: %d->%d\n",
868 _Ark, _Brk, Crk[0], Crk[0], old_Crk, *pnew_Crk);
872 int casted_Crk = (int)(Crk[0]);
873 printf(
"casted_Crk:%d Ark:%d Brk:%d Crk[0]:%d %g RANK CHANGE: %d->%d\n",
874 casted_Crk, _Ark, _Brk, Crk[0], Crk[0], old_Crk, new_Crk);
void __newu(int _M, int ncols_qA, double *_CU, int ld_CU, int _Crk, double *_U, int ld_U, int _Urk, double *qrtauA)
void __newv(int _M, int ncols_qB, double *_CV, int ld_CV, int _Crk, double *_V, int ld_V, int _Vrk, double *qrtauB)
void __qrb(int _M, int maxrank, double *_CV, int ld_CV, int _Crk, int *pnew_CV_ncols, double *_AV, int ld_AV, int _Ark, double *_BU, int ld_BU, double *_BV, int ld_BV, int _Brk, double *qrtauB, double *AcolBcolT)
void __qra(int _M, int maxrank, double *_CU, int ld_CU, int _Crk, int *pnew_CU_ncols, double *_AU, int ld_AU, int _Ark, double alpha, double beta, double *qrtauA)
void HCORE_zgemm(MORSE_enum transA, int transB, int M, int N, double alpha, double *AU, double *AV, double *Ark, int LDA, double *BU, double *BV, double *Brk, int LDB, double beta, double *CU, double *CV, double *Crk, int LDC, int rk, int maxrk, double acc, double *work)
#define CBLAS_SADDR(_val)
void __svd(int _M, int maxrank, double *_CU, int ld_CU, double *_CV, int ld_CV, int _Crk, double *_U, int ld_U, double *_V, int ld_V, int *pnew_UVrk, int rank, double acc, double *_rA, double *_rB, double *_T, double *sigma, double *svdsuperb)
void hc_printmat(double *A, int m, int n, int ld)