16 #include "coreblas/coreblas.h"    17 #include "coreblas/lapacke.h"    20 #include <lapacke_utils.h>    25 #define CBLAS_SADDR(_val) (_val)    33     printf(
"M:%d N:%d LD:%d\n[", m, n, ld);
    38             printf(
"%+.2e", 
A[j*ld+i]);
    64         double* _CU, 
int ld_CU, 
int _Crk,
    66         double* _AU, 
int ld_AU, 
int _Ark,
    67         double alpha, 
double beta, 
double* qrtauA){
    72     if ((_Ark + _Crk) > 2*maxrank){
    73         fprintf(stderr, 
"%s %s %d: Sum of ranks (%d) is too big! _Ark:%d _Crk:%d maxrank:%d (x2: %d)\n",
    74                 __FILE__, __func__, __LINE__, (_Ark + _Crk), _Ark, _Crk, maxrank, 2*maxrank);
    77     int nelm_AU = AU_nrows * AU_ncols;
    83         printf(
" QRA\t|%d\t|nelm_AU:%d alpha:%g CU_ncols:%d ld_CU:%d CU_ncols*ld_CU:%d\n", __LINE__, nelm_AU, alpha, CU_ncols, ld_CU, CU_ncols*ld_CU);
    85     cblas_dcopy(nelm_AU, _AU, incOne,  &_CU[CU_ncols*ld_CU], incOne);
    87     double d_one = (double)1.0;
    89         cblas_dscal(nelm_AU, 
CBLAS_SADDR(alpha) , &_CU[CU_ncols*ld_CU], incOne);
    92         cblas_dscal(_M * _Crk, 
CBLAS_SADDR(beta) , _CU, incOne);
    98     *pnew_CU_ncols = CU_ncols; 
   100         printf(
" QRA\t|%d\t|CU_nrows:%d CU_ncols:%d ld_CU:%d  QRA:%p\n", __LINE__, CU_nrows, CU_ncols, ld_CU, _CU);
   102     info = LAPACKE_dgeqrf(
   103             LAPACK_COL_MAJOR, CU_nrows, CU_ncols, _CU, ld_CU, qrtauA);
   109                 "%s %d ERROR in LAPACKE_dgeqrf(1:CU_nrows:%d 2:CU_ncols:%d 3:_CU:%p 4:ld_CU:%d 5:qrtauA:%p) info=%d maxrank:%d\n",
   110                 __FILE__, __LINE__, CU_nrows, CU_ncols, _CU, ld_CU, qrtauA, info, maxrank);
   126         double* _CV, 
int ld_CV, 
int _Crk,
   128         double* _AV, 
int ld_AV, 
int _Ark,
   129         double* _BU, 
int ld_BU,
   130         double* _BV, 
int ld_BV, 
int _Brk,
   131         double* qrtauB, 
double* AcolBcolT){
   134     assert(AcolBcolT != NULL);
   135     int ld_AcolBcolT = maxrank; 
   144         printf(
" QRB\t|%d\t| (AV*BV^T)       M,N,K:%d,%d,%d  LDA,LDB,LDC:%d,%d,%d alpha:%g beta:%g\n", __LINE__,AV_ncols, BV_ncols, BV_nrows,ld_AV,ld_BV,ld_AcolBcolT, alpha, beta);
   147     cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans,
   150             AcolBcolT, ld_AcolBcolT);
   151     int AcolBcolT_nrows = AV_ncols;
   152     int AcolBcolT_ncols = BV_ncols;
   157     if ((AcolBcolT_nrows + _Crk) > 2*maxrank){
   158         fprintf(stderr, 
"%s %s %d: Sum of two ranks (%d) is too big! \   159                 AcolBcolT:%d _Crk:%d maxrank:%d (x2: %d)\n",
   160                 __FILE__, __func__, __LINE__, (AcolBcolT_nrows + _Crk), AcolBcolT, _Crk, maxrank, 2*maxrank);
   165         printf(
" QRB\t|%d\t| (AV*BV^T) * BU^T  M,N,K:%d,%d,%d  LDA,LDB,LDC:%d,%d,%d alpha:%g beta:%g   CV_ncols:%d AcolBcolT_nrows:%d ldcB:%d\n",
   166                 __LINE__, BU_nrows,AcolBcolT_nrows, BU_ncols,ld_BU,ld_AcolBcolT,ld_CV, alpha, beta, CV_ncols, AcolBcolT_nrows, ld_CV);
   170     cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,
   171             BU_nrows, AcolBcolT_nrows, BU_ncols,
   174             AcolBcolT, ld_AcolBcolT,
   176             &_CV[CV_ncols*ld_CV], ld_CV);
   177     CV_ncols        += AcolBcolT_nrows;
   178     *pnew_CV_ncols   = CV_ncols;       
   182         printf(
" QRB\t|%d\t|CV_nrows:%d CV_ncols:%d ld_CV:%d QRB:%p\n",
   183                 __LINE__, CV_nrows, CV_ncols, ld_CV, _CV);
   186     info = LAPACKE_dgeqrf(LAPACK_COL_MAJOR, CV_nrows, CV_ncols,
   190                 "%s %d ERROR in LAPACKE_dgeqrf(1:CV_nrows:%d 2:CV_ncols:%d 3:_CV:%p 4:ld_CV:%d 5:qrtauB:%p) info=%d maxrank:%d\n",
   191                 __FILE__, __LINE__, CV_nrows, CV_ncols, _CV, ld_CV, qrtauB,info,  maxrank);
   201         double* _CU, 
int ld_CU,
   202         double* _CV, 
int ld_CV, 
int _Crk,
   203         double* _U, 
int ld_U,
   204         double* _V, 
int ld_V, 
int* pnew_UVrk,
   207         double* _rA, 
double* _rB, 
double* _T, 
double* sigma, 
double* svdsuperb
   214     int rA_nrows  = chameleon_min(CU_nrows, CU_ncols);
   215     int rA_ncols  = CU_ncols;    
   216     int maxncolsR = 2*_Crk; 
   217     int ld_rA     = rA_nrows;
   219     if(rA_nrows != rA_ncols){
   220         printf(
"TRMM cannot be used because R coming from QR factorization of A is not square nrows: %d ncols:%d \n", rA_nrows, rA_ncols);
   226         printf(
" SVD\t|%d\t| copy rA rA_nrows:%d rA_ncols:%d ld_CU:%d ld_rA:%d CU:%p rA:%p\n",
   227               __LINE__, rA_nrows, rA_ncols, ld_CU, ld_rA, _CU, _rA);
   231     LAPACK_dlaset(&chlow,
   232             &rA_nrows, &rA_ncols, &zero, &zero, _rA, &ld_rA);
   235             &rA_nrows, &rA_ncols,
   239         printf(
"%d\t|_CU and _rA\n", __LINE__);
   247     int rB_nrows = chameleon_min(CV_nrows, CV_ncols);
   248     int rB_ncols = CV_ncols;
   249     int ld_rB    = rB_nrows;     
   250     assert(rA_ncols == rB_ncols);
   254         printf(
" SVD\t|%d\t| copy rB rB_nrows:%d rB_ncols:%d ld_rB:%d ld_CV:%d rB:%p CV:%p\n",
   255               __LINE__, rB_nrows, rB_ncols, ld_rB, ld_CV, _rB, _CV);
   259         LAPACK_dlaset(&chlow,
   260                 &rB_nrows, &rB_ncols, &zero, &zero, _rB, &ld_rB);
   262                 &rB_nrows, &rB_ncols, _CV, &ld_CV, _rB, &ld_rB);
   268         printf(
"%d\t|_CV and _rB\n", __LINE__);
   274     int T_nrows = rA_ncols;
   275     int T_ncols = rA_ncols;
   280         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, rA_nrows, rA_ncols,  alpha, _rB, ld_rB, _rA, ld_rA); 
   284         cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,
   285                 rA_nrows, rB_nrows, rB_ncols, 
CBLAS_SADDR(alpha), _rA, ld_rA,
   289         printf(
" SVD\t|%d\t| T=rA*rB^T  rA_nrows:%d rB_nrows:%d rB_ncols:%d ld_rA:%d ld_rB:%d ld_T:%d alpha:%g beta:%g\n",
   290               __LINE__, rA_nrows, rB_nrows, rB_ncols, ld_rA, ld_rB, ld_T, alpha, beta);
   296     assert(sigma != NULL);
   297     int size_sigma = T_nrows;
   298     assert(svdsuperb != NULL);
   300         printf(
" SVD\t|%d\t| svd(T)    (3.m)T_nrows:%d (4.n)T_ncols:%d ld_T:%d ld_U:%d (11.ldvt)ld_V:%d _T:%p (zero based parameter indices)\n",
   301               __LINE__, T_nrows, T_ncols,  ld_T, ld_U, ld_V, _T);
   303     info = LAPACKE_dgesvd(LAPACK_COL_MAJOR, 
'A', 
'A',
   304             T_nrows, T_ncols, _T, ld_T, sigma,
   309                 "%s %d ERROR in LAPACKE_dgesvd() info=%d"   310                 "1:T_nrows=%d, 2:T_ncols=%d, 3:_T=%p, 4:ld_T=:%d, 5:sigma=%p,"   311                 "6:_U=%p, 7:ld_U=%d, 8:_V=%p, 9:ld_V:%d,"   314                 __FILE__, __LINE__, info,
   315                 T_nrows, T_ncols, _T, ld_T, sigma,
   320     int U_nrows, U_ncols, V_nrows, V_ncols;
   321     U_nrows = U_ncols = V_nrows = V_ncols = T_nrows;
   327         printf(
"%d %e\n", rank, acc);
   331     double relacc = (acc);
   335         if(rank > size_sigma)
   336             finalrank = size_sigma;
   339         int newrank = size_sigma;
   341         for(i=2;i<size_sigma;i++){
   342             if(sigma[i] < relacc)
   350     if(finalrank > maxrank){
   351         fprintf(stderr, 
"%s %s %d: Rank after truncation is too big! finalrank:%d maxrank:%d\n", __FILE__, __func__, __LINE__, finalrank, maxrank);
   356         printf(
"rank:%d acc:%.2e   relac:%.2e size_sigma:%d final_rank:%d:  ",
   357                 rank,   acc, relacc, size_sigma, finalrank);
   358         for(i=0;i<size_sigma;i++){
   359             printf(
"%d:%.2e ", i,sigma[i]);
   364         printf(
"size_sigma:%d finalrank:%d %.2e\n", size_sigma, finalrank, acc);
   369     int rank_V = finalrank;
   371     for(k = 0; k < rank_V; k++){
   372         double diagval = sigma[k];
   373         cblas_dscal(V_ncols, 
CBLAS_SADDR(diagval), &_V[k], ld_V);
   376     printf(
" SVD\t|%d\t| S*V     V_ncols:%d ld_V:%d _V:%p\n",
   377               __LINE__, V_ncols,  ld_V, _V);
   382     *pnew_UVrk = finalrank;
   388         double* _CU, 
int ld_CU, 
int _Crk,
   389         double* _U,  
int ld_U,  
int _Urk,
   394     int CU_ncols = ncols_qA;
   395     int U_nrows = ncols_qA; 
   397     int nrows = CU_nrows - U_nrows;
   400         printf(
" NEWU\t|%d\t|    zero     nrows:%d U_ncols:%d ld_U:%d _Crk:%d CU_ncols:%d _Urk:%d   CU_nrows:%d U_nrows:%d diff:%d\n",
   401               __LINE__, nrows, U_ncols,  ld_U, _Crk, CU_ncols,  _Urk, CU_nrows, U_nrows, nrows);
   407     LAPACK_dlaset( &
uplo, &nrows, &U_ncols, &zero, &zero, &_U[U_nrows], &ld_U );
   410                 "%s %d ERROR in LAPACKE_dlaset() info=%d\n",
   411                 __FILE__, __LINE__, info);
   418     info = LAPACKE_dormqr(LAPACK_COL_MAJOR, 
'L', 
'N',
   419             CU_nrows, U_ncols, ncols_qA, _CU, ld_CU, qrtauA, _U, ld_U);
   421         printf(
" NEWU\t|%d\t|    ormqr     CU_nrows (new U_nrows):%d U_ncols:%d ncols_qA:%d ld_CU:%d ld_U:%d\n",
   422               __LINE__, CU_nrows, U_ncols, ncols_qA, ld_CU, ld_U);
   429                 "%s %d ERROR in LAPACKE_dormqr() info=%d\n",
   430                 __FILE__, __LINE__, info);
   431         printf(
" NEWU\t|%d\t|    ormqr     CU_nrows (new U_nrows):%d U_ncols:%d ncols_qA:%d ld_CU:%d ld_U:%d U_nrows:%d\n",
   432               __LINE__, CU_nrows, U_ncols, ncols_qA, ld_CU, ld_U, U_nrows);
   434             int i, j, ssend, ldarr;
   436             arr = _CU; ldarr = ld_CU; ssend = ncols_qA;
   439                     printf(
"%.3e ", arr[j*ldarr+i]);
   442                 for(j=ssend-4;j<ssend;j++){
   443                     printf(
"%.3e ", arr[j*ldarr+i]);
   448             arr = _U; ldarr = ld_U; ssend = U_ncols;
   451                     printf(
"%.3e ", arr[j*ldarr+i]);
   454                 for(j=ssend-4;j<ssend;j++){
   455                     printf(
"%.3e ", arr[j*ldarr+i]);
   459             for(j=0;j<U_ncols;j++){
   460                 for(i=0; i < CU_nrows; i++){
   461                     double val = _U[j*ld_U+i];
   463                         printf(
"%d,%d is nan (%g) CU_nrows:%d U_ncols:%d ld_U:%d\n", i, j, val,  CU_nrows, U_ncols, ld_U);
   471     LAPACKE_dlacpy(LAPACK_COL_MAJOR, 
'A', U_nrows, U_ncols,
   472             _U, ld_U, _CU, ld_CU);
   474         printf(
" NEWU\t|%d\t|    copy     U_nrows:%d U_ncols:%d ld_CU:%d ld_U:%d\n",
   475                 __LINE__, U_nrows, U_ncols, ld_CU, ld_U);
   480                     printf(
"%.3e ", _CU[j*ld_CU+i]);
   483                 for(j=U_ncols-4;j<U_ncols;j++){
   484                     printf(
"%.3e ", _CU[j*ld_CU+i]);
   496         double* _CV, 
int ld_CV, 
int _Crk,
   497         double* _V,  
int ld_V,  
int _Vrk,
   502     int CV_ncols = ncols_qB;
   504     int V_ncols = ncols_qB; 
   505     int ncols = CV_nrows - V_ncols;
   508         printf(
" NEWV\t|%d\t|    zero     V_nrows:%d ncols:%d ld_V:%d _Crk:%d CV_ncols:%d _Vrk:%d CV_nrows:%d V_ncols:%d diff:%d\n",
   509               __LINE__, V_nrows,  ncols, ld_V, _Crk, CV_ncols,  _Vrk, CV_nrows, V_ncols, ncols);
   514     size_t iv = V_ncols*ld_V;
   515     LAPACK_dlaset( &
uplo, &V_nrows, &ncols, &zero, &zero, &(_V[iv]), &ld_V );
   520         printf(
" NEWV\t|%d\t|    ormqr     V_nrows:%d CV_nrows:%d ncols_qB:%d ld_CV:%d ld_V:%d\n",
   521               __LINE__, V_nrows, CV_nrows, ncols_qB, ld_CV, ld_V);
   524     info = LAPACKE_dormqr(LAPACK_COL_MAJOR, 
'R', 
'T',
   525             V_nrows, CV_nrows, ncols_qB, _CV, ld_CV, qrtauB, _V, ld_V);
   531                 "%s %d ERROR in LAPACKE_dormqr() info=%d\n",
   532                 __FILE__, __LINE__, info);
   538         printf(
" NEWV\t|%d\t|    trans    V_nrows:%d V_ncols:%d ld_V:%d ld_CV%d\n",
   539               __LINE__, V_nrows,  V_ncols, ld_V, ld_CV);
   541     LAPACKE_dge_trans(LAPACK_COL_MAJOR, V_nrows, V_ncols,
   542             _V, ld_V, _CV, ld_CV);
   544         printf(
" NEWV\t|%d\t|    copy     V_nrows:%d V_ncols:%d ld_CV:%d ld_V:%d\n",
   545               __LINE__, V_nrows, V_ncols, ld_CV, ld_V);
   550                       printf(
"%.3e ", _CV[j*ld_CV+i]);
   553                   for(j=V_ncols-4;j<V_ncols;j++){
   554                       printf(
"%.3e ", _CV[j*ld_CV+i]);
   591         printf(
"%d:%s work:%p ", __LINE__, __func__, work);
   592         printf(
"M:%d N:%d  LDA:%d LDB:%d LDC:%d rk:%d maxrk:%d acc:%e a:%e b:%e\n",
   593                 M, N, LDA, LDB, LDC, rk, maxrk, acc, alpha, beta);
   605     int _Ark = (int)(Ark[0]);
   606     int _Brk = (int)(Brk[0]);
   607     int _Crk = (int)(Crk[0]);
   608     if(_Ark == 0 || _Brk == 0 || _Crk == 0){
   609         fprintf(stderr, 
"%s %d: _Ark=%d _Brk=%d _Crk=%d. These rank values should not be zero.\n", __FILE__, __LINE__, _Ark, _Brk, _Crk);
   614     int _M = M; 
int _N = N;
   615     double* _CU = CU; 
int ld_CU = LDC;
   616     double* _CV = CV; 
int ld_CV = LDC; 
int* pnew_Crk = &new_Crk;
   617     double* _AU = AU; 
int ld_AU = LDA;
   618     double* _AV = AV; 
int ld_AV = LDA;
   619     double* _BU = BU; 
int ld_BU = LDB;
   620     double* _BV = BV; 
int ld_BV = LDB;
   627     double* CUclone = NULL;
   628     size_t CUclone_nelm =  _M * 2 * maxrk;
   630     int use_CUV_clone = 1;
   631     if(use_CUV_clone == 1) {
   634             work += CUclone_nelm;
   636             CUclone = malloc(CUclone_nelm * 
sizeof(
double));
   638         LAPACK_dlacpy(&chall,
   641                 CUclone, &ld_CUclone);
   644     double* CVclone = NULL;
   645     size_t CVclone_nelm = _M * 2 * maxrk;
   647     if(use_CUV_clone == 1) {
   650             work += CVclone_nelm;
   652             CVclone = malloc(CVclone_nelm * 
sizeof(
double));
   654         LAPACK_dlacpy(&chall,
   657                 CVclone, &ld_CVclone);
   659     double* _CU_save = _CU;
   660     double* _CV_save = _CV;
   662     if(use_CUV_clone == 1) {
   667     double* qrtauA = NULL;
   668     size_t qrtauA_nelm = nb;
   672         qrtauA = malloc(qrtauA_nelm * 
sizeof(
double));
   674     assert(qrtauA != NULL);
   675     double* qrtauB = NULL;
   676     size_t qrtauB_nelm = nb;
   678         qrtauB = work + qrtauA_nelm;
   680         qrtauB = malloc(qrtauB_nelm * 
sizeof(
double));
   682     assert(qrtauB != NULL);
   689     __qra(_M, maxrk, _CU, ld_CU, _Crk, &CU_ncols, _AU, ld_AU, _Ark, alpha, beta, qrtauA);
   691     assert(CU_ncols == (_Crk + _Ark));
   696     double* qrb_aubut = NULL;
   697     size_t qrb_aubut_nelm = maxrk * maxrk;
   699        qrb_aubut = work + qrtauA_nelm + qrtauB_nelm;
   701        qrb_aubut = malloc(qrb_aubut_nelm * 
sizeof(
double));
   703     __qrb(_M, maxrk, _CV, ld_CV, _Crk, &CV_ncols,
   704             _AV, ld_AV, _Ark, _BU, ld_BU, _BV, ld_BV, _Brk, qrtauB, qrb_aubut);
   705     if(CU_ncols == 0 || CV_ncols == 0){
   706         fprintf(stderr, 
"%s %d: CU_ncols=%d CV_ncols=%d. These values should not be zero.\n", __FILE__, __LINE__, CU_ncols, CV_ncols);
   713     assert(CU_ncols == CV_ncols);
   720     size_t newU_nelm = nb * maxrk;
   722         newU = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm;
   724         newU = malloc(newU_nelm * 
sizeof(
double));
   727     size_t newV_nelm = nb * maxrk;
   730         newV = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm;
   732         newV = malloc(newV_nelm *  
sizeof(
double));
   734     assert(newU != NULL);
   735     assert(newV != NULL);
   736     double *svd_rA = NULL;
   737     int svd_rA_nrows  = chameleon_min(_M, CU_ncols);
   738     int svd_rA_ncols  = CU_ncols;    
   741         svd_rA_nelm = svd_rA_nrows * svd_rA_nrows;
   743         svd_rA_nelm = svd_rA_nrows * svd_rA_ncols;
   746         svd_rA = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm + newV_nelm;
   748         svd_rA = malloc(svd_rA_nelm * 
sizeof(
double));
   750     double *svd_rB = NULL;
   751     int svd_rB_nrows = chameleon_min(_M, CV_ncols);
   752     int svd_rB_ncols = CV_ncols;
   755         svd_rB_nelm = svd_rB_nrows * svd_rB_ncols;
   757             svd_rB = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm + newV_nelm + svd_rA_nelm;
   759             svd_rB = malloc(svd_rB_nelm * 
sizeof(
double));
   764     double *svd_T = NULL;
   765     int svd_T_nrows = svd_rA_ncols;
   766     int svd_T_ncols = svd_rA_ncols;
   769         svd_T_nelm = svd_T_nrows * svd_T_ncols;
   771             svd_T = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm + newV_nelm + svd_rA_nelm + svd_rB_nelm;
   773             svd_T = malloc(svd_T_nelm * 
sizeof(
double));
   779     double *svd_sigma  = NULL;
   780     double *svd_superb = NULL;
   781     size_t svd_sigma_nelm  = svd_T_nrows;
   782     size_t svd_superb_nelm = svd_T_nrows;
   784         svd_sigma  = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm + newV_nelm + svd_rA_nelm + svd_rB_nelm + svd_T_nelm;
   785         svd_superb = work + qrtauA_nelm + qrtauB_nelm + qrb_aubut_nelm + newU_nelm + newV_nelm + svd_rA_nelm + svd_rB_nelm + svd_T_nelm + svd_sigma_nelm;
   787         svd_sigma  = malloc(svd_sigma_nelm  * 
sizeof(
double));
   788         svd_superb = malloc(svd_superb_nelm * 
sizeof(
double));
   790     if(ld_newV < CU_ncols){
   791         fprintf(stderr, 
"%s %d: Increase maxrank. %d is not enough ld_newV:%d CU_ncols:%d\n", __FILE__, __LINE__, maxrk, ld_newV, CU_ncols);
   799             _CV,  ld_CV, CU_ncols,
   801             newV,   ld_newV,  &new_UVrk ,
   803             svd_rA, svd_rB, svd_T, svd_sigma, svd_superb
   812     int ncols_qA = CU_ncols;
   817             newU, ld_newU, new_UVrk,
   822     int ncols_qB = CV_ncols;
   827             newV, ld_newV, new_UVrk,
   830     *pnew_Crk = new_UVrk;
   835     if(use_CUV_clone == 1) {
   836         LAPACK_dlacpy(&chall,
   838                 CUclone, &ld_CUclone,
   842         LAPACK_dlacpy(&chall,
   844                 CVclone, &ld_CVclone,
   865     int old_Crk = Crk[0];
   867         printf(
"Ark:%d Brk:%d Crk[0]:%d %g RANK CHANGE: %d->%d\n",
   868                 _Ark, _Brk, Crk[0], Crk[0], old_Crk, *pnew_Crk);
   872         int casted_Crk = (int)(Crk[0]);
   873         printf(
"casted_Crk:%d Ark:%d Brk:%d Crk[0]:%d %g RANK CHANGE: %d->%d\n",
   874                 casted_Crk, _Ark, _Brk, Crk[0], Crk[0], old_Crk, new_Crk);
 
void __newu(int _M, int ncols_qA, double *_CU, int ld_CU, int _Crk, double *_U, int ld_U, int _Urk, double *qrtauA)
 
void __newv(int _M, int ncols_qB, double *_CV, int ld_CV, int _Crk, double *_V, int ld_V, int _Vrk, double *qrtauB)
 
void __qrb(int _M, int maxrank, double *_CV, int ld_CV, int _Crk, int *pnew_CV_ncols, double *_AV, int ld_AV, int _Ark, double *_BU, int ld_BU, double *_BV, int ld_BV, int _Brk, double *qrtauB, double *AcolBcolT)
 
void __qra(int _M, int maxrank, double *_CU, int ld_CU, int _Crk, int *pnew_CU_ncols, double *_AU, int ld_AU, int _Ark, double alpha, double beta, double *qrtauA)
 
void HCORE_zgemm(MORSE_enum transA, int transB, int M, int N, double alpha, double *AU, double *AV, double *Ark, int LDA, double *BU, double *BV, double *Brk, int LDB, double beta, double *CU, double *CV, double *Crk, int LDC, int rk, int maxrk, double acc, double *work)
 
#define CBLAS_SADDR(_val)
 
void __svd(int _M, int maxrank, double *_CU, int ld_CU, double *_CV, int ld_CV, int _Crk, double *_U, int ld_U, double *_V, int ld_V, int *pnew_UVrk, int rank, double acc, double *_rA, double *_rB, double *_T, double *sigma, double *svdsuperb)
 
void hc_printmat(double *A, int m, int n, int ld)