16 #include "coreblas/coreblas.h"    17 #include "coreblas/lapacke.h"    48                 const double *AU, 
int LDAU,
    49                 const double *AV, 
int LDAV,
    65     int64_t A_colfactor_ncols = K;
    69     int64_t bufmtx_nrows = A_colfactor_ncols;
    70     int64_t bufmtx_ncols = A_colfactor_ncols;
    71     size_t bufmtx_nelm = bufmtx_nrows * bufmtx_ncols;
    74     double* bufmtx = NULL;
    78         bufmtx = malloc(bufmtx_nelm * 
sizeof(
double));
    80     assert(bufmtx != NULL);
    84     int64_t ld_bufmtx = bufmtx_nrows; 
    89         printf(
" SYRK 1 |%d\t|Trans NoTrans AV_ncols:%d AV_ncols:%d AV_nrows:%d alpha2:%.2e ld_AV:%d beta2:%.2e ld_bufmtx:%d AV:%p bufmtx:%p\n", __LINE__,
    90                 AV_ncols, AV_ncols, AV_nrows, alpha2, ld_AV, beta2, ld_bufmtx, AV, bufmtx    
    92         printf(
" SYRK 1 |%d\t|Trans NoTrans K:%d K:%d N:%d alpha2:%.2e LDA:%d beta2:%.2e K:%d AV:%p bufmtx:%p\n", __LINE__,
    93                                               K, K, N,     alpha2,     LDA,   beta2,     K,   AV, bufmtx
   100     cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, K, K, N, alpha2, AV, LDA, AV, LDA, beta2, bufmtx, K);
   102     double* AcolBcolT = bufmtx;
   103     int64_t AcolBcolT_ncols = bufmtx_ncols;
   106     int64_t A_rowfactor_nrows = M;
   107     int64_t A_rowfactor_ncols = K;
   108     int64_t AU_nrows = M;
   109     int64_t AU_ncols = K;
   110     int64_t bufmtx2_nrows = A_rowfactor_nrows; 
   111     int64_t bufmtx2_ncols = A_rowfactor_ncols;
   114     double* bufmtx2 = NULL;
   117         bufmtx2 = work + bufmtx_nelm;
   120         bufmtx2 = malloc(M * K * 
sizeof(
double));
   122     assert(bufmtx2 != NULL);
   125     int64_t ld_bufmtx2 = M; 
   127         printf(
" SYRK 2 |%d\t|NoTrans NoTrans AU_nrows:%d AcolBcolT_ncols:%d AU_ncols:%d alpha2:%.2e ld_AU:%d ld_bufmtx:%d beta2:%.2e ld_bufmtx2:%d AU:%p AcolBcolT:%p bufmtx2:%p\n", __LINE__,
   128                  AU_nrows, AcolBcolT_ncols, AU_ncols, alpha2, ld_AU, ld_bufmtx, beta2, ld_bufmtx2, AU, AcolBcolT, bufmtx2
   130         printf(
" SYRK 2 |%d\t|NoTrans NoTrans M:%d K:%d K:%d alpha2:%.2e LDA:%d K:%d beta2:%.2e M:%d AU:%p AcolBcolT:%p bufmtx2:%p\n", __LINE__,
   131                                                 M,   K,   K,        alpha2, LDA, K,   beta2,     M , AU, AcolBcolT, bufmtx2
   140     cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, K, K, alpha2, AU, LDA, AcolBcolT, K, beta2, bufmtx2, M);
   141     bufmtx2_nrows = AU_nrows;
   142     bufmtx2_ncols = AcolBcolT_ncols;
   145     double* Arow_AcolBcolT = bufmtx2; 
   148     int64_t Arow_AcolBcolT_nrows = bufmtx2_nrows;
   149     int64_t Arow_AcolBcolT_ncols = bufmtx2_ncols;
   151         printf(
" SYRK 3 |%d\t|NoTrans Trans Arow_AcolBcolT_nrows:%d AU_nrows:%d Arow_AcolBcolT_ncols:%d alpha:%.2e ld_bufmtx2:%d ld_AU:%d alpha2:%.2e LDCD:%d Arow_AcolBcolT:%p AU:%p CD:%p\n", 
   152                 __LINE__,Arow_AcolBcolT_nrows,  AU_nrows,  Arow_AcolBcolT_ncols,  alpha,  ld_bufmtx2,  ld_AU,  alpha2,  LDCD,  Arow_AcolBcolT,  AU,  CD 
   154         printf(
" SYRK 3 |%d\t|NoTrans Trans M:%d M:%d K:%d alpha:%.2e M:%d LDA:%d alpha2:%.2e LDCD:%d Arow_AcolBcolT:%p AU:%p CD:%p\n", 
   155                 __LINE__, M, M,  K,  alpha,  M,  LDA,  alpha2,  LDCD,  Arow_AcolBcolT,  AU,  CD 
   160     cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, 
   167     int64_t C_ncols = AU_nrows;
   170         for(j = 1; j < M; j++){
   171             for(i = 0; i < j; i++){
   172                 CD[j*LDCD+i] = beta2;
 void HCORE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int M, int K, double alpha, const double *AU, int LDAU, const double *AV, int LDAV, double beta, double *CD, int LDCD, double *work)