16 #include "coreblas/coreblas.h" 17 #include "coreblas/lapacke.h" 48 const double *AU,
int LDAU,
49 const double *AV,
int LDAV,
65 int64_t A_colfactor_ncols = K;
69 int64_t bufmtx_nrows = A_colfactor_ncols;
70 int64_t bufmtx_ncols = A_colfactor_ncols;
71 size_t bufmtx_nelm = bufmtx_nrows * bufmtx_ncols;
74 double* bufmtx = NULL;
78 bufmtx = malloc(bufmtx_nelm *
sizeof(
double));
80 assert(bufmtx != NULL);
84 int64_t ld_bufmtx = bufmtx_nrows;
89 printf(
" SYRK 1 |%d\t|Trans NoTrans AV_ncols:%d AV_ncols:%d AV_nrows:%d alpha2:%.2e ld_AV:%d beta2:%.2e ld_bufmtx:%d AV:%p bufmtx:%p\n", __LINE__,
90 AV_ncols, AV_ncols, AV_nrows, alpha2, ld_AV, beta2, ld_bufmtx, AV, bufmtx
92 printf(
" SYRK 1 |%d\t|Trans NoTrans K:%d K:%d N:%d alpha2:%.2e LDA:%d beta2:%.2e K:%d AV:%p bufmtx:%p\n", __LINE__,
93 K, K, N, alpha2, LDA, beta2, K, AV, bufmtx
100 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, K, K, N, alpha2, AV, LDA, AV, LDA, beta2, bufmtx, K);
102 double* AcolBcolT = bufmtx;
103 int64_t AcolBcolT_ncols = bufmtx_ncols;
106 int64_t A_rowfactor_nrows = M;
107 int64_t A_rowfactor_ncols = K;
108 int64_t AU_nrows = M;
109 int64_t AU_ncols = K;
110 int64_t bufmtx2_nrows = A_rowfactor_nrows;
111 int64_t bufmtx2_ncols = A_rowfactor_ncols;
114 double* bufmtx2 = NULL;
117 bufmtx2 = work + bufmtx_nelm;
120 bufmtx2 = malloc(M * K *
sizeof(
double));
122 assert(bufmtx2 != NULL);
125 int64_t ld_bufmtx2 = M;
127 printf(
" SYRK 2 |%d\t|NoTrans NoTrans AU_nrows:%d AcolBcolT_ncols:%d AU_ncols:%d alpha2:%.2e ld_AU:%d ld_bufmtx:%d beta2:%.2e ld_bufmtx2:%d AU:%p AcolBcolT:%p bufmtx2:%p\n", __LINE__,
128 AU_nrows, AcolBcolT_ncols, AU_ncols, alpha2, ld_AU, ld_bufmtx, beta2, ld_bufmtx2, AU, AcolBcolT, bufmtx2
130 printf(
" SYRK 2 |%d\t|NoTrans NoTrans M:%d K:%d K:%d alpha2:%.2e LDA:%d K:%d beta2:%.2e M:%d AU:%p AcolBcolT:%p bufmtx2:%p\n", __LINE__,
131 M, K, K, alpha2, LDA, K, beta2, M , AU, AcolBcolT, bufmtx2
140 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, K, K, alpha2, AU, LDA, AcolBcolT, K, beta2, bufmtx2, M);
141 bufmtx2_nrows = AU_nrows;
142 bufmtx2_ncols = AcolBcolT_ncols;
145 double* Arow_AcolBcolT = bufmtx2;
148 int64_t Arow_AcolBcolT_nrows = bufmtx2_nrows;
149 int64_t Arow_AcolBcolT_ncols = bufmtx2_ncols;
151 printf(
" SYRK 3 |%d\t|NoTrans Trans Arow_AcolBcolT_nrows:%d AU_nrows:%d Arow_AcolBcolT_ncols:%d alpha:%.2e ld_bufmtx2:%d ld_AU:%d alpha2:%.2e LDCD:%d Arow_AcolBcolT:%p AU:%p CD:%p\n",
152 __LINE__,Arow_AcolBcolT_nrows, AU_nrows, Arow_AcolBcolT_ncols, alpha, ld_bufmtx2, ld_AU, alpha2, LDCD, Arow_AcolBcolT, AU, CD
154 printf(
" SYRK 3 |%d\t|NoTrans Trans M:%d M:%d K:%d alpha:%.2e M:%d LDA:%d alpha2:%.2e LDCD:%d Arow_AcolBcolT:%p AU:%p CD:%p\n",
155 __LINE__, M, M, K, alpha, M, LDA, alpha2, LDCD, Arow_AcolBcolT, AU, CD
160 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,
167 int64_t C_ncols = AU_nrows;
170 for(j = 1; j < M; j++){
171 for(i = 0; i < j; i++){
172 CD[j*LDCD+i] = beta2;
void HCORE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int M, int K, double alpha, const double *AU, int LDAU, const double *AV, int LDAV, double beta, double *CD, int LDCD, double *work)