HiCMA
Hierarchical Computations on Manycore Architectures
hcore_zsyrk.c
Go to the documentation of this file.
1 
16 #include "coreblas/coreblas.h"
17 #include "coreblas/lapacke.h"
18 #include <assert.h>
19 
20 
22 extern int use_scratch;
23 
24 /*
25  * N != LDA
26  * Leading dim of AU is LDA
27  * M LDA
28  * ______
29  * | AU |
30  * K |______|
31  * ____
32  * | |
33  * N | AV |
34  * LDA | |
35  * |____|
36  * N LDA X
37  * _________
38  * | AV |
39  * M K |_________|
40  * _____ _____ ____
41  * | | | | M | |
42  * M | CD | = | CD | + LDA | AU |
43  * LDCD |_____| |_____| |____|
44  */
45 void HCORE_zsyrk(MORSE_enum uplo, MORSE_enum trans,
46  int M, int K,
47  double alpha,
48  const double *AU, int LDAU,
49  const double *AV, int LDAV,
50  double beta,
51  double *CD, int LDCD,
52  double* work
53  )
54 {
55  int64_t N = LDAU; //ASSUMPTION FIXME
56  int64_t LDA = LDAU;
57 
58  /*cblas_zsyrk(*/
59  /*CblasColMajor,*/
60  /*(CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,*/
61  /*N, N,*/
62  /*CBLAS_SADDR(alpha), A, LDA,*/
63  /*CBLAS_SADDR(beta), C, LDC);*/
64 
65  int64_t A_colfactor_ncols = K;
69  int64_t bufmtx_nrows = A_colfactor_ncols;
70  int64_t bufmtx_ncols = A_colfactor_ncols;
71  size_t bufmtx_nelm = bufmtx_nrows * bufmtx_ncols;
72  /*size_t bufmtx_nelm = K * K;*/
73  //ALLOCATE1 bufmtx_syrk_kk[myid];
74  double* bufmtx = NULL;
75  if(use_scratch){
76  bufmtx = work;
77  } else {
78  bufmtx = malloc(bufmtx_nelm * sizeof(double));
79  }
80  assert(bufmtx != NULL);
81  double alpha2 = 1.0f;
82  double beta2 = 0.0f;
83  int64_t ld_AV = M; //ASSUMPTION
84  int64_t ld_bufmtx = bufmtx_nrows; //ASSUMPTION
85 
86  int64_t AV_nrows = M; //ASSUMPTION
87  int64_t AV_ncols = K;
88  if(syrk_print_index){
89  printf(" SYRK 1 |%d\t|Trans NoTrans AV_ncols:%d AV_ncols:%d AV_nrows:%d alpha2:%.2e ld_AV:%d beta2:%.2e ld_bufmtx:%d AV:%p bufmtx:%p\n", __LINE__,
90  AV_ncols, AV_ncols, AV_nrows, alpha2, ld_AV, beta2, ld_bufmtx, AV, bufmtx
91  );
92  printf(" SYRK 1 |%d\t|Trans NoTrans K:%d K:%d N:%d alpha2:%.2e LDA:%d beta2:%.2e K:%d AV:%p bufmtx:%p\n", __LINE__,
93  K, K, N, alpha2, LDA, beta2, K, AV, bufmtx
94  );
95  }
96  /*assert(AV_ncols == K);*/
97  /*assert(AV_nrows == N);*/
98  /*assert(ld_AV == LDA);*/
99  /*assert(ld_bufmtx== K);*/
100  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, K, K, N, alpha2, AV, LDA, AV, LDA, beta2, bufmtx, K);
101  /*bufmtx_nrows = bufmtx_ncols = AV_ncols; */
102  double* AcolBcolT = bufmtx;
103  int64_t AcolBcolT_ncols = bufmtx_ncols;
104 
106  int64_t A_rowfactor_nrows = M;
107  int64_t A_rowfactor_ncols = K;
108  int64_t AU_nrows = M;
109  int64_t AU_ncols = K;
110  int64_t bufmtx2_nrows = A_rowfactor_nrows;
111  int64_t bufmtx2_ncols = A_rowfactor_ncols;
112  //ALLOCATE2
113  //bufmtx_syrk_U[myid];
114  double* bufmtx2 = NULL;
115 
116  if(use_scratch){
117  bufmtx2 = work + bufmtx_nelm;
118  } else {
119  /*malloc(bufmtx2_nrows * bufmtx2_ncols * sizeof(double));*/
120  bufmtx2 = malloc(M * K * sizeof(double));
121  }
122  assert(bufmtx2 != NULL);
123 
124  int64_t ld_AU = M; //ASSUMPTION
125  int64_t ld_bufmtx2 = M; //ASSUMPTION
126  if(syrk_print_index){
127  printf(" SYRK 2 |%d\t|NoTrans NoTrans AU_nrows:%d AcolBcolT_ncols:%d AU_ncols:%d alpha2:%.2e ld_AU:%d ld_bufmtx:%d beta2:%.2e ld_bufmtx2:%d AU:%p AcolBcolT:%p bufmtx2:%p\n", __LINE__,
128  AU_nrows, AcolBcolT_ncols, AU_ncols, alpha2, ld_AU, ld_bufmtx, beta2, ld_bufmtx2, AU, AcolBcolT, bufmtx2
129  );
130  printf(" SYRK 2 |%d\t|NoTrans NoTrans M:%d K:%d K:%d alpha2:%.2e LDA:%d K:%d beta2:%.2e M:%d AU:%p AcolBcolT:%p bufmtx2:%p\n", __LINE__,
131  M, K, K, alpha2, LDA, K, beta2, M , AU, AcolBcolT, bufmtx2
132  );
133 
134  }
135  /*assert(AU_nrows == M);*/
136  /*assert(AcolBcolT_ncols == K);*/
137  /*assert(AU_ncols == K);*/
138  /*assert(ld_AU == LDA);*/
139  /*assert(ld_bufmtx2 == M);*/
140  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, K, K, alpha2, AU, LDA, AcolBcolT, K, beta2, bufmtx2, M);
141  bufmtx2_nrows = AU_nrows;
142  bufmtx2_ncols = AcolBcolT_ncols;
143 
144 
145  double* Arow_AcolBcolT = bufmtx2;
146  int64_t ld_C = M; //ASSUMPTION
148  int64_t Arow_AcolBcolT_nrows = bufmtx2_nrows;
149  int64_t Arow_AcolBcolT_ncols = bufmtx2_ncols;
150  if(syrk_print_index){
151  printf(" SYRK 3 |%d\t|NoTrans Trans Arow_AcolBcolT_nrows:%d AU_nrows:%d Arow_AcolBcolT_ncols:%d alpha:%.2e ld_bufmtx2:%d ld_AU:%d alpha2:%.2e LDCD:%d Arow_AcolBcolT:%p AU:%p CD:%p\n",
152  __LINE__,Arow_AcolBcolT_nrows, AU_nrows, Arow_AcolBcolT_ncols, alpha, ld_bufmtx2, ld_AU, alpha2, LDCD, Arow_AcolBcolT, AU, CD
153  );
154  printf(" SYRK 3 |%d\t|NoTrans Trans M:%d M:%d K:%d alpha:%.2e M:%d LDA:%d alpha2:%.2e LDCD:%d Arow_AcolBcolT:%p AU:%p CD:%p\n",
155  __LINE__, M, M, K, alpha, M, LDA, alpha2, LDCD, Arow_AcolBcolT, AU, CD
156  );
157  }
158  /*assert(Arow_AcolBcolT_nrows == M);*/
159  /*assert(Arow_AcolBcolT_ncols == K);*/
160  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,
161  M, M, K,
162  alpha,
163  Arow_AcolBcolT, M,
164  AU, LDA,
165  alpha2,
166  CD, LDCD);
167  int64_t C_ncols = AU_nrows;
168  {
169  int i,j;
170  for(j = 1; j < M; j++){
171  for(i = 0; i < j; i++){
172  CD[j*LDCD+i] = beta2;
173  }
174  }
175  }
176 
177  if(use_scratch == 0){
178  //FREE1
179  free(bufmtx);
180  //FREE2
181  free(bufmtx2);
182  }
183 }
184 
185 
void HCORE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int M, int K, double alpha, const double *AU, int LDAU, const double *AV, int LDAV, double beta, double *CD, int LDCD, double *work)
Definition: hcore_zsyrk.c:45
int syrk_print_index
Definition: hcore_zsyrk.c:21
int use_scratch
int trans[3]
int uplo[2]