HiCMA
Hierarchical Computations on Manycore Architectures
codelet_zsyrk.c
Go to the documentation of this file.
1 
16 #include "morse.h"
17 #include "runtime/starpu/chameleon_starpu.h"
18 //#include "runtime/starpu/include/runtime_codelet_z.h"
19 
20 #include <sys/time.h>
21 
22 #include "runtime/starpu/runtime_codelets.h"
23 ZCODELETS_HEADER(syrk_hcore)
24 
25 //UPDATE this definition. It was only copy-pasted from runtime/starpu/codelets/codelet_zcallback.c
26 /*CHAMELEON_CL_CB(zsyrk_hcore, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+ M)*M*N)*/
27 
28 
29 #include "hcore_z.h"
30 
/* Globals and helper declared here but defined in other translation units. */
/* The CPU kernel below exits unless this is 1 exactly when HICMA_ALWAYS_FIX_RANK is defined. */
31 extern int global_always_fixed_rank;
/* Rank used by the CPU kernel when global_always_fixed_rank == 1. */
32 extern int global_fixed_rank;
/* Non-zero: the CPU kernel prints a "+SYRK" trace line before computing. */
33 extern int print_index;
/* NOTE(review): presumably enables the trace line printed after the computation - confirm against the repository. */
34 extern int print_index_end;
/* Non-zero: the CPU kernel dumps its input and output matrices via _printmat. */
35 extern int print_mat;
/* Called below as _printmat(ptr, rows, cols, ld); presumably prints an m-by-n matrix with leading dimension ld. */
36 extern void _printmat(double * A, int64_t m, int64_t n, int64_t ld);
43 void HICMA_TASK_zsyrk(const MORSE_option_t *options,
44  MORSE_enum uplo, MORSE_enum trans,
45  int n, int nb,
46  double alpha,
47  const MORSE_desc_t *AUV, int ldauv,
48  const MORSE_desc_t *Ark,
49  int Am, int An,
50  double beta,
51  const MORSE_desc_t *CD, int ldcd,
52  int Cm, int Cn)
53 {
54  int nAUV = AUV->nb;
55  (void)nb;
56  struct starpu_codelet *codelet = &cl_zsyrk_hcore;
57  /*void (*callback)(void*) = options->profiling ? cl_zsyrk_hcore_callback : NULL;*/
58  void (*callback)(void*) = NULL;
59  MORSE_starpu_ws_t *h_work = (MORSE_starpu_ws_t*)(options->ws_host);
60 
61  MORSE_BEGIN_ACCESS_DECLARATION;
62  MORSE_ACCESS_R(AUV, Am, An);
63 #if !defined(HICMA_ALWAYS_FIX_RANK)
64  MORSE_ACCESS_R(Ark, Am, An);
65 #endif
66  MORSE_ACCESS_RW(CD, Cm, Cn);
67  MORSE_END_ACCESS_DECLARATION;
68  starpu_insert_task(
69  starpu_mpi_codelet(codelet),
70  STARPU_VALUE, &uplo, sizeof(MORSE_enum),
71  STARPU_VALUE, &trans, sizeof(MORSE_enum),
72  STARPU_VALUE, &n, sizeof(int),
73  STARPU_VALUE, &alpha, sizeof(double),
74  STARPU_R, RTBLKADDR(AUV, double, Am, An),
75  STARPU_VALUE, &ldauv, sizeof(int),
76 #if !defined(HICMA_ALWAYS_FIX_RANK)
77  STARPU_R, RTBLKADDR(Ark, double, Am, An),
78 #endif
79  STARPU_VALUE, &beta, sizeof(double),
80  STARPU_RW, RTBLKADDR(CD, double, Cm, Cn),
81  STARPU_VALUE, &ldcd, sizeof(int),
82  STARPU_VALUE, &Am, sizeof(int),
83  STARPU_VALUE, &An, sizeof(int),
84  STARPU_VALUE, &Cm, sizeof(int),
85  STARPU_VALUE, &Cn, sizeof(int),
86  STARPU_VALUE, &nAUV, sizeof(int),
87  STARPU_SCRATCH, options->ws_worker,
88  STARPU_VALUE, &h_work, sizeof(MORSE_starpu_ws_t *),
89  STARPU_PRIORITY, options->priority,
90  STARPU_CALLBACK, callback,
91 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
92  STARPU_NAME, "hcore_zsyrk",
93 #endif
94  0);
95 }
96 
97 
98 #if !defined(CHAMELEON_SIMULATION)
99 static void cl_zsyrk_hcore_cpu_func(void *descr[], void *cl_arg)
100 {
101 #ifdef HICMA_DISABLE_ALL_COMPUTATIONS
102  return;
103 #endif
104 #ifdef HICMA_DISABLE_HCORE_COMPUTATIONS
105  return;
106 #endif
107  struct timeval tvalBefore, tvalAfter; // removed comma
108  gettimeofday (&tvalBefore, NULL);
109  MORSE_enum uplo;
110  MORSE_enum trans;
111  int n;
112  double alpha;
113  double *AUV;
114  double *Ark;
115  int ldauv;
116  double beta;
117  double *CD;
118  int ldcd;
119  int Am, An, Cm, Cn;
120  int nAUV;
121 
122  int idescr = 0;
123  AUV = (double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
124 #if !defined(HICMA_ALWAYS_FIX_RANK)
125  Ark = (double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
126  if(global_always_fixed_rank == 1){
127  fprintf(stderr, "global_always_fixed_rank is one. But HICMA_ALWAYS_FIX_RANK is not defined. Exiting...\n");
128  exit(1);
129  }
130 #else
131  if(global_always_fixed_rank != 1){
132  fprintf(stderr, "global_always_fixed_rank must be one. But it is %d. Exiting...\n", global_always_fixed_rank);
133  exit(1);
134  }
135 #endif
136  int _Ark;
137  if(global_always_fixed_rank == 1){
138  _Ark = global_fixed_rank;
139  } else {
140  _Ark = Ark[0];
141  }
142  CD = (double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
143  double* work = NULL;
144  work = (double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
145  MORSE_starpu_ws_t *h_work;
146  starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &alpha, &ldauv, &beta, &ldcd, &Am, &An, &Cm, &Cn, &nAUV, &h_work);
147  double *AU = AUV;
148  int nAU = nAUV/2;
149  size_t nelm_AU = (size_t)ldauv * (size_t)nAU;
150  double *AV = &(AUV[nelm_AU]);
151  int ldau = ldauv;
152  int ldav = ldauv;
153 
154  if(print_index){
155  printf("%d+SYRK\t|CD(%d,%d) AUV(%d,%d)%d N:%d\n",MORSE_My_Mpi_Rank(),Cm, Cn, Am, An, _Ark, n);
156  }
157  if(print_mat){
158  printf("%d\tsyrk-input\n");
159  _printmat(AU, n, _Ark, ldau);
160  _printmat(AV, ldau, _Ark, ldau);
161  _printmat(CD, n, n, ldcd);
162  }
164  n, _Ark,
165  alpha,
166  AU, ldau,
167  AV, ldav,
168  beta,
169  CD, ldcd, work
170  );
171  /*cblas_zsyrk(*/
172  /*CblasColMajor,*/
173  /*(CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,*/
174  /*n, k,*/
175  /*CBLAS_SADDR(alpha), A, lda,*/
176  /*CBLAS_SADDR(beta), C, ldc);*/
178  gettimeofday (&tvalAfter, NULL);
179  printf("%d-SYRK\t|CD(%d,%d) AUV(%d,%d)%d N:%d LDA:%d LDCD:%d\t\t\t\t\tSYRK:%.4f\n",MORSE_My_Mpi_Rank(),Cm, Cn, Am, An, _Ark, n,
180  ldauv, ldcd,
181  (tvalAfter.tv_sec - tvalBefore.tv_sec)
182  +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0
183  );
184  }
185  if(print_mat){
186  printf("%d\tsyrk-output\n");
187  _printmat(CD, n, n, ldcd);
188  }
189 }
190 #endif /* !defined(CHAMELEON_SIMULATION) */
191 
192 /*
193  * Codelet definition
194  */
/*
 * Registers cl_zsyrk_hcore_cpu_func as the CPU implementation of the
 * zsyrk_hcore codelet. The 3 vs 4 mirrors the number of data handles that
 * HICMA_TASK_zsyrk attaches: AUV, CD and the scratch workspace, plus Ark
 * when HICMA_ALWAYS_FIX_RANK is not defined.
 * NOTE(review): presumably the second macro argument is the buffer count -
 * confirm against runtime_codelets.h.
 */
195 #if defined(HICMA_ALWAYS_FIX_RANK)
196 CODELETS_CPU(zsyrk_hcore, 3, cl_zsyrk_hcore_cpu_func)
197 // CODELETS(zsyrk_hcore, 3, cl_zsyrk_hcore_cpu_func, cl_zsyrk_hcore_cuda_func, STARPU_CUDA_ASYNC)
198 #else
199 CODELETS_CPU(zsyrk_hcore, 4, cl_zsyrk_hcore_cpu_func)
200 // CODELETS(zsyrk_hcore, 4, cl_zsyrk_hcore_cpu_func, cl_zsyrk_hcore_cuda_func, STARPU_CUDA_ASYNC)
201 #endif
int print_index_end
Definition: hcore_zgytlr.c:38
void _printmat(double *A, int64_t m, int64_t n, int64_t ld)
int print_mat
void HCORE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int M, int K, double alpha, const double *AU, int LDAU, const double *AV, int LDAV, double beta, double *CD, int LDCD, double *work)
Definition: hcore_zsyrk.c:45
#define AUV(m, n)
Definition: pzgemm.c:60
#define A(m, n)
Definition: pzgemm.c:56
int global_always_fixed_rank
Definition: hcore_zgytlr.c:46
int print_index
int trans[3]
int uplo[2]
void HICMA_TASK_zsyrk(const MORSE_option_t *options, MORSE_enum uplo, MORSE_enum trans, int n, int nb, double alpha, const MORSE_desc_t *AUV, int ldauv, const MORSE_desc_t *Ark, int Am, int An, double beta, const MORSE_desc_t *CD, int ldcd, int Cm, int Cn)
Definition: codelet_zsyrk.c:43
int global_fixed_rank
Definition: hcore_zgytlr.c:47