HiCMA
Hierarchical Computations on Manycore Architectures
codelet_zgemm_bdcd.c
Go to the documentation of this file.
1 
17 #include "morse.h"
18 #include "hicma.h"
19 #include "hicma_common.h"
20 #include "runtime/starpu/chameleon_starpu.h"
21 //#include "runtime/starpu/include/runtime_codelet_z.h"
22 
23 #include <sys/time.h>
24 
25 #include "runtime/starpu/runtime_codelets.h"
26 ZCODELETS_HEADER(gemmbdcd_hcore)
27 
28 // TODO: update this definition; it was only copy-pasted from runtime/starpu/codelets/codelet_zcallback.c
29 /*CHAMELEON_CL_CB(zgemm_hcore, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), starpu_matrix_get_ny(task->handles[0]), 2. *M*N*K) [> If A^t, computation is wrong <]*/
30 
31 #include "hcore_z.h"
32 
33 extern int global_always_fixed_rank;
34 extern int global_fixed_rank;
35 extern int print_mat;
36 extern void _printmat(double * A, int64_t m, int64_t n, int64_t ld);
43 void HICMA_TASK_zgemm_bdcd(const MORSE_option_t *options,
44  MORSE_enum transA, int transB,
45  int m, int n,
46  double alpha,
47  const MORSE_desc_t *AUV,
48  const MORSE_desc_t *Ark,
49  int Am, int An, int lda,
50  const MORSE_desc_t *BD,
51  int Bm, int Bn, int ldb,
52  double beta,
53  const MORSE_desc_t *CD,
54  int Cm, int Cn, int ldc
55  )
56 {
57  int nAUV = AUV->nb;
58  struct starpu_codelet *codelet = &cl_zgemmbdcd_hcore;
59  /*void (*callback)(void*) = options->profiling ? cl_zgemm_hcore_callback : NULL;*/
60  void (*callback)(void*) = NULL;
61  MORSE_starpu_ws_t *h_work = (MORSE_starpu_ws_t*)(options->ws_host);
62  /*printf("%s %d:\t%p %p\n", __FILE__, __LINE__, h_work, options->ws_host);*/
63 
64  int sizeA = lda*nAUV; //FIXME Think about scheduling of tasks according to sizes of the matrices
65  int sizeB = ldb*n;
66  int sizeC = ldc*m;
67  int execution_rank = CD->get_rankof( CD, Cm, Cn );
68  int rank_changed=0;
69  (void)execution_rank;
70 
71  /* force execution on the rank owning the largest data (tile) */
72  int threshold;
73  char* env = getenv("MORSE_COMM_FACTOR_THRESHOLD");
74 
75  int ifval = 0, elseifval = 0, initialval = execution_rank;
76  if (env != NULL)
77  threshold = (unsigned)atoi(env);
78  else
79  threshold = 10;
80  if ( sizeA > threshold*sizeC ){
81  execution_rank = AUV->get_rankof( AUV, Am, An );
82  ifval = execution_rank;
83  rank_changed = 1;
84  }else if( sizeB > threshold*sizeC ){
85  execution_rank = BD->get_rankof( BD, Bm, Bn );
86  elseifval = execution_rank;
87  rank_changed = 1;
88  }
89  //printf("m:%d n:%d k:%d nb:%d\n", m, n, k, nb); all of them are nb (1156)
90  //printf("initialval:\t%d if:%d\t else:\t%d rc:\t%d\n", initialval, ifval, elseifval, rank_changed);
91  MORSE_BEGIN_ACCESS_DECLARATION;
92  MORSE_ACCESS_R(AUV, Am, An);
93  MORSE_ACCESS_R(BD, Bm, Bn);
94  MORSE_ACCESS_RW(CD, Cm, Cn);
95 #if !defined(HICMA_ALWAYS_FIX_RANK)
96  MORSE_ACCESS_R(Ark, Am, An);
97 #endif
98  if (rank_changed)
99  MORSE_RANK_CHANGED(execution_rank);
100  MORSE_END_ACCESS_DECLARATION;
101 
102  //printf("%s %d n:%d\n", __func__, __LINE__,n );
103  starpu_insert_task(
104  starpu_mpi_codelet(codelet),
105  STARPU_VALUE, &transA, sizeof(MORSE_enum),
106  STARPU_VALUE, &transB, sizeof(MORSE_enum),
107  STARPU_VALUE, &m, sizeof(int),
108  STARPU_VALUE, &n, sizeof(int),
109  STARPU_VALUE, &alpha, sizeof(double),
110  STARPU_R, RTBLKADDR(AUV, double, Am, An),
111  STARPU_VALUE, &lda, sizeof(int),
112  STARPU_R, RTBLKADDR(BD, double, Bm, Bn),
113  STARPU_VALUE, &ldb, sizeof(int),
114  STARPU_VALUE, &beta, sizeof(double),
115  STARPU_RW, RTBLKADDR(CD, double, Cm, Cn),
116 #if !defined(HICMA_ALWAYS_FIX_RANK)
117  STARPU_R, RTBLKADDR(Ark, double, Am, An),
118 #endif
119  STARPU_VALUE, &ldc, sizeof(int),
120  STARPU_VALUE, &Am, sizeof(int),
121  STARPU_VALUE, &An, sizeof(int),
122  STARPU_VALUE, &Bm, sizeof(int),
123  STARPU_VALUE, &Bn, sizeof(int),
124  STARPU_VALUE, &Cm, sizeof(int),
125  STARPU_VALUE, &Cn, sizeof(int),
126  STARPU_VALUE, &nAUV, sizeof(int),
127  STARPU_SCRATCH, options->ws_worker,
128  STARPU_VALUE, &h_work, sizeof(MORSE_starpu_ws_t *),
129  STARPU_PRIORITY, options->priority,
130  STARPU_CALLBACK, callback,
131 #if defined(CHAMELEON_USE_MPI)
132  STARPU_EXECUTE_ON_NODE, execution_rank,
133 #endif
134 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
135  STARPU_NAME, "hcore_zgemm_bdcd",
136 #endif
137  0);
138 }
139 
140 #if !defined(CHAMELEON_SIMULATION)
141 static void cl_zgemmbdcd_hcore_cpu_func(void *descr[], void *cl_arg)
142 {
143 #ifdef HICMA_DISABLE_ALL_COMPUTATIONS
144  return;
145 #endif
146 #ifdef HICMA_DISABLE_HCORE_COMPUTATIONS
147  return;
148 #endif
149  struct timeval tvalBefore, tvalAfter; // removed comma
150  gettimeofday (&tvalBefore, NULL);
151  MORSE_enum transA;
152  MORSE_enum transB;
153  int m;
154  int n;
155  double alpha;
156  double *AUV = NULL;
157  double *Ark = NULL;
158  int lda;
159  double *BD = NULL;
160  int ldb;
161  double beta;
162  double *CD = NULL;
163  int ldc;
164  int nAUV;
165 
166  int idescr = 0;
167  AUV = (double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
168  BD = (double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
169  CD = (double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
170 #if !defined(HICMA_ALWAYS_FIX_RANK)
171  Ark = (double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
172 #else
173  double _gemm_rank = global_fixed_rank;
174  Ark = &_gemm_rank;
175 #endif
176 
177  double* work = NULL;
178  work = (double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
179 
180  int Am, An, Bm, Bn, Cm, Cn;
181 
182  MORSE_starpu_ws_t *h_work;
183  starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &alpha, &lda, &ldb, &beta, &ldc, &Am, &An, &Bm, &Bn, &Cm, &Cn, &nAUV, &h_work);
184 
185  double *AU = AUV;
186 
187  int nAU = nAUV/2;
188  size_t nelm_AU = (size_t)lda * (size_t)nAU;
189  double *AV = &(AUV[nelm_AU]);
190 
191  char datebuf_start[128];
192  if(HICMA_get_print_index()){
193  time_t timer;
194  struct tm* tm_info;
195  gettimeofday (&tvalAfter, NULL);
196  time(&timer);
197  tm_info = localtime(&timer);
198  strftime(datebuf_start, 26, "%Y-%m-%d %H:%M:%S",tm_info);
199  printf("%d+GEMMBDCD\t|CD(%d,%d) AUV(%d,%d)%g BD(%d,%d) m:%d n:%d lda:%d ldb:%d ldc:%d \t\t\t\t\tGEMMBDCD: %s\n",MORSE_My_Mpi_Rank(), Cm, Cn, Am, An, Ark[0], Bm, Bn, m, n, lda, ldb, ldc, datebuf_start);
200  }
201 
202  int isTransA = transA == MorseTrans;
203  int isTransB = transB == MorseTrans;
204  if(isTransB == 1){
205  printf("%s %d %s: Transpose of B is not supported yet. isTransB: %d transB:%d\n", __FILE__, __LINE__, __func__, isTransB, transB);
206  exit(101);
207  }
208 
209  HCORE_zgemmbdcd(transA, transB,
210  m, n,
211  alpha, (isTransA ? AV : AU), (isTransA ? AU : AV), Ark, lda,
212  BD, ldb,
213  beta, CD, ldc, work);
214 
216  char datebuf[128];
217  time_t timer;
218  struct tm* tm_info;
219  gettimeofday (&tvalAfter, NULL);
220  time(&timer);
221  tm_info = localtime(&timer);
222  strftime(datebuf, 26, "%Y-%m-%d %H:%M:%S",tm_info);
223  printf("%d-GEMMBDCD\t|CD(%d,%d) AUV(%d,%d)%g BD(%d,%d)\t\t\tGEMMBDCD: %.4f\t%s---%s\n",MORSE_My_Mpi_Rank(),Cm, Cn, Am, An, Ark[0], Bm, Bn,
224  (tvalAfter.tv_sec - tvalBefore.tv_sec)
225  +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0,
226  datebuf_start, datebuf
227  );
228  }
229 }
230 #endif /* !defined(CHAMELEON_SIMULATION) */
231 
/*
 * Codelet definition
 *
 * Declares the zgemmbdcd_hcore codelet with cl_zgemmbdcd_hcore_cpu_func as
 * its CPU implementation.
 * NOTE(review): the 5 is presumably the number of data buffers, matching the
 * handles passed by HICMA_TASK_zgemm_bdcd (AUV, BD, CD, Ark and the scratch
 * workspace). When HICMA_ALWAYS_FIX_RANK is defined the Ark handle is not
 * passed, so only 4 handles are provided -- confirm that the count is still
 * correct in that configuration.
 */
CODELETS_CPU(zgemmbdcd_hcore, 5, cl_zgemmbdcd_hcore_cpu_func)
236 
int print_mat
#define AUV(m, n)
Definition: pzgemm.c:60
#define A(m, n)
Definition: pzgemm.c:56
void HCORE_zgemmbdcd(MORSE_enum transA, MORSE_enum transB, int M, int N, double alpha, double *AU, double *AV, double *Ark, int LDA, double *BD, int LDB, double beta, double *CD, int LDC, double *work)
void _printmat(double *A, int64_t m, int64_t n, int64_t ld)
time_t timer
struct tm * tm_info
int global_always_fixed_rank
Definition: hcore_zgytlr.c:46
void HICMA_TASK_zgemm_bdcd(const MORSE_option_t *options, MORSE_enum transA, int transB, int m, int n, double alpha, const MORSE_desc_t *AUV, const MORSE_desc_t *Ark, int Am, int An, int lda, const MORSE_desc_t *BD, int Bm, int Bn, int ldb, double beta, const MORSE_desc_t *CD, int Cm, int Cn, int ldc)
char datebuf[128]
int HICMA_get_print_index_end()
Definition: hicma_init.c:53
int HICMA_get_print_index()
Definition: hicma_init.c:50
int global_fixed_rank
Definition: hcore_zgytlr.c:47