19 #include "hicma_common.h" 20 #include "runtime/starpu/chameleon_starpu.h" 25 #include "runtime/starpu/runtime_codelets.h" 26 ZCODELETS_HEADER(gemmbdcd_hcore)
/* External declaration only: _printmat is defined outside this excerpt.
 * It takes a pointer to double data plus three 64-bit integers (m, n, ld).
 * NOTE(review): going by the name it prints an m-by-n matrix with leading
 * dimension ld; that cannot be confirmed from this declaration alone. */
36 extern void _printmat(
double *
A, int64_t m, int64_t n, int64_t ld);
/* Excerpt of the task-submission routine for the gemm-bdcd codelet.
 * The parameter list below matches the HICMA_TASK_zgemm_bdcd signature that
 * appears near the end of this file. The function header, several parameters
 * (options, m, n, alpha, beta) and a number of body lines are not visible in
 * this excerpt: the embedded original line numbers skip.
 *
 * Visible behavior:
 *   - picks the rank that will execute the task (owner of the CD tile by
 *     default, possibly moved to the owner of the AUV or BD tile),
 *   - declares the data accesses (AUV: R, BD: R, CD: RW, Ark: R unless
 *     HICMA_ALWAYS_FIX_RANK is defined),
 *   - packs scalar arguments and data handles for the codelet
 *     cl_zgemmbdcd_hcore. */
44 MORSE_enum transA,
/* NOTE(review): transB is declared int here but is packed below with
 * sizeof(MORSE_enum); this is only safe if MORSE_enum has the size of int.
 * Confirm against the MORSE headers. */
int transB,
47 const MORSE_desc_t *
AUV,
48 const MORSE_desc_t *Ark,
49 int Am,
int An,
int lda,
50 const MORSE_desc_t *BD,
51 int Bm,
int Bn,
int ldb,
53 const MORSE_desc_t *CD,
54 int Cm,
int Cn,
int ldc
/* Codelet descriptor submitted by this routine. */
58 struct starpu_codelet *codelet = &cl_zgemmbdcd_hcore;
/* No completion callback is installed (NULL is passed to STARPU_CALLBACK). */
60 void (*callback)(
void*) = NULL;
/* Host workspace taken from the options structure; passed to the codelet by value. */
61 MORSE_starpu_ws_t *h_work = (MORSE_starpu_ws_t*)(options->ws_host);
/* Default execution rank: whatever get_rankof reports for tile (Cm, Cn) of CD. */
67 int execution_rank = CD->get_rankof( CD, Cm, Cn );
/* The threshold is read from the environment variable MORSE_COMM_FACTOR_THRESHOLD.
 * NOTE(review): any NULL check on env, and the declarations of threshold,
 * sizeA, sizeB and sizeC, are not visible in this excerpt. atoi() gives no
 * error indication for malformed input. */
73 char* env = getenv(
"MORSE_COMM_FACTOR_THRESHOLD");
/* ifval / elseifval / initialval record the rank chosen on each path; their
 * later use is not visible here. */
75 int ifval = 0, elseifval = 0, initialval = execution_rank;
77 threshold = (unsigned)atoi(env);
/* If sizeA exceeds threshold*sizeC, execute on the rank reported for the AUV tile. */
80 if ( sizeA > threshold*sizeC ){
81 execution_rank =
AUV->get_rankof(
AUV, Am, An );
82 ifval = execution_rank;
84 }
/* Otherwise, if sizeB exceeds threshold*sizeC, execute on the rank reported for the BD tile. */
else if( sizeB > threshold*sizeC ){
85 execution_rank = BD->get_rankof( BD, Bm, Bn );
86 elseifval = execution_rank;
/* Access declarations: AUV and BD are read, CD is read-write, and Ark is
 * read only when HICMA_ALWAYS_FIX_RANK is not defined. The selected rank is
 * reported through MORSE_RANK_CHANGED. */
91 MORSE_BEGIN_ACCESS_DECLARATION;
92 MORSE_ACCESS_R(
AUV, Am, An);
93 MORSE_ACCESS_R(BD, Bm, Bn);
94 MORSE_ACCESS_RW(CD, Cm, Cn);
95 #if !defined(HICMA_ALWAYS_FIX_RANK) 96 MORSE_ACCESS_R(Ark, Am, An);
99 MORSE_RANK_CHANGED(execution_rank);
100 MORSE_END_ACCESS_DECLARATION;
/* Task arguments. The STARPU_VALUE entries appear in this order:
 *   transA, transB, m, n, alpha, lda, ldb, beta, ldc,
 *   Am, An, Bm, Bn, Cm, Cn, nAUV, h_work
 * which is the same order used by starpu_codelet_unpack_args in
 * cl_zgemmbdcd_hcore_cpu_func. The data handles appear in the order
 * AUV, BD, CD, Ark (conditional), scratch workspace, which is the order the
 * CPU function reads descr[]. Keep both orders in sync when editing. */
104 starpu_mpi_codelet(codelet),
105 STARPU_VALUE, &transA,
sizeof(MORSE_enum),
106 STARPU_VALUE, &transB,
sizeof(MORSE_enum),
107 STARPU_VALUE, &m,
sizeof(
int),
108 STARPU_VALUE, &n,
sizeof(
int),
109 STARPU_VALUE, &alpha,
sizeof(
double),
110 STARPU_R, RTBLKADDR(
AUV,
double, Am, An),
111 STARPU_VALUE, &lda,
sizeof(
int),
112 STARPU_R, RTBLKADDR(BD,
double, Bm, Bn),
113 STARPU_VALUE, &ldb,
sizeof(
int),
114 STARPU_VALUE, &beta,
sizeof(
double),
115 STARPU_RW, RTBLKADDR(CD,
double, Cm, Cn),
116 #
if !defined(HICMA_ALWAYS_FIX_RANK)
117 STARPU_R, RTBLKADDR(Ark,
double, Am, An),
119 STARPU_VALUE, &ldc,
sizeof(
int),
/* Tile indices are forwarded as well; the CPU function uses them in its printf output. */
120 STARPU_VALUE, &Am,
sizeof(
int),
121 STARPU_VALUE, &An,
sizeof(
int),
122 STARPU_VALUE, &Bm,
sizeof(
int),
123 STARPU_VALUE, &Bn,
sizeof(
int),
124 STARPU_VALUE, &Cm,
sizeof(
int),
125 STARPU_VALUE, &Cn,
sizeof(
int),
/* NOTE(review): the declaration and value of nAUV are not visible in this excerpt. */
126 STARPU_VALUE, &nAUV,
sizeof(
int),
/* Per-worker scratch buffer; it is the last data handle of the task. */
127 STARPU_SCRATCH, options->ws_worker,
/* Only the pointer value of h_work is packed, not the structure it points to. */
128 STARPU_VALUE, &h_work,
sizeof(MORSE_starpu_ws_t *),
129 STARPU_PRIORITY, options->priority,
130 STARPU_CALLBACK, callback,
/* The execution rank chosen above is passed only when CHAMELEON_USE_MPI is defined. */
131 #
if defined(CHAMELEON_USE_MPI)
132 STARPU_EXECUTE_ON_NODE, execution_rank,
/* The task name is attached only when CHAMELEON_CODELETS_HAVE_NAME is defined. */
134 #
if defined(CHAMELEON_CODELETS_HAVE_NAME)
135 STARPU_NAME,
"hcore_zgemm_bdcd",
/* Excerpt of the CPU implementation of the gemm-bdcd codelet; it is compiled
 * only when CHAMELEON_SIMULATION is not defined. descr[] carries the data
 * buffers and cl_arg the packed scalar arguments. Several original lines
 * (declarations, guards, the kernel call name) are not visible here: the
 * embedded original line numbers skip. */
140 #if !defined(CHAMELEON_SIMULATION) 141 static void cl_zgemmbdcd_hcore_cpu_func(
void *descr[],
void *cl_arg)
/* The bodies guarded by HICMA_DISABLE_ALL_COMPUTATIONS and
 * HICMA_DISABLE_HCORE_COMPUTATIONS are not visible in this excerpt.
 * tvalBefore is the start timestamp used for the elapsed-time printout below. */
143 #ifdef HICMA_DISABLE_ALL_COMPUTATIONS 146 #ifdef HICMA_DISABLE_HCORE_COMPUTATIONS 149 struct timeval tvalBefore, tvalAfter;
150 gettimeofday (&tvalBefore, NULL);
/* Buffers are read from descr[] in the order AUV, BD, CD, Ark (only when
 * HICMA_ALWAYS_FIX_RANK is not defined), work. This is the same order in
 * which the submission routine lists its data handles. */
167 AUV = (
double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
168 BD = (
double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
169 CD = (
double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
170 #if !defined(HICMA_ALWAYS_FIX_RANK) 171 Ark = (
double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
178 work = (
double *)STARPU_MATRIX_GET_PTR(descr[idescr++]);
180 int Am, An, Bm, Bn, Cm, Cn;
182 MORSE_starpu_ws_t *h_work;
/* The unpack order must match the order of the STARPU_VALUE entries in the
 * submission routine. */
183 starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &alpha, &lda, &ldb, &beta, &ldc, &Am, &An, &Bm, &Bn, &Cm, &Cn, &nAUV, &h_work);
/* AV points nelm_AU = lda * nAU elements into the AUV buffer, i.e. the second
 * factor is stored right after the first one in the same buffer. The
 * definitions of nAU and AU are not visible in this excerpt. */
188 size_t nelm_AU = (size_t)lda * (
size_t)nAU;
189 double *AV = &(
AUV[nelm_AU]);
191 char datebuf_start[128];
195 gettimeofday (&tvalAfter, NULL);
/* NOTE(review): the size passed to strftime is the literal 26 while the
 * buffer is declared with 128 bytes; sizeof datebuf_start would keep the two
 * in sync. The format used here fits within 26 bytes. */
198 strftime(datebuf_start, 26,
"%Y-%m-%d %H:%M:%S",
tm_info);
/* Start-of-task trace line.
 * NOTE(review): it prints Ark[0], but in the visible code Ark is assigned only
 * when HICMA_ALWAYS_FIX_RANK is not defined; whether this printf is guarded
 * accordingly cannot be seen from here. */
199 printf(
"%d+GEMMBDCD\t|CD(%d,%d) AUV(%d,%d)%g BD(%d,%d) m:%d n:%d lda:%d ldb:%d ldc:%d \t\t\t\t\tGEMMBDCD: %s\n",MORSE_My_Mpi_Rank(), Cm, Cn, Am, An, Ark[0], Bm, Bn, m, n, lda, ldb, ldc, datebuf_start);
202 int isTransA = transA == MorseTrans;
203 int isTransB = transB == MorseTrans;
/* Message stating that transposing B is not supported; the condition guarding
 * it and any action taken afterwards are not visible in this excerpt. */
205 printf(
"%s %d %s: Transpose of B is not supported yet. isTransB: %d transB:%d\n", __FILE__, __LINE__, __func__, isTransB, transB);
/* Arguments of the kernel call (the callee name is on a line missing from
 * this excerpt). When A is transposed, the AU and AV pointers are swapped. */
211 alpha, (isTransA ? AV : AU), (isTransA ? AU : AV), Ark, lda,
213 beta, CD, ldc, work);
219 gettimeofday (&tvalAfter, NULL);
/* End-of-task trace line; the elapsed time is printed in seconds
 * (seconds difference plus microseconds difference / 1e6). */
223 printf(
"%d-GEMMBDCD\t|CD(%d,%d) AUV(%d,%d)%g BD(%d,%d)\t\t\tGEMMBDCD: %.4f\t%s---%s\n",MORSE_My_Mpi_Rank(),Cm, Cn, Am, An, Ark[0], Bm, Bn,
224 (tvalAfter.tv_sec - tvalBefore.tv_sec)
225 +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0,
/* Registers the CPU function for the zgemmbdcd_hcore codelet.
 * NOTE(review): the second argument (5) is presumably the number of data
 * buffers; it equals the five descr[] reads above when HICMA_ALWAYS_FIX_RANK
 * is not defined. Confirm against the CODELETS_CPU macro definition. */
235 CODELETS_CPU(zgemmbdcd_hcore, 5, cl_zgemmbdcd_hcore_cpu_func)
/* The following lines are bare signatures (and one variable name) without
 * bodies or terminating semicolons.
 * NOTE(review): they read like an index of symbols declared or used by this
 * file rather than compilable code; confirm before relying on them. The
 * HICMA_TASK_zgemm_bdcd entry shows the full parameter list of the
 * task-submission routine whose body is excerpted earlier in this file. */
void HCORE_zgemmbdcd(MORSE_enum transA, MORSE_enum transB, int M, int N, double alpha, double *AU, double *AV, double *Ark, int LDA, double *BD, int LDB, double beta, double *CD, int LDC, double *work)
void _printmat(double *A, int64_t m, int64_t n, int64_t ld)
int global_always_fixed_rank
void HICMA_TASK_zgemm_bdcd(const MORSE_option_t *options, MORSE_enum transA, int transB, int m, int n, double alpha, const MORSE_desc_t *AUV, const MORSE_desc_t *Ark, int Am, int An, int lda, const MORSE_desc_t *BD, int Bm, int Bn, int ldb, double beta, const MORSE_desc_t *CD, int Cm, int Cn, int ldc)
int HICMA_get_print_index_end()
int HICMA_get_print_index()