17 #include "hicma_common.h" 18 #include "auxdescutil.h" 20 #include "coreblas/lapacke.h" 22 #include "runtime/starpu/chameleon_starpu.h" 27 #include "runtime/starpu/runtime_codelets.h" 28 ZCODELETS_HEADER(trsm_hcore)
/*
 * Fragment of HICMA_TASK_ztrsm (its full prototype appears in the symbol
 * listing at the end of this view).  What is visible here: the function picks
 * the cl_ztrsm_hcore codelet, chooses an execution rank, declares its data
 * accesses on A, BUV and (conditionally) Brk, and passes the scalar arguments
 * and tile handles to a StarPU task-insertion call.
 *
 * NOTE(review): this view is incomplete -- statements, braces and some
 * parameters between the numbered fragments are not shown.  The declarations
 * of threshold, sizeA, sizeB, m, nBUV and options are among the missing parts.
 */
34 #define CBLAS_SADDR(_val) (_val) 39 MORSE_enum
side, MORSE_enum
uplo, MORSE_enum transA, MORSE_enum
diag,
41 double alpha,
const MORSE_desc_t *
A,
int Am,
int An,
int lda,
42 const MORSE_desc_t *
BUV,
int Bm,
int Bn,
int ldb,
const MORSE_desc_t *Brk)
/* Codelet to submit; no completion callback is installed (callback is NULL). */
45 struct starpu_codelet *codelet = &cl_ztrsm_hcore;
47 void (*callback)(
void*) = NULL;
/* Default execution rank: whatever BUV->get_rankof reports for tile (Bm, Bn). */
50 int execution_rank =
BUV->get_rankof(
BUV, Bm, Bn );
/*
 * The threshold is read from the MORSE_COMM_FACTOR_THRESHOLD environment
 * variable and converted with atoi.
 * NOTE(review): no NULL check on env is visible in this view -- confirm one
 * guards the atoi call, and that threshold has a fallback value otherwise.
 */
56 char* env = getenv(
"MORSE_COMM_FACTOR_THRESHOLD");
58 threshold = (unsigned)atoi(env);
/*
 * When sizeA exceeds threshold*sizeB the execution rank is switched to the
 * rank A->get_rankof reports for tile (Am, An).  How sizeA and sizeB are
 * computed is not visible here.
 */
61 if ( sizeA > threshold*sizeB ){
62 execution_rank =
A->get_rankof(
A, Am, An );
/*
 * Access declarations: A is read, BUV is read-write, and Brk is read only
 * when HICMA_ALWAYS_FIX_RANK is not defined.
 */
65 MORSE_BEGIN_ACCESS_DECLARATION;
66 MORSE_ACCESS_R(
A, Am, An);
67 MORSE_ACCESS_RW(
BUV, Bm, Bn);
68 #if !defined(HICMA_ALWAYS_FIX_RANK) 69 MORSE_ACCESS_R(Brk, Bm, Bn);
72 MORSE_RANK_CHANGED(execution_rank);
73 MORSE_END_ACCESS_DECLARATION;
/*
 * Task insertion.  The STARPU_VALUE entries below appear in this order:
 * side, uplo, transA, diag, m, alpha, lda, ldb, Am, An, Bm, Bn, nBUV --
 * the same order as the arguments of the starpu_codelet_unpack_args call in
 * cl_ztrsm_hcore_cpu_func.  The data handles are A (STARPU_R), BUV
 * (STARPU_RW) and, unless HICMA_ALWAYS_FIX_RANK is defined, Brk (STARPU_R).
 */
76 starpu_mpi_codelet(codelet),
77 STARPU_VALUE, &
side,
sizeof(MORSE_enum),
78 STARPU_VALUE, &
uplo,
sizeof(MORSE_enum),
79 STARPU_VALUE, &transA,
sizeof(MORSE_enum),
80 STARPU_VALUE, &
diag,
sizeof(MORSE_enum),
81 STARPU_VALUE, &m,
sizeof(
int),
82 STARPU_VALUE, &alpha,
sizeof(
double),
83 STARPU_R, RTBLKADDR(
A,
double, Am, An),
84 STARPU_VALUE, &lda,
sizeof(
int),
85 STARPU_RW, RTBLKADDR(
BUV,
double, Bm, Bn),
86 STARPU_VALUE, &ldb,
sizeof(
int),
87 #
if !defined(HICMA_ALWAYS_FIX_RANK)
88 STARPU_R, RTBLKADDR(Brk,
double, Bm, Bn),
90 STARPU_VALUE, &Am,
sizeof(
int),
91 STARPU_VALUE, &An,
sizeof(
int),
92 STARPU_VALUE, &Bm,
sizeof(
int),
93 STARPU_VALUE, &Bn,
sizeof(
int),
94 STARPU_VALUE, &nBUV,
sizeof(
int),
95 STARPU_PRIORITY, options->priority,
96 STARPU_CALLBACK, callback,
/* The execution rank is only passed to StarPU when CHAMELEON_USE_MPI is defined. */
97 #
if defined(CHAMELEON_USE_MPI)
98 STARPU_EXECUTE_ON_NODE, execution_rank,
/* The task name is only attached when CHAMELEON_CODELETS_HAVE_NAME is defined. */
100 #
if defined(CHAMELEON_CODELETS_HAVE_NAME)
101 STARPU_NAME,
"hcore_ztrsm",
/*
 * Fragment of cl_ztrsm_hcore_cpu_func, the CPU implementation behind the
 * ztrsm_hcore codelet (compiled only when CHAMELEON_SIMULATION is not
 * defined).  Visible here: it takes the StarPU buffer array descr[] and the
 * packed argument blob cl_arg, fetches the tile pointers, unpacks the scalar
 * arguments, and prints diagnostic and timing lines.
 *
 * NOTE(review): this view is incomplete -- local declarations, the bodies of
 * the HICMA_DISABLE_* branches, the conditions guarding the printf calls and
 * the actual solve call are not shown.
 */
107 #if !defined(CHAMELEON_SIMULATION) 108 static void cl_ztrsm_hcore_cpu_func(
void *descr[],
void *cl_arg)
110 #ifdef HICMA_DISABLE_ALL_COMPUTATIONS 113 #ifdef HICMA_DISABLE_HCORE_COMPUTATIONS 116 struct timeval tvalBefore, tvalAfter;
/* Start timestamp for the elapsed-time figure printed further down. */
117 gettimeofday (&tvalBefore, NULL);
/*
 * Buffer layout: descr[0] is A, descr[1] is BUV, and descr[2] is Brk -- the
 * latter is fetched only when HICMA_ALWAYS_FIX_RANK is not defined.
 */
135 A = (
double *)STARPU_MATRIX_GET_PTR(descr[0]);
136 BUV = (
double *)STARPU_MATRIX_GET_PTR(descr[1]);
137 #if !defined(HICMA_ALWAYS_FIX_RANK) 138 Brk = (
double *)STARPU_MATRIX_GET_PTR(descr[2]);
140 fprintf(stderr,
"global_always_fixed_rank is one. But HICMA_ALWAYS_FIX_RANK is not defined. Exiting...\n");
/*
 * Scalars are unpacked in the order: side, uplo, transA, diag, m, alpha,
 * lda, ldb, Am, An, Bm, Bn, nBUV.
 */
156 starpu_codelet_unpack_args(cl_arg, &
side, &
uplo, &transA, &
diag, &m, &alpha, &lda, &ldb, &Am, &An, &Bm, &Bn, &nBUV);
/*
 * B points ldb*nBU elements into the BUV buffer; the product is computed in
 * size_t.  NOTE(review): presumably this skips a leading U part so that B is
 * the V part -- the definition of nBU is not visible here, confirm.
 */
159 size_t nelm_BU = (size_t)ldb * (
size_t)nBU;
160 double *
B = &(
BUV[nelm_BU]);
168 printf(
"%d+TRSM\t|AD(%d,%d) BV(%d,%d)%d m:%d lda(11):%d ldb(12):%d\n",MORSE_My_Mpi_Rank(),Am,An, Bm, Bn, _Brk, m, lda, ldb);
171 printf(
"%d\ttrsm-input A\n", __LINE__);
173 printf(
"%d\ttrsm-input B\n", __LINE__);
/* transA and diag are cast to their CBLAS enum types for a call whose name is not visible here. */
179 (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)
diag,
/* End timestamp; the last printf argument is the elapsed time in seconds (tv_sec difference plus tv_usec difference / 1e6). */
185 gettimeofday (&tvalAfter, NULL);
186 printf(
"%d-TRSM\t|AD(%d,%d)%dx%d-%d BV(%d,%d)%dx%d-%d m:%d\t\t\t\tTRSM: %.4f\n",MORSE_My_Mpi_Rank(),Am,An, m, m, lda,Bm, Bn, m, _Brk, ldb, m,
187 (tvalAfter.tv_sec - tvalBefore.tv_sec)
188 +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0
192 printf(
"%d\ttrsm-output\n", __LINE__);
/*
 * Codelet registration for ztrsm_hcore with cl_ztrsm_hcore_cpu_func as the
 * CPU implementation.  The second macro argument is 2 when
 * HICMA_ALWAYS_FIX_RANK is defined and 3 in the other registration.
 * NOTE(review): presumably this is the buffer count (A, BUV, and optionally
 * Brk) and the second line sits in an #else branch -- the #else/#endif are
 * not visible here, confirm.
 */
201 #if defined(HICMA_ALWAYS_FIX_RANK) 202 CODELETS_CPU(ztrsm_hcore, 2, cl_ztrsm_hcore_cpu_func)
205 CODELETS_CPU(ztrsm_hcore, 3, cl_ztrsm_hcore_cpu_func)
#define CBLAS_SADDR(_val)
int HICMA_get_print_mat()
void _printmat(double *A, int m, int n, int ld)
int HICMA_get_print_index_end()
int HICMA_get_fixed_rank()
int HICMA_get_print_index()
void HICMA_TASK_ztrsm(const MORSE_option_t *options, MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int m, double alpha, const MORSE_desc_t *A, int Am, int An, int lda, const MORSE_desc_t *BUV, int Bm, int Bn, int ldb, const MORSE_desc_t *Brk)
int HICMA_get_always_fixed_rank()