HiCMA
Hierarchical Computations on Manycore Architectures
codelet_zuncompress.c
Go to the documentation of this file.
1 
16 #include "morse.h"
17 #include "runtime/starpu/chameleon_starpu.h"
18 /*#include "runtime/starpu/include/runtime_codelet_z.h"*/
19 
20 #include "runtime/starpu/runtime_codelets.h"
/* NOTE(review): presumably declares the cl_zuncompress_hcore codelet object
 * that HICMA_TASK_zuncompress takes the address of and that CODELETS_CPU
 * defines at the end of this file - confirm against runtime_codelets.h. */
21 ZCODELETS_HEADER(uncompress_hcore)
22 
23 #include "hcore_z.h"
24 
/* Matrix-printing debug hooks defined in another translation unit.
 * Neither symbol is referenced by the code visible in this listing. */
27 extern int print_mat;
28 extern void _printmat(double * A, int64_t m, int64_t n, int64_t ld);
41 void HICMA_TASK_zuncompress(const MORSE_option_t *options,
42  MORSE_enum transA, int transB,
43  int m, int n,
44  double alpha,
45  const MORSE_desc_t *AUBV,
46  const MORSE_desc_t *Ark,
47  int Am, int An, int lda,
48  double beta,
49  const MORSE_desc_t *CD,
50  int Cm, int Cn, int ldc
51  )
52 {
53  int nAUBV = AUBV->nb;
54  struct starpu_codelet *codelet = &cl_zuncompress_hcore;
55  /*void (*callback)(void*) = options->profiling ? cl_zgemmfrk_callback : NULL;*/
56  void (*callback)(void*) = NULL;
57  int sizeA = lda*nAUBV;
58  // I converted n to k
59  int sizeC = ldc*n;
60  int execution_rank = CD->get_rankof( CD, Cm, Cn );
61  int rank_changed=0;
62  (void)execution_rank;
63 
64  /* force execution on the rank owning the largest data (tile) */
65  int threshold;
66  char* env = getenv("CHAMELEON_COMM_FACTOR_THRESHOLD");
67 
68  if (env != NULL)
69  threshold = (unsigned)atoi(env);
70  else
71  threshold = 10;
72  if ( sizeA > threshold*sizeC ){
73  execution_rank = AUBV->get_rankof( AUBV, Am, An );
74  rank_changed = 1;
75  }
76  MORSE_BEGIN_ACCESS_DECLARATION;
77  MORSE_ACCESS_R(AUBV, Am, An);
78  MORSE_ACCESS_R(Ark, Am, An);
79  MORSE_ACCESS_RW(CD, Cm, Cn);
80  if (rank_changed)
81  MORSE_RANK_CHANGED(execution_rank);
82  MORSE_END_ACCESS_DECLARATION;
83 
84  /*printf("%s %d (%d,%d) is queued to execute on rank:%d. rank_changed:%d\n", */
85  /*__func__, __LINE__, Cm, Cn, execution_rank, rank_changed );*/
86  starpu_insert_task(
87  starpu_mpi_codelet(codelet),
88  STARPU_VALUE, &transA, sizeof(MORSE_enum),
89  STARPU_VALUE, &transB, sizeof(MORSE_enum),
90  STARPU_VALUE, &m, sizeof(int),
91  STARPU_VALUE, &n, sizeof(int),
92  STARPU_VALUE, &alpha, sizeof(double),
93  STARPU_R, RTBLKADDR(AUBV, double, Am, An),
94  STARPU_R, RTBLKADDR(Ark, double, Am, An),
95  STARPU_VALUE, &lda, sizeof(int),
96  STARPU_VALUE, &beta, sizeof(double),
97  STARPU_RW, RTBLKADDR(CD, double, Cm, Cn),
98  STARPU_VALUE, &ldc, sizeof(int),
99  STARPU_VALUE, &Am, sizeof(int),
100  STARPU_VALUE, &An, sizeof(int),
101  STARPU_VALUE, &Cm, sizeof(int),
102  STARPU_VALUE, &Cn, sizeof(int),
103  STARPU_VALUE, &nAUBV, sizeof(int),
104  STARPU_PRIORITY, options->priority,
105  STARPU_CALLBACK, callback,
106 #if defined(CHAMELEON_USE_MPI)
107  STARPU_EXECUTE_ON_NODE, execution_rank,
108 #endif
109 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
110  STARPU_NAME, "hcore_zuncompress",
111 #endif
112  0);
113 }
114 
115 #if !defined(CHAMELEON_SIMULATION)
/*
 * StarPU CPU implementation of the zuncompress_hcore codelet.
 *
 * descr[0], descr[1] and descr[2] are the AUBV, Ark and CD tiles (in the
 * order they are handed to starpu_insert_task by HICMA_TASK_zuncompress);
 * cl_arg carries the scalar arguments packed by value.  After unpacking, the
 * AUBV tile is split into its two halves and HCORE_zuncompress is called.
 *
 * NOTE(review): this listing skips original lines 139 and 142, so the closing
 * braces on lines 141 and 144 have no visible opening statement - presumably
 * `if (...) {` guards around the debug prints.  Recover them from the real
 * source file before editing this function.
 */
116 static void cl_zuncompress_hcore_cpu_func(void *descr[], void *cl_arg)
117 {
118  MORSE_enum transA;
119  MORSE_enum transB;
120  int m;
121  int n;
122  double alpha;
123  double *AUBV;
124  double *Ark;
125  int lda;
126  double beta;
127  double *CD;
128  int ldc;
/* rk and acc are not used anywhere in the visible code. */
129  int rk;
130  double acc ;
131  int nAUBV;
132 
/* Raw pointers to the three tiles managed by StarPU. */
133  AUBV = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
134  Ark = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
135 
136  CD = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
/* Bm and Bn are never assigned; they only appear in the commented-out printf. */
137  int Am, An, Bm, Bn, Cm, Cn;
/* Unpack order mirrors the STARPU_VALUE order used in HICMA_TASK_zuncompress. */
138  starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &alpha, &lda, &beta, &ldc, &Am, &An, &Cm, &Cn, &nAUBV);
140  //printf("+GEMMFRK\t|CUV(%d,%d) AUV(%d,%d) BUV(%d,%d)\n",Cm, Cn, Am, An, Bm, Bn);
141  }
/* Debug trace: MPI rank, tile coordinates, first entry of Ark, dimensions. */
143  printf("%d-UNCOMPRESS\t|CUV(%d,%d) AUV(%d,%d)%g mn:%d %d ldac:%d %d\n",MORSE_My_Mpi_Rank(),Cm, Cn, Am, An, Ark[0], m, n, lda, ldc);
144  }
145 
/* The AUBV tile is treated as two consecutive blocks: AU occupies the first
 * lda * (nAUBV/2) elements and BV starts immediately after it. */
146  double *AU = AUBV;
147  int nAU = nAUBV/2;
148  size_t nelm_AU = (size_t)lda * (size_t)nAU;
149  double *BV = &(AUBV[nelm_AU]);
/* The same Ark pointer and the same lda are passed for both the AU and the
 * BV operand. */
150  HCORE_zuncompress(transA, transB,
151  m, n,
152  alpha, AU, Ark, lda,
153  BV, Ark, lda,
154  beta, CD, ldc);
155 
156 
157 }
158 
159 #endif /* !defined(CHAMELEON_SIMULATION) */
160 
161 /*
162  * Codelet definition
163  */
/* CPU-only codelet backed by cl_zuncompress_hcore_cpu_func.
 * NOTE(review): the 3 presumably is the number of data buffers, matching
 * descr[0..2] (AUBV, Ark, CD) - confirm against the CODELETS_CPU macro.
 * The commented-out line is a CPU+CUDA variant; no CUDA function is visible
 * in this listing. */
164 CODELETS_CPU(zuncompress_hcore, 3, cl_zuncompress_hcore_cpu_func)
165 // CODELETS(zuncompress_hcore, 3, cl_zuncompress_hcore_cpu_func, cl_zuncompress_hcore_cuda_func, STARPU_CUDA_ASYNC)
int print_mat
void _printmat(double *A, int64_t m, int64_t n, int64_t ld)
#define A(m, n)
Definition: pzgemm.c:56
void HICMA_TASK_zuncompress(const MORSE_option_t *options, MORSE_enum transA, int transB, int m, int n, double alpha, const MORSE_desc_t *AUBV, const MORSE_desc_t *Ark, int Am, int An, int lda, double beta, const MORSE_desc_t *CD, int Cm, int Cn, int ldc)
int gemmfrk_cl_print_mat
int gemmfrk_cl_print_index
void HCORE_zuncompress(MORSE_enum transA, MORSE_enum transB, int M, int N, double alpha, double *AU, double *Ark, int LDA, double *BV, double *Brk, int LDB, double beta, double *CD, int LDC)