HiCMA
Hierarchical Computations on Manycore Architectures
pzgemm.c
Go to the documentation of this file.
1 
19 /*
20  * @copyright (c) 2009-2014 The University of Tennessee and The University
21  * of Tennessee Research Foundation.
22  * All rights reserved.
23  * @copyright (c) 2012-2014 Inria. All rights reserved.
24  * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
25  *
26  */
27 
28 /*
29  *
30  * @file pzgemm.c
31  *
32  * MORSE auxiliary routines
33  * MORSE is a software package provided by Univ. of Tennessee,
34  * Univ. of California Berkeley and Univ. of Colorado Denver
35  *
36  * @version 2.5.0
37  * @comment This file has been automatically generated
38  * from Plasma 2.5.0 for MORSE 1.0.0
39  * @author Mathieu Faverge
40  * @author Emmanuel Agullo
41  * @author Cedric Castagnede
42  * @date 2018-11-08
43  * @precisions normal z -> s d c
44  *
45  **/
46 #include "hicma_common.h"
47 #include "morse.h"
48 #include "control/common.h"
49 #include "hicma_runtime_z.h"
50 #include "coreblas/lapacke.h"
51 
52 #include "control/hicma_config.h"
53 
54 // #define SYNCHRONOUS
55 
/*
 * Tile accessors expanding to a "descriptor, tile-row, tile-column" argument
 * triple. A() and C() are used below as arguments to RUNTIME_data_flush();
 * B() is not referenced in the visible code.
 */
56 #define A(m, n) AUV, m, n
57 #define B(m, n) BUV, m, n
58 #define C(m, n) CUV, m, n
59 
/*
 * Tile accessors expanding to a "descriptor, rank-descriptor, tile-row,
 * tile-column" argument quadruple, matching the (XUV, Xrk, Xm, Xn) parameter
 * groups of HICMA_TASK_zgemm(). The macros deliberately share their names with
 * the AUV/BUV/CUV parameters of hicma_pzgemm(): being function-like, they only
 * expand when followed by '(', so expressions such as AUV->nt still refer to
 * the parameter.
 */
60 #define AUV(m, n) AUV, Ark, m, n
61 #define BUV(m, n) BUV, Brk, m, n
62 #define CUV(m, n) CUV, Crk, m, n
63 
64 #include "hicma.h"
65 
69 void hicma_pzgemm(MORSE_enum transA, MORSE_enum transB,
70  double alpha, MORSE_desc_t *AUV, MORSE_desc_t *Ark,
71  // MORSE_Complex64_t alpha, MORSE_desc_t *AUV, MORSE_desc_t *Ark,
72  MORSE_desc_t *BUV, MORSE_desc_t *Brk,
73  double beta, MORSE_desc_t *CUV, MORSE_desc_t *Crk,
74  // MORSE_Complex64_t beta, MORSE_desc_t *CUV, MORSE_desc_t *Crk,
75  MORSE_sequence_t *sequence, MORSE_request_t *request,
76  int rk, int maxrk, double acc)
77 {
78  MORSE_context_t *morse;
79  MORSE_option_t options;
80 
81  int m, n, k;
82  int ldam, ldak, ldbn, ldbk, ldcm;
83  int tempmm, tempnn, tempkn, tempkm;
84  size_t ws_host = 0;
85  size_t ws_worker = 0;
86 
87 
88  double zbeta;
89  double zone = (double)1.0;
90  // MORSE_Complex64_t zbeta;
91  // MORSE_Complex64_t zone = (MORSE_Complex64_t)1.0;
92 
93  morse = morse_context_self();
94  if (sequence->status != MORSE_SUCCESS)
95  return;
96  RUNTIME_options_init(&options, morse, sequence, request);
97 
98  ws_worker = //FIXME tentative size. FInd exact size. I think syrk uses less memory
99  //Ali says: this workspace need to be fixed, not all tasks below need it nor need that much
100  2 * CUV->mb * 2 * maxrk //CUV clone
101  + 2 * CUV->mb // qrtauA qrtauB
102  + maxrk * maxrk // qrb_aubut AcolBcolT
103  + 2 * CUV->mb * maxrk // newU newV
104  + (2*maxrk) * (2*maxrk) // svd_rA _rA
105  //+ maxrk * maxrk // svd_rB _rB I assume that use_trmm=1 so I commented out
106  //+ maxrk * maxrk // svd_T _T I assume that use_trmm=1 so I commented out
107  + (2*maxrk) // sigma
108  #ifdef HCORE_GEMM_USE_ORGQR
109  + CUV->mb * 2*maxrk // newUV gemms
110  #endif
111  ;
113  double work_query;
114  int lwork = -1;
115  int info = LAPACKE_dgesvd_work( LAPACK_COL_MAJOR, 'A', 'A',
116  2*maxrk, 2*maxrk,
117  NULL, 2*maxrk,
118  NULL,
119  NULL, 2*maxrk,
120  NULL, 2*maxrk, &work_query, lwork );
121  lwork = (int)work_query;
122  ws_worker += lwork; // superb
123  }else{
124  ws_worker += (2*maxrk); // superb
125  }
126 
127  ws_worker *= sizeof(double); //FIXME use MORSE_Complex64_t
128  RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
129 
130  for (m = 0; m < CUV->mt; m++) {
131  tempmm = m == CUV->mt-1 ? CUV->m-m*CUV->mb : CUV->mb;
132  ldcm = BLKLDD(CUV, m);
133  for (n = 0; n < CUV->nt; n++) {
134  tempnn = n == CUV->nt-1 ? CUV->n-n*CUV->nb : CUV->nb;
135  /*
136  * A: MorseNoTrans / B: MorseNoTrans
137  */
138  if (transA == MorseNoTrans) {
139  ldam = BLKLDD(AUV, m);
140  if (transB == MorseNoTrans) {
141  for (k = 0; k < AUV->nt; k++) {
142  tempkn = k == AUV->nt-1 ? AUV->n-k*AUV->nb : AUV->nb;
143  ldbk = BLKLDD(BUV, k);
144  zbeta = k == 0 ? beta : zone;
146  &options,
147  transA, transB,
148  tempmm, tempnn,
149  alpha, AUV(m, k), ldam, /* lda * Z */
150  BUV(k, n), ldbk, /* ldb * Y */
151  zbeta, CUV(m, n), ldcm, /* ldc * Y */
152  rk, maxrk, acc);
153  #ifdef SYNCHRONOUS
154  RUNTIME_barrier(morse);
155  #endif
156  }
157  }
158  /*
159  * A: MorseNoTrans / B: Morse[Conj]Trans
160  */
161  else {
162  ldbn = BLKLDD(BUV, n);
163  for (k = 0; k < AUV->nt; k++) {
164  tempkn = k == AUV->nt-1 ? AUV->n-k*AUV->nb : AUV->nb;
165  zbeta = k == 0 ? beta : zone;
167  &options,
168  transA, transB,
169  tempmm, tempnn,
170  alpha, AUV(m, k), ldam, /* lda * Z */
171  BUV(n, k), ldbn, /* ldb * Z */
172  zbeta, CUV(m, n), ldcm, /* ldc * Y */
173  rk, maxrk, acc);
174  #ifdef SYNCHRONOUS
175  RUNTIME_barrier(morse);
176  #endif
177  }
178  }
179  }
180  /*
181  * A: Morse[Conj]Trans / B: MorseNoTrans
182  */
183  else {
184  if (transB == MorseNoTrans) {
185  for (k = 0; k < AUV->mt; k++) {
186  tempkm = k == AUV->mt-1 ? AUV->m-k*AUV->mb : AUV->mb;
187  ldak = BLKLDD(AUV, k);
188  ldbk = BLKLDD(BUV, k);
189  zbeta = k == 0 ? beta : zone;
191  &options,
192  transA, transB,
193  tempmm, tempnn,
194  alpha, AUV(k, m), ldak, /* lda * X */
195  BUV(k, n), ldbk, /* ldb * Y */
196  zbeta, CUV(m, n), ldcm, /* ldc * Y */
197  rk, maxrk, acc);
198  #ifdef SYNCHRONOUS
199  RUNTIME_barrier(morse);
200  #endif
201  }
202  }
203  /*
204  * A: Morse[Conj]Trans / B: Morse[Conj]Trans
205  */
206  else {
207  ldbn = BLKLDD(BUV, n);
208  for (k = 0; k < AUV->mt; k++) {
209  tempkm = k == AUV->mt-1 ? AUV->m-k*AUV->mb : AUV->mb;
210  ldak = BLKLDD(AUV, k);
211  zbeta = k == 0 ? beta : zone;
213  &options,
214  transA, transB,
215  tempmm, tempnn,
216  alpha, AUV(k, m), ldak, /* lda * X */
217  BUV(n, k), ldbn, /* ldb * Z */
218  zbeta, CUV(m, n), ldcm, /* ldc * Y */
219  rk, maxrk, acc);
220  #ifdef SYNCHRONOUS
221  RUNTIME_barrier(morse);
222  #endif
223  }
224  }
225  }
226  RUNTIME_data_flush( sequence, C(m, n) );
227  }
228  if (transA == MorseNoTrans) {
229  for (k = 0; k < AUV->nt; k++) {
230  //MORSE_TASK_dataflush( &options, A(m, k) );
231  RUNTIME_data_flush( sequence, A(m, k) );
232  }
233  } else {
234  for (k = 0; k < AUV->mt; k++) {
235  /*MORSE_TASK_dataflush( &options, A(k, m) );*/
236  RUNTIME_data_flush( sequence, A(k, m) );
237  }
238  }
239  /*for (n = 0; n < CUV->nt; n++) {*/
240  /*MORSE_TASK_dataflush( &options, C(m, n) );*/
241  /*}*/
242  }
243  RUNTIME_options_ws_free(&options);
244  RUNTIME_options_finalize(&options, morse);
245  //MORSE_TASK_dataflush_all(); removed in newer chameleon
246 }
#define AUV(m, n)
Definition: pzgemm.c:60
#define A(m, n)
Definition: pzgemm.c:56
#define BUV(m, n)
Definition: pzgemm.c:61
#define CUV(m, n)
Definition: pzgemm.c:62
void hicma_pzgemm(MORSE_enum transA, MORSE_enum transB, double alpha, MORSE_desc_t *AUV, MORSE_desc_t *Ark, MORSE_desc_t *BUV, MORSE_desc_t *Brk, double beta, MORSE_desc_t *CUV, MORSE_desc_t *Crk, MORSE_sequence_t *sequence, MORSE_request_t *request, int rk, int maxrk, double acc)
Definition: pzgemm.c:69
#define C(m, n)
Definition: pzgemm.c:58
void HICMA_TASK_zgemm(const MORSE_option_t *options, MORSE_enum transA, int transB, int m, int n, double alpha, const MORSE_desc_t *AUV, const MORSE_desc_t *Ark, int Am, int An, int lda, const MORSE_desc_t *BUV, const MORSE_desc_t *Brk, int Bm, int Bn, int ldb, double beta, const MORSE_desc_t *CUV, const MORSE_desc_t *Crk, int Cm, int Cn, int ldc, int rk, int maxrk, double acc)
Definition: codelet_zgemm.c:45
int HICMA_get_use_fast_hcore_zgemm()
Definition: hicma_init.c:26