HiCMA
Hierarchical Computations on Manycore Architectures
pzpotrf.c
Go to the documentation of this file.
1 
16 /*
17  * @copyright (c) 2009-2014 The University of Tennessee and The University
18  * of Tennessee Research Foundation.
19  * All rights reserved.
20  * @copyright (c) 2012-2016 Inria. All rights reserved.
21  * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
22  */
23 /*
24  *
25  * file pzpotrf.c
26  *
27  * MORSE auxiliary routines
28  * MORSE is a software package provided by Univ. of Tennessee,
29  * Univ. of California Berkeley and Univ. of Colorado Denver
30  *
31  * version 2.5.0
32  * comment This file has been automatically generated
33  * from Plasma 2.5.0 for MORSE 1.0.0
34  * author Jakub Kurzak
35  * author Hatem Ltaief
36  * author Mathieu Faverge
37  * author Emmanuel Agullo
38  * author Cedric Castagnede
39  * author Florent Pruvost
40  * date 2010-11-15
41  *
42  **/
43 #include "morse.h"
44 #include "hicma.h"
45 #include "hicma_common.h"
46 #include "control/common.h"
47 #include "hicma_runtime_z.h"
48 #include "coreblas/lapacke.h"
49 
50 #include "control/hicma_config.h"
51 #include <stdio.h>
52 
/* Flags defined in other translation units. */
extern int store_only_diagonal_tiles; /* when 1, diagonal tiles of AD are addressed at column 0 instead of column k */
extern int print_index;               /* not read by hicma_pzpotrf() */
extern int print_mat;                 /* not read by hicma_pzpotrf() */
extern int run_org;                   /* not read by hicma_pzpotrf() */

/* When non-zero, hicma_pzpotrf() prints the tile indices of every task it submits. */
int pzpotrf_print_index = 0;
/* Checked at the end of each outer iteration of hicma_pzpotrf(); the barrier call itself is currently commented out. */
int extra_barrier = 0;
59 /***************************************************************************/
62 void hicma_pzpotrf(MORSE_enum uplo,
63  MORSE_desc_t *AUV,
64  MORSE_desc_t *AD,
65  MORSE_desc_t *Ark,
66  MORSE_sequence_t *sequence, MORSE_request_t *request,
67  int rk, int maxrk, double acc)
68 {
69  MORSE_context_t *morse;
70  MORSE_option_t options;
71 
72  int k, m, n;
73  size_t ws_host = 0;
74  size_t ws_worker = 0;
75 
76  double zone = (double) 1.0;
77  double mzone = (double)-1.0;
78 
79  morse = morse_context_self();
80  if (sequence->status != MORSE_SUCCESS)
81  return;
82  RUNTIME_options_init(&options, morse, sequence, request);
83 
84 
85 /*#ifdef CHAMELEON_USE_MAGMA*/
86  /*if (0) [> Disable the workspace as long as it is is not used (See StarPU codelet) <]*/
87  /*{*/
88  /*int nb = MORSE_IB; [> Approximate nb for simulation <]*/
89 /*#if !defined(CHAMELEON_SIMULATION)*/
90  /*nb = magma_get_zpotrf_nb(AD->nb);*/
91 /*#endif*/
92  /*ws_host = sizeof(double)*nb*nb;*/
93  /*}*/
94 /*#endif*/
95  //printf("%s %s %d maxrank=%d\n", __FILE__, __func__, __LINE__, maxrk);
96  ws_worker = //FIXME tentative size. FInd exact size. I think syrk uses less memory
97  //Ali says: this workspace need to be fixed, not all tasks below need it nor need that much
98  2 * AD->mb * 2 * maxrk // for copying CU and CV into temporary buffer instead of using CUV itself. There is 2*maxrk because these buffers will be used to put two U's side by side
99  + 2 * AD->mb // qrtauA qrtauB
100  + maxrk * maxrk // qrb_aubut AcolBcolT
101  + 2 * AD->mb * maxrk // newU newV
102  + (2*maxrk) * (2*maxrk) // svd_rA _rA
103  //+ maxrk * maxrk // svd_rB _rB I assume that use_trmm=1 so I commented out
104  //+ maxrk * maxrk // svd_T _T I assume that use_trmm=1 so I commented out
105  + (2*maxrk) // sigma
106  #ifdef HCORE_GEMM_USE_ORGQR
107  + CUV->mb * 2*maxrk // newUV gemms
108  #endif
109  ;
111  double work_query;
112  int lwork = -1;
113  int info = LAPACKE_dgesvd_work( LAPACK_COL_MAJOR, 'A', 'A',
114  2*maxrk, 2*maxrk,
115  NULL, 2*maxrk,
116  NULL,
117  NULL, 2*maxrk,
118  NULL, 2*maxrk, &work_query, lwork );
119  lwork = (int)work_query;
120  ws_worker += lwork; // superb
121  }else{
122  ws_worker += (2*maxrk); // superb
123  }
124 
125  ws_worker *= sizeof(double); //FIXME use MORSE_Complex64_t
126  //FIXME add ws_worker and ws_host calculation from compute/pzgeqrf.c when GPU/MAGMA is supported
127  RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
128 
129 
130  /*
131  * MorseLower
132  */
133  if (uplo == MorseLower) {
134  for (k = 0; k < AD->mt; k++) {
135  RUNTIME_iteration_push(morse, k);
136 
137  int tempkmd = k == AD->mt-1 ? AD->m-k*AD->mb : AD->mb;
138  int ldakd = BLKLDD(AD, k);
139 
140  //options.priority = 2*AD->mt - 2*k;
141  options.priority = 5;
143  printf("POTRF\t|tempkmd:%d k:%d ldakd:%d\n", tempkmd, k, ldakd);
144 
145  }
146  int ADicol;
147  if(store_only_diagonal_tiles == 1){
148  ADicol = 0;
149  } else {
150  ADicol = k;
151  }
153  &options,
154  MorseLower, tempkmd, AD->mb,
155  AD, k, ADicol, ldakd, 0);
156 
157  for (m = k+1; m < AD->mt; m++) {
158  int ldamuv = BLKLDD(AUV, m);
159 
160  //options.priority = 2*AD->mt - 2*k - m;
161  options.priority = 4;
163  printf("TRSM\t|m:%d k:%d ldakd:%d ldamuv:%d\n", m, k, ldakd, ldamuv);
164  }
165  /*
166  * X D^t = U V^t
167  * X = U V^t * inv(D^t)
168  * X = U (inv(D) * V )^t
169  * X = U * (trsm (lower, left, notranspose, D, V) )^t
170  * X = U * newV^t
171  */
173  &options,
174  MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit,
175  tempkmd, // number of rows of the diagonal block
176  zone, AD, k, ADicol,
177  ldakd,
178  AUV, m, k,
179  ldamuv,
180  Ark);
181  }
182  //MORSE_TASK_dataflush( &options, AV, k, k );
183  //MORSE_TASK_dataflush( &options, AUV, k, k );
184  RUNTIME_data_flush( sequence, AUV, k, k);
185 
186  for (n = k+1; n < AD->mt; n++) {
187  int tempnnd = n == AD->mt-1 ? AD->m-n*AD->mb : AD->mb;
188  int ldand = BLKLDD(AD, n);
189  int ldanuv = BLKLDD(AUV, n);
190  int ADicol;
191  if(store_only_diagonal_tiles == 1){
192  ADicol = 0;
193  } else {
194  ADicol = n;
195  }
196 
197  //options.priority = 2*AD->mt - 2*k - n;
198  options.priority = 3;
200  &options,
201  MorseLower, MorseNoTrans,
202  tempnnd, 0,
203  -1.0,
204  AUV, ldanuv,
205  Ark,
206  n, k,
207  1.0,
208  AD, ldand,
209  n, ADicol
210  );
211 
212  for (m = n+1; m < AD->mt; m++) {
213  int tempmmuv = m == AUV->mt-1 ? AUV->m - m*AUV->mb : AUV->mb;
214  int ldamuv = BLKLDD(AUV, m);
215 
216  //options.priority = 2*AD->mt - 2*k - n - m;
217  options.priority = 2;
218  if(pzpotrf_print_index ){
219  printf("GEMM\t|A(%d,%d)=A(%d,%d)-A(%d,%d)*A(%d,%d) ldamuv:%d tempmmuv:%d\n",
220  m,n,m,n,m,k,n,k,ldamuv, tempmmuv);
221  }
223  &options,
224  MorseNoTrans, MorseTrans,
225  tempmmuv,
226  tempmmuv,
227  mzone, AUV, Ark, m, k, ldamuv,
228  AUV, Ark, n, k, ldamuv,
229  zone, AUV, Ark, m, n, ldamuv,
230  rk, maxrk, acc);
231  }
232  //MORSE_TASK_dataflush( &options, AUV, n, k );
233  RUNTIME_data_flush( sequence, AUV, n, k);
234  }
235  RUNTIME_iteration_pop(morse);
236 
237  if(extra_barrier){
238 // RUNTIME_barrier(morse);
239  }
240  }
241  }
242  /*
243  * MorseUpper
244  */
245 
246 
247  RUNTIME_options_ws_free(&options);
248  RUNTIME_options_finalize(&options, morse);
249 }
int pzpotrf_print_index
Definition: pzpotrf.c:55
int print_mat
#define AUV(m, n)
Definition: pzgemm.c:60
int store_only_diagonal_tiles
#define CUV(m, n)
Definition: pzgemm.c:62
void HICMA_TASK_zpotrf(const MORSE_option_t *options, MORSE_enum uplo, int n, int nb, const MORSE_desc_t *A, int Am, int An, int lda, int iinfo)
int run_org
int print_index
void hicma_pzpotrf(MORSE_enum uplo, MORSE_desc_t *AUV, MORSE_desc_t *AD, MORSE_desc_t *Ark, MORSE_sequence_t *sequence, MORSE_request_t *request, int rk, int maxrk, double acc)
Definition: pzpotrf.c:62
int extra_barrier
Definition: pzpotrf.c:58
void HICMA_TASK_zgemm(const MORSE_option_t *options, MORSE_enum transA, int transB, int m, int n, double alpha, const MORSE_desc_t *AUV, const MORSE_desc_t *Ark, int Am, int An, int lda, const MORSE_desc_t *BUV, const MORSE_desc_t *Brk, int Bm, int Bn, int ldb, double beta, const MORSE_desc_t *CUV, const MORSE_desc_t *Crk, int Cm, int Cn, int ldc, int rk, int maxrk, double acc)
Definition: codelet_zgemm.c:45
int uplo[2]
void HICMA_TASK_zsyrk(const MORSE_option_t *options, MORSE_enum uplo, MORSE_enum trans, int n, int nb, double alpha, const MORSE_desc_t *AUV, int ldauv, const MORSE_desc_t *Ark, int Am, int An, double beta, const MORSE_desc_t *CD, int ldcd, int Cm, int Cn)
Definition: codelet_zsyrk.c:43
int HICMA_get_use_fast_hcore_zgemm()
Definition: hicma_init.c:26
void HICMA_TASK_ztrsm(const MORSE_option_t *options, MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int m, double alpha, const MORSE_desc_t *A, int Am, int An, int lda, const MORSE_desc_t *BUV, int Bm, int Bn, int ldb, const MORSE_desc_t *Brk)
Definition: codelet_ztrsm.c:38