HiCMA
Hierarchical Computations on Manycore Architectures
pztrsm.c
Go to the documentation of this file.
1 
48 #include "control/common.h"
49 #include "hicma.h"
50 #include "hicma_runtime_z.h"
51 #include <stdio.h>
52 #include <assert.h>
53 
/* Tile-address shorthands: A(m,n) expands to the three comma-separated
 * arguments `A, m, n` (and B(m,n) to `B, m, n`), so a single macro use fills
 * three consecutive argument slots of a call. A and B are whatever
 * identifiers of those names are in scope where the macro is used. */
54 #define A(m,n) A, m, n
55 #define B(m,n) B, m, n
57 /***************************************************************************/
/**
 * hicma_pztrsm - submit the tile tasks for a solve in which the local
 * descriptor A is aliased to AUV and B is aliased to BUV.
 *
 * What the visible code does:
 *  - prints rk, maxrk, acc and alpha when HICMA_get_print_index() == 1;
 *  - returns immediately, before any options or workspace setup, when
 *    sequence->status != MORSE_SUCCESS;
 *  - sizes a per-worker workspace from AD->mb and maxrk, scales it by
 *    sizeof(double) and passes it to RUNTIME_options_ws_alloc();
 *  - picks one of eight loop nests from side / uplo / trans. Each nest walks
 *    the tiles of B, issues MORSE_TASK_dtrsm on a diagonal tile of A, issues
 *    MORSE_TASK_dgemm updates on other tiles of B, and calls
 *    RUNTIME_data_flush() on tiles it has finished submitting work for;
 *  - calls RUNTIME_options_ws_free() and RUNTIME_options_finalize() at the end.
 *
 * Inside the loop nests, AD, Ark, Brk, rk, maxrk and acc appear only in the
 * two MorseLeft / MorseLower branches. The MorseLeft / MorseUpper branch and
 * the whole side != MorseLeft branch start with an assert that always fails.
 *
 * NOTE(review): this listing skips embedded source lines 214, 222, 245, 254,
 * 292, 300, 321 and 330. As shown, several `else {` lines have no visible
 * `if` and several argument lists have no visible callee. Confirm against the
 * real pztrsm.c before relying on this text.
 *
 * @param side, uplo, trans  select the loop nest; also forwarded to the tasks
 * @param diag               forwarded unchanged to the tile tasks
 * @param alpha              scalar; the nests that use lalpha pass alpha on
 *                           the k == 0 iteration and 1.0 afterwards
 * @param AUV, AD, Ark       descriptors for the A operand (A aliases AUV)
 * @param BUV, Brk           descriptors for the B operand (B aliases BUV)
 * @param rk, maxrk, acc     forwarded to the argument list ending in
 *                           `rk, maxrk, acc`; maxrk also sizes the workspace
 * @param sequence, request  passed to RUNTIME_options_init(); sequence is
 *                           also used for the status check and the flushes
 */
60 void hicma_pztrsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum trans, MORSE_enum diag,
61  double alpha,
62  MORSE_desc_t *AUV,
63  MORSE_desc_t *AD,
64  MORSE_desc_t *Ark,
65  MORSE_desc_t *BUV,
66  MORSE_desc_t *Brk,
67  int rk,
68  int maxrk,
69  double acc,
70  MORSE_sequence_t *sequence, MORSE_request_t *request)
71 {
 /* Optional trace of the scalar arguments, gated by the print index. */
72  if(HICMA_get_print_index() == 1){
73  printf("%d:%s rk:%d maxrk:%d acc:%e alpha:%e\n",
74  __LINE__, __func__,
75  rk, maxrk, acc, alpha);
76  }
77  MORSE_desc_t* A = AUV;
78  MORSE_desc_t* B = BUV;
79 
80  MORSE_context_t *morse;
81  MORSE_option_t options;
82 
83  int k, m, n;
84  int ldak, ldam, ldan, ldbk, ldbm;
85  int tempkm, tempkn, tempmm, tempnn;
86 
87  double zone = (double) 1.0;
88  double mzone = (double)-1.0;
89  double minvalpha = (double)-1.0 / alpha;
90  double lalpha;
91 
92  morse = morse_context_self();
 /* Leave before any options/workspace setup if the sequence is not in the success state. */
93  if (sequence->status != MORSE_SUCCESS)
94  return;
95  RUNTIME_options_init(&options, morse, sequence, request);
96  size_t ws_host = 0;
97  size_t ws_worker = 0;
98  ws_worker = //FIXME tentative size. Find exact size. I think syrk uses less memory
99  2 * AD->mb * 2 * maxrk // for copying CU and CV into temporary buffer instead of using CUV itself. There is 2*maxrk because these buffers will be used to put two U's side by side
100  + 2 * AD->mb // qrtauA qrtauB
101  + maxrk * maxrk // qrb_aubut AcolBcolT
102  + 2 * AD->mb * maxrk // newU newV
103  + (2*maxrk) * (2*maxrk) // svd_rA _rA
104  + (2*maxrk) // sigma
105  ;
106  ws_worker *= sizeof(double); //FIXME use MORSE_Complex64_t
107  //FIXME add ws_worker and ws_host calculation from compute/pzgeqrf.c when GPU/MAGMA is supported
108  RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
109  /*
110  * MorseLeft / MorseUpper / MorseNoTrans
111  */
 /* The banner above labels the first nest only; the tests below choose among all eight. */
112  if (side == MorseLeft) {
113  if (uplo == MorseUpper) {
 /* A string literal is never equal to 0, so this assert fires whenever asserts are enabled. */
 /* NOTE(review): with NDEBUG defined the assert is a no-op and the nests below would run; confirm that is intended. */
114  assert("Not implemented yet" == 0);
115  if (trans == MorseNoTrans) {
116  for (k = 0; k < B->mt; k++) {
117  tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb;
118  ldak = BLKLDD(A, B->mt-1-k);
119  ldbk = BLKLDD(B, B->mt-1-k);
120  lalpha = k == 0 ? alpha : zone;
121  for (n = 0; n < B->nt; n++) {
122  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
123  MORSE_TASK_dtrsm(
124  &options,
125  side, uplo, trans, diag,
126  tempkm, tempnn, A->mb,
127  lalpha, A(B->mt-1-k, B->mt-1-k), ldak, /* lda * tempkm */
128  B(B->mt-1-k, n), ldbk); /* ldb * tempnn */
129  }
130  //RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
131  RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
132  for (m = k+1; m < B->mt; m++) {
133  ldam = BLKLDD(A, B->mt-1-m);
134  ldbm = BLKLDD(B, B->mt-1-m);
135  for (n = 0; n < B->nt; n++) {
136  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
137  MORSE_TASK_dgemm(
138  &options,
139  MorseNoTrans, MorseNoTrans,
140  B->mb, tempnn, tempkm, A->mb,
141  mzone, A(B->mt-1-m, B->mt-1-k), ldam,
142  B(B->mt-1-k, n ), ldbk,
143  lalpha, B(B->mt-1-m, n ), ldbm);
144  }
145  //RUNTIME_data_flush( sequence, A(B->mt-1-m, B->mt-1-k) );
146  RUNTIME_data_flush( sequence, A(B->mt-1-m, B->mt-1-k) );
147  }
148  for (n = 0; n < B->nt; n++) {
149  //RUNTIME_data_flush( sequence, B(B->mt-1-k, n) );
150  RUNTIME_data_flush( sequence, B(B->mt-1-k, n) );
151  }
152  }
153  }
154  /*
155  * MorseLeft / MorseUpper / Morse[Conj]Trans
156  */
157  else {
158  for (k = 0; k < B->mt; k++) {
159  tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb;
160  ldak = BLKLDD(A, k);
161  ldbk = BLKLDD(B, k);
162  lalpha = k == 0 ? alpha : zone;
163  for (n = 0; n < B->nt; n++) {
164  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
165  MORSE_TASK_dtrsm(
166  &options,
167  side, uplo, trans, diag,
168  tempkm, tempnn, A->mb,
169  lalpha, A(k, k), ldak,
170  B(k, n), ldbk);
171  }
172  //RUNTIME_data_flush( sequence, A(k, k) );
173  RUNTIME_data_flush( sequence, A(k, k) );
174  for (m = k+1; m < B->mt; m++) {
175  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
176  ldbm = BLKLDD(B, m);
177  for (n = 0; n < B->nt; n++) {
178  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
179  MORSE_TASK_dgemm(
180  &options,
181  trans, MorseNoTrans,
182  tempmm, tempnn, B->mb, A->mb,
183  mzone, A(k, m), ldak,
184  B(k, n), ldbk,
185  lalpha, B(m, n), ldbm);
186  }
187  //RUNTIME_data_flush( sequence, A(k, m) );
188  RUNTIME_data_flush( sequence, A(k, m) );
189  }
190  for (n = 0; n < B->nt; n++) {
191  //RUNTIME_data_flush( sequence, B(k, n) );
192  RUNTIME_data_flush( sequence, B(k, n) );
193  }
194 
195  }
196  }
197  }
198  /*
199  * MorseLeft / MorseLower / MorseNoTrans
200  */
201  else {
202  if (trans == MorseNoTrans) {
203  //@1
204  //printf("%s %d Left Lower Notrans\n", __FILE__, __LINE__);
205  for (k = 0; k < B->mt; k++) {
206  int ldbkuv = BLKLDD(BUV, k);
207  int ldakd = BLKLDD(AD, k);
208  tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb;
209  ldak = BLKLDD(A, k);
210  ldbk = BLKLDD(B, k);
211  lalpha = k == 0 ? alpha : zone;
212  for (n = 0; n < B->nt; n++) {
213  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
 /* NOTE(review): embedded line 214 is absent from this listing; presumably it held the `if` that pairs with the `else {` below. */
215  MORSE_TASK_dtrsm(
216  &options,
217  side, uplo, trans, diag,
218  tempkm, tempnn, A->mb,
219  lalpha, A(k, k), ldak,
220  B(k, n), ldbk);
221  else {
 /* NOTE(review): embedded line 222 (the callee name for this argument list) is absent from this listing. */
223  &options,
224  //MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit,
225  side, uplo, trans, diag,
226  tempkm, //FIXME must be number of rows of the diagonal block
227  lalpha, AD, k,
228  0, // I assume that only diags are stored
229  ldakd,
230  BUV, k, n,
231  ldbkuv,
232  Brk);
233  }
234  }
235  //RUNTIME_data_flush( sequence, A(k, k) );
236  RUNTIME_data_flush( sequence, A(k, k) );
237  for (m = k+1; m < B->mt; m++) {
238  int ldamuv = BLKLDD(AUV, m);
239  int ldbmuv = BLKLDD(BUV, m);
240  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
241  ldam = BLKLDD(A, m);
242  ldbm = BLKLDD(B, m);
243  for (n = 0; n < B->nt; n++) {
244  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
 /* NOTE(review): embedded lines 245 and 254 are absent here as well (the `if` and the callee of the second argument list). */
246  MORSE_TASK_dgemm(
247  &options,
248  MorseNoTrans, MorseNoTrans,
249  tempmm, tempnn, B->mb, A->mb,
250  mzone, A(m, k), ldam,
251  B(k, n), ldbk,
252  lalpha, B(m, n), ldbm);
253  else {
255  &options,
256  MorseNoTrans, MorseNoTrans,
257  tempmm, //TODO tempmmuv,
258  tempnn, //TODO tempmmuv,
259  mzone,
260  AUV, Ark, m, k, ldamuv,
261  BUV, Brk, k, n, ldbkuv,
262  lalpha,
263  BUV, Brk, m, n, ldbmuv,
264  rk, maxrk, acc);
265  }
266  }
267  //RUNTIME_data_flush( sequence, A(m, k) );
268  RUNTIME_data_flush( sequence, A(m, k) );
269  }
270  for (n = 0; n < B->nt; n++) {
271  //RUNTIME_data_flush( sequence, B(k, n) );
272  RUNTIME_data_flush( sequence, B(k, n) );
273  }
274  }
275  }
276  /*
277  * MorseLeft / MorseLower / Morse[Conj]Trans
278  */
279  else {
280  //@2
281  //printf("%s %d Left Lower Trans\n", __FILE__, __LINE__);
 /* This nest visits tile rows from last to first: every tile-row index is written as B->mt-1-k or B->mt-1-m. */
282  for (k = 0; k < B->mt; k++) {
283  int ldakuv = BLKLDD(AUV, B->mt-1-k);
284  int ldbkuv = BLKLDD(BUV, B->mt-1-k);
285  int ldakd = BLKLDD(AD, B->mt-1-k);
286  tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb;
287  ldak = BLKLDD(A, B->mt-1-k);
288  ldbk = BLKLDD(B, B->mt-1-k);
289  lalpha = k == 0 ? alpha : zone;
290  for (n = 0; n < B->nt; n++) {
291  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
 /* NOTE(review): embedded lines 292 and 300 are absent from this listing (the `if` and the callee of the second argument list). */
293  MORSE_TASK_dtrsm(
294  &options,
295  side, uplo, trans, diag,
296  tempkm, tempnn, A->mb,
297  lalpha, A(B->mt-1-k, B->mt-1-k), ldak,
298  B(B->mt-1-k, n), ldbk);
299  else {
301  &options,
302  //MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit,
303  side, uplo, trans, diag,
304  tempkm, //FIXME must be number of rows of the diagonal block
305  lalpha, AD, B->mt-1-k,
306  0, // I assume that only diags are stored
307  ldakd,
 /* NOTE(review): the tile row given for BUV here is k, while ldbkuv was computed for row B->mt-1-k and the dense call above addresses B(B->mt-1-k, n); confirm whether B->mt-1-k was intended. */
308  BUV, k, n,
309  ldbkuv,
310  Brk);
311  }
312  }
313  //RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
314  RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
315  for (m = k+1; m < B->mt; m++) {
316  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
317  ldbm = BLKLDD(B, B->mt-1-m);
318  int ldbmuv = BLKLDD(BUV, B->mt-1-m);
319  for (n = 0; n < B->nt; n++) {
320  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
 /* NOTE(review): embedded lines 321 and 330 are absent from this listing. */
322  MORSE_TASK_dgemm(
323  &options,
324  trans, MorseNoTrans,
325  B->mb, tempnn, tempkm, A->mb,
326  mzone, A(B->mt-1-k, B->mt-1-m), ldak,
327  B(B->mt-1-k, n ), ldbk,
328  lalpha, B(B->mt-1-m, n ), ldbm);
329  else {
331  &options,
332  trans, MorseNoTrans,
333  B->mb, //TODO tempmmuv,
334  tempnn, //TODO tempmmuv,
335  mzone,
336  AUV, Ark, B->mt-1-k, B->mt-1-m, ldakuv,
337  BUV, Brk, B->mt-1-k, n, ldbkuv,
338  lalpha,
339  BUV, Brk, B->mt-1-m, n, ldbmuv,
340  rk, maxrk, acc);
341  }
342  }
343  //RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-m) );
344  RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-m) );
345  }
346  for (n = 0; n < B->nt; n++) {
347  //RUNTIME_data_flush( sequence, B(B->mt-1-k, n) );
348  RUNTIME_data_flush( sequence, B(B->mt-1-k, n) );
349  }
350  }
351  }
352  }
353  }
354  /*
355  * MorseRight / MorseUpper / MorseNoTrans
356  */
357  else {
 /* Same always-failing assert as in the MorseLeft / MorseUpper branch; it guards every side != MorseLeft nest. */
358  assert("Not implemented yet" == 0);
359  if (uplo == MorseUpper) {
360  if (trans == MorseNoTrans) {
361  for (k = 0; k < B->nt; k++) {
362  tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
363  ldak = BLKLDD(A, k);
364  lalpha = k == 0 ? alpha : zone;
365  for (m = 0; m < B->mt; m++) {
366  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
367  ldbm = BLKLDD(B, m);
368  MORSE_TASK_dtrsm(
369  &options,
370  side, uplo, trans, diag,
371  tempmm, tempkn, A->mb,
372  lalpha, A(k, k), ldak, /* lda * tempkn */
373  B(m, k), ldbm); /* ldb * tempkn */
374  }
375  //RUNTIME_data_flush( sequence, A(k, k) );
376  RUNTIME_data_flush( sequence, A(k, k) );
377  for (m = 0; m < B->mt; m++) {
378  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
379  ldbm = BLKLDD(B, m);
380  for (n = k+1; n < B->nt; n++) {
381  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
382  MORSE_TASK_dgemm(
383  &options,
384  MorseNoTrans, MorseNoTrans,
385  tempmm, tempnn, B->mb, A->mb,
386  mzone, B(m, k), ldbm, /* ldb * B->mb */
387  A(k, n), ldak, /* lda * tempnn */
388  lalpha, B(m, n), ldbm); /* ldb * tempnn */
389  }
390  //RUNTIME_data_flush( sequence, B(m, k) );
391  RUNTIME_data_flush( sequence, B(m, k) );
392  }
393  for (n = k+1; n < B->nt; n++) {
394  //RUNTIME_data_flush( sequence, A(k, n) );
395  RUNTIME_data_flush( sequence, A(k, n) );
396  }
397  }
398  }
399  /*
400  * MorseRight / MorseUpper / Morse[Conj]Trans
401  */
402  else {
403  for (k = 0; k < B->nt; k++) {
404  tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb;
405  ldak = BLKLDD(A, B->nt-1-k);
406  for (m = 0; m < B->mt; m++) {
407  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
408  ldbm = BLKLDD(B, m);
409  MORSE_TASK_dtrsm(
410  &options,
411  side, uplo, trans, diag,
412  tempmm, tempkn, A->mb,
413  alpha, A(B->nt-1-k, B->nt-1-k), ldak, /* lda * tempkn */
414  B( m, B->nt-1-k), ldbm); /* ldb * tempkn */
415  //RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-k) );
416  RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-k) );
417 
418  for (n = k+1; n < B->nt; n++) {
419  ldan = BLKLDD(A, B->nt-1-n);
420  MORSE_TASK_dgemm(
421  &options,
422  MorseNoTrans, trans,
423  tempmm, B->nb, tempkn, A->mb,
424  minvalpha, B(m, B->nt-1-k), ldbm, /* ldb * tempkn */
425  A(B->nt-1-n, B->nt-1-k), ldan, /* A->mb * tempkn (Never last row) */
426  zone, B(m, B->nt-1-n), ldbm); /* ldb * B->nb */
427  }
428  //RUNTIME_data_flush( sequence, B(m, B->nt-1-k) );
429  RUNTIME_data_flush( sequence, B(m, B->nt-1-k) );
430  }
431  for (n = k+1; n < B->nt; n++) {
432  //RUNTIME_data_flush( sequence, A(B->nt-1-n, B->nt-1-k) );
433  RUNTIME_data_flush( sequence, A(B->nt-1-n, B->nt-1-k) );
434  }
435  }
436  }
437  }
438  /*
439  * MorseRight / MorseLower / MorseNoTrans
440  */
441  else {
442  if (trans == MorseNoTrans) {
443  for (k = 0; k < B->nt; k++) {
444  tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb;
445  ldak = BLKLDD(A, B->nt-1-k);
446  lalpha = k == 0 ? alpha : zone;
447  for (m = 0; m < B->mt; m++) {
448  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
449  ldbm = BLKLDD(B, m);
450  MORSE_TASK_dtrsm(
451  &options,
452  side, uplo, trans, diag,
453  tempmm, tempkn, A->mb,
454  lalpha, A(B->nt-1-k, B->nt-1-k), ldak, /* lda * tempkn */
455  B( m, B->nt-1-k), ldbm); /* ldb * tempkn */
456  //RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-k) );
457  RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-k) );
458 
459  for (n = k+1; n < B->nt; n++) {
460  MORSE_TASK_dgemm(
461  &options,
462  MorseNoTrans, MorseNoTrans,
463  tempmm, B->nb, tempkn, A->mb,
464  mzone, B(m, B->nt-1-k), ldbm, /* ldb * tempkn */
465  A(B->nt-1-k, B->nt-1-n), ldak, /* lda * B->nb */
466  lalpha, B(m, B->nt-1-n), ldbm); /* ldb * B->nb */
467  }
468  //RUNTIME_data_flush( sequence, B(m, B->nt-1-k) );
469  RUNTIME_data_flush( sequence, B(m, B->nt-1-k) );
470  }
471  for (n = k+1; n < B->nt; n++) {
472  //RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-n) );
473  RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-n) );
474  }
475  }
476  }
477  /*
478  * MorseRight / MorseLower / Morse[Conj]Trans
479  */
480  else {
481  for (k = 0; k < B->nt; k++) {
482  tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
483  ldak = BLKLDD(A, k);
484  for (m = 0; m < B->mt; m++) {
485  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
486  ldbm = BLKLDD(B, m);
487  MORSE_TASK_dtrsm(
488  &options,
489  side, uplo, trans, diag,
490  tempmm, tempkn, A->mb,
491  alpha, A(k, k), ldak, /* lda * tempkn */
492  B(m, k), ldbm); /* ldb * tempkn */
493  //RUNTIME_data_flush( sequence, A(k, k) );
494  RUNTIME_data_flush( sequence, A(k, k) );
495 
496  for (n = k+1; n < B->nt; n++) {
497  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
498  ldan = BLKLDD(A, n);
499  MORSE_TASK_dgemm(
500  &options,
501  MorseNoTrans, trans,
502  tempmm, tempnn, B->mb, A->mb,
503  minvalpha, B(m, k), ldbm, /* ldb * tempkn */
504  A(n, k), ldan, /* ldan * tempkn */
505  zone, B(m, n), ldbm); /* ldb * tempnn */
506  }
507  RUNTIME_data_flush( sequence, B(m, k) );
508  }
509  for (n = k+1; n < B->nt; n++) {
510  RUNTIME_data_flush( sequence, A(n, k) );
511  }
512 
513  }
514  }
515  }
516  }
517  RUNTIME_options_ws_free(&options);
518  RUNTIME_options_finalize(&options, morse);
519 }
520 
/**
 * hicma_pztrsmd - variant of the routine above in which the local descriptor
 * B is aliased to Bdense (A is still aliased to AUV).
 *
 * What the visible code does:
 *  - prints maxrk and alpha when HICMA_get_print_index() == 1;
 *  - returns immediately, before any options or workspace setup, when
 *    sequence->status != MORSE_SUCCESS;
 *  - requests a per-worker workspace of AD->mb * maxrk * sizeof(double) bytes;
 *  - picks one of eight loop nests from side / uplo / trans. In the two
 *    MorseLeft / MorseLower nests MORSE_TASK_dtrsm is given tile (row, 0) of
 *    AD and a tile of Bdense; the other nests address A and B through the
 *    A(m,n) / B(m,n) macros;
 *  - calls RUNTIME_options_ws_free() and RUNTIME_options_finalize() at the end.
 *
 * The MorseLeft / MorseUpper branch and the whole side != MorseLeft branch
 * start with an assert that always fails. The MorseLeft / MorseLower /
 * Morse[Conj]Trans nest prints one line to stdout unconditionally.
 *
 * NOTE(review): this listing skips embedded source lines 678, 694, 758 and
 * 773, leaving `else {` lines with no visible `if` and argument lists with no
 * visible callee. Confirm against the real pztrsm.c.
 *
 * @param side, uplo, trans  select the loop nest; also forwarded to the tasks
 * @param diag               forwarded unchanged to the tile tasks
 * @param alpha              scalar; the nests that use lalpha pass alpha on
 *                           the k == 0 iteration and 1.0 afterwards
 * @param AUV, AD, Ark       descriptors for the A operand (A aliases AUV)
 * @param Bdense             descriptor for the B operand (B aliases it)
 * @param maxrk              used for the trace print and the workspace size
 * @param sequence, request  passed to RUNTIME_options_init(); sequence is
 *                           also used for the status check and the flushes
 */
521 void hicma_pztrsmd(MORSE_enum side, MORSE_enum uplo, MORSE_enum trans, MORSE_enum diag,
522  double alpha,
523  MORSE_desc_t *AUV,
524  MORSE_desc_t *AD,
525  MORSE_desc_t *Ark,
526  MORSE_desc_t *Bdense,
527  int maxrk,
528  MORSE_sequence_t *sequence, MORSE_request_t *request)
529 {
 /* Optional trace of the scalar arguments, gated by the print index. */
530  if(HICMA_get_print_index() == 1){
531  printf("%d:%s maxrk:%d alpha:%e\n",
532  __LINE__, __func__,
533  maxrk, alpha);
534  }
535  MORSE_desc_t* A = AUV;
536  MORSE_desc_t* B = Bdense;
537 
538  MORSE_context_t *morse;
539  MORSE_option_t options;
540 
541  int k, m, n;
542  int ldak, ldam, ldan, ldbk, ldbm;
543  int tempkm, tempkn, tempmm, tempnn;
544 
545  double zone = (double) 1.0;
546  double mzone = (double)-1.0;
547  double minvalpha = (double)-1.0 / alpha;
548  double lalpha;
549 
550  morse = morse_context_self();
 /* Leave before any options/workspace setup if the sequence is not in the success state. */
551  if (sequence->status != MORSE_SUCCESS)
552  return;
553  RUNTIME_options_init(&options, morse, sequence, request);
554  size_t ws_host = 0;
555  size_t ws_worker = 0;
556  ws_worker =
557  + AD->mb * maxrk // temporary space for performing AV*B in CD+=AU*(AV*B)
558  ;
559  ws_worker *= sizeof(double); //FIXME use MORSE_Complex64_t
560  //FIXME add ws_worker and ws_host calculation from compute/pzgeqrf.c when GPU/MAGMA is supported
561  RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
562  /*
563  * MorseLeft / MorseUpper / MorseNoTrans
564  */
565  if (side == MorseLeft) {
566  if (uplo == MorseUpper) {
 /* A string literal is never equal to 0, so this assert fires whenever asserts are enabled. */
 /* NOTE(review): with NDEBUG defined the assert is a no-op and the nests below would run; confirm that is intended. */
567  assert("Not implemented yet" == 0);
568  if (trans == MorseNoTrans) {
569  for (k = 0; k < B->mt; k++) {
570  tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb;
571  ldak = BLKLDD(A, B->mt-1-k);
572  ldbk = BLKLDD(B, B->mt-1-k);
573  lalpha = k == 0 ? alpha : zone;
574  for (n = 0; n < B->nt; n++) {
575  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
576  MORSE_TASK_dtrsm(
577  &options,
578  side, uplo, trans, diag,
579  tempkm, tempnn, A->mb,
580  lalpha, A(B->mt-1-k, B->mt-1-k), ldak, /* lda * tempkm */
581  B(B->mt-1-k, n), ldbk); /* ldb * tempnn */
582  }
583  RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
584  for (m = k+1; m < B->mt; m++) {
585  ldam = BLKLDD(A, B->mt-1-m);
586  ldbm = BLKLDD(B, B->mt-1-m);
587  for (n = 0; n < B->nt; n++) {
588  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
589  MORSE_TASK_dgemm(
590  &options,
591  MorseNoTrans, MorseNoTrans,
592  B->mb, tempnn, tempkm, A->mb,
593  mzone, A(B->mt-1-m, B->mt-1-k), ldam,
594  B(B->mt-1-k, n ), ldbk,
595  lalpha, B(B->mt-1-m, n ), ldbm);
596  }
597  RUNTIME_data_flush( sequence, A(B->mt-1-m, B->mt-1-k) );
598  }
599  for (n = 0; n < B->nt; n++) {
600  RUNTIME_data_flush( sequence, B(B->mt-1-k, n) );
601  }
602  }
603  }
604  /*
605  * MorseLeft / MorseUpper / Morse[Conj]Trans
606  */
607  else {
608  for (k = 0; k < B->mt; k++) {
609  tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb;
610  ldak = BLKLDD(A, k);
611  ldbk = BLKLDD(B, k);
612  lalpha = k == 0 ? alpha : zone;
613  for (n = 0; n < B->nt; n++) {
614  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
615  MORSE_TASK_dtrsm(
616  &options,
617  side, uplo, trans, diag,
618  tempkm, tempnn, A->mb,
619  lalpha, A(k, k), ldak,
620  B(k, n), ldbk);
621  }
622  RUNTIME_data_flush( sequence, A(k, k) );
623  for (m = k+1; m < B->mt; m++) {
624  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
625  ldbm = BLKLDD(B, m);
626  for (n = 0; n < B->nt; n++) {
627  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
628  MORSE_TASK_dgemm(
629  &options,
630  trans, MorseNoTrans,
631  tempmm, tempnn, B->mb, A->mb,
632  mzone, A(k, m), ldak,
633  B(k, n), ldbk,
634  lalpha, B(m, n), ldbm);
635  }
636  RUNTIME_data_flush( sequence, A(k, m) );
637  }
638  for (n = 0; n < B->nt; n++) {
639  RUNTIME_data_flush( sequence, B(k, n) );
640  }
641 
642  }
643  }
644  }
645  /*
646  * MorseLeft / MorseLower / MorseNoTrans
647  */
648  else {
649  if (trans == MorseNoTrans) {
650  //@1
651  //printf("%s %d Left Lower Notrans\n", __FILE__, __LINE__);
652  for (k = 0; k < B->mt; k++) {
653  int ldbkd = BLKLDD(Bdense, k);
654  int ldakd = BLKLDD(AD, k);
655  tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb;
656  ldak = BLKLDD(A, k);
657  ldbk = BLKLDD(B, k);
658  lalpha = k == 0 ? alpha : zone;
659  for (n = 0; n < B->nt; n++) {
660  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
 /* The diagonal tile is taken from AD at tile (k, 0), not from A(k, k). */
661  MORSE_TASK_dtrsm(
662  &options,
663  side, uplo, trans, diag,
664  tempkm,
665  tempnn,
666  A->mb,
667  lalpha, AD, k, 0, ldakd,
668  Bdense, k, n, ldbkd);
669  }
670  RUNTIME_data_flush( sequence, A(k, k) );
671  for (m = k+1; m < B->mt; m++) {
672  int ldamuv = BLKLDD(AUV, m);
673  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
674  ldam = BLKLDD(A, m);
675  int ldbmd = BLKLDD(Bdense, m);
676  for (n = 0; n < B->nt; n++) {
677  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
 /* NOTE(review): embedded lines 678 and 694 are absent from this listing (the `if` paired with the `else {` below, and the callee of the second argument list). */
 /* NOTE(review): ldbm is passed to the dgemm below but is not assigned anywhere earlier on this path (only ldbmd is); confirm. */
679  MORSE_TASK_dgemm(
680  &options,
681  MorseNoTrans, MorseNoTrans,
682  tempmm, tempnn, B->mb, A->mb,
683  mzone, A(m, k), ldam,
684  B(k, n), ldbk,
685  lalpha, B(m, n), ldbm);
686  else {
687  /*printf("(%d,%d,%d): (%d,%d [%d])=(%d,%d [%d])x(%d,%d [%d]) trans:%d tempmm:%d tempnn:%d alpha:%g\n", k, m, n, */
688  /*m, n, ldbmd,*/
689  /*m, k, ldamuv,*/
690  /*k, n, ldbkd,*/
691  /*trans,*/
692  /*tempmm, tempnn, lalpha*/
693  /*);*/
695  &options,
696  MorseNoTrans, MorseNoTrans,
697  tempmm,
698  tempnn,
699  mzone,
700  AUV, Ark, m, k, ldamuv,
701  Bdense, k, n, ldbkd,
702  lalpha,
703  Bdense, m, n, ldbmd);
704  }
705  }
 /* NOTE(review): the update above names tile (m, k) of AUV, while this flush names tile (m, 0); confirm which tile is meant. */
706  RUNTIME_data_flush( sequence, A(m, 0) );
707  }
708  for (n = 0; n < B->nt; n++) {
709  RUNTIME_data_flush( sequence, B(k, n) );
710  }
711  }
712  }
713  /*
714  * MorseLeft / MorseLower / Morse[Conj]Trans
715  */
716  else {
717  //@2
 /* NOTE(review): this printf is not gated by HICMA_get_print_index(), so it prints on every call that reaches this nest; looks like leftover debugging. */
718  printf("%s %d Left Lower Trans B->m,n:%d,%d B->mt,nt:%d,%d\n", __FILE__, __LINE__, B->m, B->n, B->mt, B->nt);
719  for (k = 0; k < B->mt; k++) {
720  int ldakuv = BLKLDD(AUV, B->mt-1-k);
721  int ldakd = BLKLDD(AD, B->mt-1-k);
722  int ldbkd = BLKLDD(Bdense, B->mt-1-k);
723  tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb;
724  ldak = BLKLDD(AD, B->mt-1-k);
725  ldbk = BLKLDD(B, B->mt-1-k);
726  lalpha = k == 0 ? alpha : zone;
727  for (n = 0; n < B->nt; n++) {
728  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
729  /*printf("chamtrsm: (%d,%d) A(%d,%d [%d]) B(%d,%d [%d])\n",*/
730  /*k, n,*/
731  /*B->mt-1-k, 0, ldak,*/
732  /*B->mt-1-k, n, ldbk*/
733  /*);*/
734  if(1)MORSE_TASK_dtrsm(
735  &options,
736  side, uplo,
737  trans,
738  diag,
739  tempkm, tempnn, A->mb,
740  lalpha, AD, B->mt-1-k, 0, ldak,
741  Bdense, B->mt-1-k, n, ldbk);
742  }
743  RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
744  for (m = k+1; m < B->mt; m++) {
 /* B->mt-1-k == B->mt-1 holds exactly when k == 0, so the tempmm computed below depends on k only, not on m. */
 /* For k == 0 it is the remainder AD->m % AUV->mb (or AUV->mb when that remainder is 0); otherwise it is AUV->mb. */
745  tempmm = 0;
746  if (B->mt-1-k == B->mt-1) {
747  if (AD->m % AUV->mb == 0) {
748  tempmm = AUV->mb;
749  } else {
750  tempmm = AD->m % AUV->mb;
751  }
752  } else {
753  tempmm = AUV->mb;
754  }
755  ldbm = BLKLDD(B, B->mt-1-m);
756  for (n = 0; n < B->nt; n++) {
757  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
 /* NOTE(review): embedded lines 758 and 773 are absent from this listing (the `if` paired with the `else {` below, and the callee of the second argument list). */
759  MORSE_TASK_dgemm(
760  &options,
761  trans, MorseNoTrans,
762  B->mb, tempnn, tempkm, A->mb,
763  mzone, A(B->mt-1-k, B->mt-1-m), ldak,
764  B(B->mt-1-k, n ), ldbk,
765  lalpha, B(B->mt-1-m, n ), ldbm);
766  else {
767  /*printf("(%d,%d,%d): (%d,%d [%d])=(%d,%d [%d])x(%d,%d [%d]) trans:%d tempmm:%d tempnn:%d alpha:%g\n", k, m, n, */
768  /*B->mt-1-m, n, ldbm,*/
769  /*B->mt-1-k, B->mt-1-m, ldakuv,*/
770  /*B->mt-1-k, n, ldbk,*/
771  /*trans,*/
772  /*tempmm, tempnn, lalpha);*/
774  &options,
775  trans, MorseNoTrans,
776  tempmm,
777  tempnn,
778  mzone,
779  AUV, Ark, B->mt-1-k, B->mt-1-m, ldakuv,
780  Bdense, B->mt-1-k, n, ldbk,
781  lalpha,
782  Bdense, B->mt-1-m, n, ldbm);
783  }
784  }
785  RUNTIME_data_flush( sequence, A(B->mt-1-k, 0) );
786  }
787  for (n = 0; n < B->nt; n++) {
788  RUNTIME_data_flush( sequence, B(B->mt-1-k, n) );
789  }
790  }
791  }
792  }
793  }
794  /*
795  * MorseRight / MorseUpper / MorseNoTrans
796  */
797  else {
 /* Same always-failing assert as in the MorseLeft / MorseUpper branch; it guards every side != MorseLeft nest. */
798  assert("Not implemented yet" == 0);
799  if (uplo == MorseUpper) {
800  if (trans == MorseNoTrans) {
801  for (k = 0; k < B->nt; k++) {
802  tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
803  ldak = BLKLDD(A, k);
804  lalpha = k == 0 ? alpha : zone;
805  for (m = 0; m < B->mt; m++) {
806  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
807  ldbm = BLKLDD(B, m);
808  MORSE_TASK_dtrsm(
809  &options,
810  side, uplo, trans, diag,
811  tempmm, tempkn, A->mb,
812  lalpha, A(k, k), ldak, /* lda * tempkn */
813  B(m, k), ldbm); /* ldb * tempkn */
814  }
815  RUNTIME_data_flush( sequence, A(k, k) );
816  for (m = 0; m < B->mt; m++) {
817  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
818  ldbm = BLKLDD(B, m);
819  for (n = k+1; n < B->nt; n++) {
820  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
821  MORSE_TASK_dgemm(
822  &options,
823  MorseNoTrans, MorseNoTrans,
824  tempmm, tempnn, B->mb, A->mb,
825  mzone, B(m, k), ldbm, /* ldb * B->mb */
826  A(k, n), ldak, /* lda * tempnn */
827  lalpha, B(m, n), ldbm); /* ldb * tempnn */
828  }
829  RUNTIME_data_flush( sequence, B(m, k) );
830  }
831  for (n = k+1; n < B->nt; n++) {
832  RUNTIME_data_flush( sequence, A(k, n) );
833  }
834  }
835  }
836  /*
837  * MorseRight / MorseUpper / Morse[Conj]Trans
838  */
839  else {
840  for (k = 0; k < B->nt; k++) {
841  tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb;
842  ldak = BLKLDD(A, B->nt-1-k);
843  for (m = 0; m < B->mt; m++) {
844  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
845  ldbm = BLKLDD(B, m);
846  MORSE_TASK_dtrsm(
847  &options,
848  side, uplo, trans, diag,
849  tempmm, tempkn, A->mb,
850  alpha, A(B->nt-1-k, B->nt-1-k), ldak, /* lda * tempkn */
851  B( m, B->nt-1-k), ldbm); /* ldb * tempkn */
852  RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-k) );
853 
854  for (n = k+1; n < B->nt; n++) {
855  ldan = BLKLDD(A, B->nt-1-n);
856  MORSE_TASK_dgemm(
857  &options,
858  MorseNoTrans, trans,
859  tempmm, B->nb, tempkn, A->mb,
860  minvalpha, B(m, B->nt-1-k), ldbm, /* ldb * tempkn */
861  A(B->nt-1-n, B->nt-1-k), ldan, /* A->mb * tempkn (Never last row) */
862  zone, B(m, B->nt-1-n), ldbm); /* ldb * B->nb */
863  }
864  RUNTIME_data_flush( sequence, B(m, B->nt-1-k) );
865  }
866  for (n = k+1; n < B->nt; n++) {
867  RUNTIME_data_flush( sequence, A(B->nt-1-n, B->nt-1-k) );
868  }
869  }
870  }
871  }
872  /*
873  * MorseRight / MorseLower / MorseNoTrans
874  */
875  else {
876  if (trans == MorseNoTrans) {
877  for (k = 0; k < B->nt; k++) {
878  tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb;
879  ldak = BLKLDD(A, B->nt-1-k);
880  lalpha = k == 0 ? alpha : zone;
881  for (m = 0; m < B->mt; m++) {
882  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
883  ldbm = BLKLDD(B, m);
884  MORSE_TASK_dtrsm(
885  &options,
886  side, uplo, trans, diag,
887  tempmm, tempkn, A->mb,
888  lalpha, A(B->nt-1-k, B->nt-1-k), ldak, /* lda * tempkn */
889  B( m, B->nt-1-k), ldbm); /* ldb * tempkn */
890  RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-k) );
891 
892  for (n = k+1; n < B->nt; n++) {
893  MORSE_TASK_dgemm(
894  &options,
895  MorseNoTrans, MorseNoTrans,
896  tempmm, B->nb, tempkn, A->mb,
897  mzone, B(m, B->nt-1-k), ldbm, /* ldb * tempkn */
898  A(B->nt-1-k, B->nt-1-n), ldak, /* lda * B->nb */
899  lalpha, B(m, B->nt-1-n), ldbm); /* ldb * B->nb */
900  }
901  RUNTIME_data_flush( sequence, B(m, B->nt-1-k) );
902  }
903  for (n = k+1; n < B->nt; n++) {
904  RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-n) );
905  }
906  }
907  }
908  /*
909  * MorseRight / MorseLower / Morse[Conj]Trans
910  */
911  else {
912  for (k = 0; k < B->nt; k++) {
913  tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
914  ldak = BLKLDD(A, k);
915  for (m = 0; m < B->mt; m++) {
916  tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
917  ldbm = BLKLDD(B, m);
918  MORSE_TASK_dtrsm(
919  &options,
920  side, uplo, trans, diag,
921  tempmm, tempkn, A->mb,
922  alpha, A(k, k), ldak, /* lda * tempkn */
923  B(m, k), ldbm); /* ldb * tempkn */
924  RUNTIME_data_flush( sequence, A(k, k) );
925 
926  for (n = k+1; n < B->nt; n++) {
927  tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
928  ldan = BLKLDD(A, n);
929  MORSE_TASK_dgemm(
930  &options,
931  MorseNoTrans, trans,
932  tempmm, tempnn, B->mb, A->mb,
933  minvalpha, B(m, k), ldbm, /* ldb * tempkn */
934  A(n, k), ldan, /* ldan * tempkn */
935  zone, B(m, n), ldbm); /* ldb * tempnn */
936  }
937  RUNTIME_data_flush( sequence, B(m, k) );
938  }
939  for (n = k+1; n < B->nt; n++) {
940  RUNTIME_data_flush( sequence, A(n, k) );
941  }
942 
943  }
944  }
945  }
946  }
947  RUNTIME_options_ws_free(&options);
948  RUNTIME_options_finalize(&options, morse);
949 }
950 
#define AUV(m, n)
Definition: pzgemm.c:60
#define B(m, n)
Definition: pztrsm.c:55
void hicma_pztrsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum trans, MORSE_enum diag, double alpha, MORSE_desc_t *AUV, MORSE_desc_t *AD, MORSE_desc_t *Ark, MORSE_desc_t *BUV, MORSE_desc_t *Brk, int rk, int maxrk, double acc, MORSE_sequence_t *sequence, MORSE_request_t *request)
Definition: pztrsm.c:60
#define A(m, n)
Definition: pztrsm.c:54
#define BUV(m, n)
Definition: pzgemm.c:61
int side[2]
int pztrsm_enable_dense
Definition: pztrsm.c:56
int diag[2]
void HICMA_TASK_zgemm_bdcd(const MORSE_option_t *options, MORSE_enum transA, int transB, int m, int n, double alpha, const MORSE_desc_t *AUV, const MORSE_desc_t *Ark, int Am, int An, int lda, const MORSE_desc_t *BD, int Bm, int Bn, int ldb, double beta, const MORSE_desc_t *CD, int Cm, int Cn, int ldc)
int trans[3]
void HICMA_TASK_zgemm(const MORSE_option_t *options, MORSE_enum transA, int transB, int m, int n, double alpha, const MORSE_desc_t *AUV, const MORSE_desc_t *Ark, int Am, int An, int lda, const MORSE_desc_t *BUV, const MORSE_desc_t *Brk, int Bm, int Bn, int ldb, double beta, const MORSE_desc_t *CUV, const MORSE_desc_t *Crk, int Cm, int Cn, int ldc, int rk, int maxrk, double acc)
Definition: codelet_zgemm.c:45
void hicma_pztrsmd(MORSE_enum side, MORSE_enum uplo, MORSE_enum trans, MORSE_enum diag, double alpha, MORSE_desc_t *AUV, MORSE_desc_t *AD, MORSE_desc_t *Ark, MORSE_desc_t *Bdense, int maxrk, MORSE_sequence_t *sequence, MORSE_request_t *request)
Definition: pztrsm.c:521
int uplo[2]
int HICMA_get_print_index()
Definition: hicma_init.c:50
void HICMA_TASK_ztrsm(const MORSE_option_t *options, MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int m, double alpha, const MORSE_desc_t *A, int Am, int An, int lda, const MORSE_desc_t *BUV, int Bm, int Bn, int ldb, const MORSE_desc_t *Brk)
Definition: codelet_ztrsm.c:38