HiCMA
Hierarchical Computations on Manycore Architectures
hcore_zgytlr.c
Go to the documentation of this file.
1 
16 //#include "hcore/include/hcore.h"
17 #include "morse.h"
18 #include "hcore_z.h"
19 #include <assert.h>
20 #include <stdio.h>
21 #include <sys/time.h>//FIXME for gettimeofday
22 #include <stdlib.h>//FIXME for malloc
23 
24 #define COMPLEX
25 #undef REAL
26 // HCORE_zgytlr - Generate a tile for random matrix.
27 
28 #include "starsh.h"
29 #include "starsh-spatial.h"
30 #include "starsh-randtlr.h"
31 #ifdef LAPACKE_UTILS
32 #include <lapacke_utils.h>
33 #endif
34 #include "coreblas/coreblas.h"
35 #include "coreblas/lapacke.h"
36 extern STARSH_blrf *mpiF;
37 extern int print_index;
39 extern int store_only_diagonal_tiles;
40 extern int global_check;
41 extern int print_mat;
42 extern void _printmat(double * A, int m, int n, int ld);
43 
44 //extern int global_always_fixed_rank; //FIXME this does not work, I do not know why
45 //extern int global_fixed_rank;
48 int gytlr_tile_ii = -1;
49 int gytlr_tile_jj = -1;
50 
51 void HCORE_zgytlr( int m, int n, /*dimension of squareAD*/
52  double *AU,
53  double *AV,
54  double *AD,
55  double *Ark,
56  int lda,
57  int ldu,
58  int ldv,
59  int bigM, int ii, int jj, unsigned long long int seed,
60  int maxrank, double tol, int compress_diag,
61  double *Dense
62  )
63 {
64  if(gytlr_tile_ii >= 0) {
65  ii = gytlr_tile_ii;
66  printf("%s %d: Using fixed i:%d\n", __FILE__, __LINE__, ii);
67  }
68  if(gytlr_tile_jj >= 0) {
69  jj = gytlr_tile_jj;
70  printf("%s %d: Using fixed j:%d\n", __FILE__, __LINE__, jj);
71  }
72  int64_t i, j;
73  //printf("m:%d n:%d bigM:%d m0:%d n0:%d\n", m, n, bigM, m0, n0);
74  struct timeval tvalBefore, tvalAfter; // removed comma
75  gettimeofday (&tvalBefore, NULL);
76  if(print_index){
77  fprintf(stderr, "%d+GYTLR\t|(%d,%d) m:%d n:%d lda:%d ldu:%d ldv:%d\n",MORSE_My_Mpi_Rank(), ii, jj, m, n, lda, ldu, ldv);
78  }
79 
80  int shape[2];
81  int rank = 0;
82  int oversample = 10;
83  double *work;
84  int *iwork;
85  STARSH_cluster *RC = mpiF->row_cluster, *CC = RC;
86  void *RD = RC->data, *CD = RD;
87  double *saveAD;
88  // allocate space for dense tile
89  if((ii != jj && store_only_diagonal_tiles == 1) // if tile is off diagonal and
90  // and only diagonal tiles are stored in a tall and skinny matrix
91  // store_only_diagonal_tiles is here because
92  // code can use AD to store dense tiles
93  || // OR
94  compress_diag == 1) { // diagonals are also compressed so AD may not used perhaps
95  saveAD = AD;
96  //AD = malloc(sizeof(double) * m * n);
97  AD = malloc(sizeof(double) * lda * n);
98  assert(m==lda);
99  }
100  //starsh_blrf_get_block(mpiF, ii, jj, shape, &AD);
101 
102  mpiF->problem->kernel(m, n, RC->pivot+RC->start[ii], CC->pivot+CC->start[jj],
103  RD, CD, AD, lda);
104 
105 /* {*/
106  /*if (ii != jj || compress_diag == 1) { */
107  /*if(store_only_diagonal_tiles == 1) {*/
108  /*assert(AD != saveAD);*/
109  /*free(AD);*/
110  /*}*/
111  /*}*/
112  /*return; //TODO*/
113  /*}*/
114  if(global_check == 1){
115  char chall = 'A';
116  LAPACK_dlacpy(&chall, &m, &n, AD, &lda, Dense, &lda);
117  //printf("Original problem is copied :%d,%d\n", ii,jj);
118  }
119  int mn = m;
120  int mn2 = maxrank+oversample;
121  if(mn2 > mn)
122  mn2 = mn;
123  // Get size of temporary arrays
124  size_t lwork = n, lwork_sdd = (4*mn2+7)*mn2;
125  if(lwork_sdd > lwork)
126  lwork = lwork_sdd;
127  lwork += (size_t)mn2*(2*n+m+mn2+1);
128  size_t liwork = 8*mn2;
129  // Allocate temporary arrays
130  //STARSH_MALLOC(iwork, liwork);
131  iwork = malloc(sizeof(*iwork) * liwork);
132  if(iwork == NULL) {
133  fprintf(stderr, "%s %s %d:\t Allocation failed. No memory! liwork:%d", __FILE__, __func__, __LINE__, liwork);
134  exit(-1);
135  }
136  //STARSH_MALLOC(work, lwork);
137  work = malloc(sizeof(*work) * lwork);
138  if(work == NULL) {
139  fprintf(stderr, "%s %s %d:\t Allocation failed. No memory! lwork:%d", __FILE__, __func__, __LINE__, lwork);
140  exit(-1);
141  }
142  if (ii != jj || compress_diag == 1) { // do not try to compress diagonal blocks if it is not enforced
143  //AD is m x n. AU and AV are m x maxrank and n x maxrank correspondingly
144  //starsh_kernel_drsdd(m, n, AD, AU, AV, &rank, maxrank, oversample, tol, work, lwork, iwork);
145  //starsh_dense_dlrrsdd(m, n, AD, AU, AV, &rank, maxrank, oversample, tol, work, lwork, iwork);
146  starsh_dense_dlrrsdd(m, n, AD, lda, AU, ldu, AV, ldv, &rank, maxrank, oversample, tol, work, lwork, iwork);
147 
148 
149  if(0)cblas_dgemm( //for testing purposes
150  CblasColMajor,
151  CblasNoTrans, CblasTrans,
152  m, n, rank,
153  1.0, AU, ldu,
154  AV, ldv,
155  0.0, Dense, lda);
156 
157  if(rank == -1){ //means that tile is dense.
158  rank = m;
159  fprintf(stderr, "%s %s %d: Dense off-diagonal block (%d,%d)\n", __FILE__, __func__, __LINE__, ii, jj);
160  exit(0);
161  }
162  Ark[0] = rank;
163  if(store_only_diagonal_tiles == 1) {
164  assert(AD != saveAD);
165  free(AD);
166  }
167  if(global_always_fixed_rank == 1) {
168  global_fixed_rank = rank;
169  }
170  if(print_mat){
171  printf("%d\tgytlr-UV-output\n", __LINE__);
172  _printmat(AD, m, n, lda);
173  _printmat(AU, m, rank, ldu);
174  _printmat(AV, ldv, rank, ldv);
175  }
176  } else {
177  Ark[0] = m;
178  if(print_mat){
179  printf("%d\tgytlr-DENSE-output\n", __LINE__);
180  _printmat(AD, m, m, lda);
181  }
182  }
183 
184  /*printf("m:%d n:%d Tile %d,%d rk:%d lda:%d maxrank:%d oversample:%d tol:%.2e AD:%p AU:%p AV:%p\n", */
185  /*m, n, ii, jj, rank, lda, maxrank, oversample, tol, AD, AU, AV);*/
186  /*double *tmp = AD;*/
187  /*for (i = 0; i < m; ++i) {*/
188  /*for (j=0; j<n; ++j ) {*/
189  /*printf("%+.2e ",tmp[j*lda+i]);*/
190  /*}*/
191  /*printf("\n");*/
192  /*}*/
193  /*printf("\n");*/
194 
195  free(work);
196  free(iwork);
198  gettimeofday (&tvalAfter, NULL);
199  fprintf(stderr, "%d-GYTLR\t|(%d,%d) rk:%g m:%d n:%d lda:%d ldu:%d ldv:%d\t\t\t\t\tGYTLR: %.4f\n",MORSE_My_Mpi_Rank(),ii,jj,Ark[0],m, n, lda, ldu, ldv,
200  (tvalAfter.tv_sec - tvalBefore.tv_sec)
201  +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0
202  );
203  }
204 }
205 
206 
207 
int store_only_diagonal_tiles
#define A(m, n)
Definition: pzgemm.c:56
int gytlr_tile_jj
Definition: hcore_zgytlr.c:49
int global_fixed_rank
Definition: hcore_zgytlr.c:47
void HCORE_zgytlr(int m, int n, double *AU, double *AV, double *AD, double *Ark, int lda, int ldu, int ldv, int bigM, int ii, int jj, unsigned long long int seed, int maxrank, double tol, int compress_diag, double *Dense)
Definition: hcore_zgytlr.c:51
int print_mat
int gytlr_tile_ii
Definition: hcore_zgytlr.c:48
int print_index_end
Definition: hcore_zgytlr.c:38
void _printmat(double *A, int m, int n, int ld)
STARSH_blrf * mpiF
int print_index
int global_check
int global_always_fixed_rank
Definition: hcore_zgytlr.c:46