29 #include "starsh-spatial.h" 30 #include "starsh-randtlr.h" 32 #include <lapacke_utils.h> 34 #include "coreblas/coreblas.h" 35 #include "coreblas/lapacke.h" 36 extern STARSH_blrf *
mpiF;
42 extern void _printmat(
double *
A,
int m,
int n,
int ld);
59 int bigM,
int ii,
int jj,
unsigned long long int seed,
60 int maxrank,
double tol,
int compress_diag,
66 printf(
"%s %d: Using fixed i:%d\n", __FILE__, __LINE__, ii);
70 printf(
"%s %d: Using fixed j:%d\n", __FILE__, __LINE__, jj);
74 struct timeval tvalBefore, tvalAfter;
75 gettimeofday (&tvalBefore, NULL);
77 fprintf(stderr,
"%d+GYTLR\t|(%d,%d) m:%d n:%d lda:%d ldu:%d ldv:%d\n",MORSE_My_Mpi_Rank(), ii, jj, m, n, lda, ldu, ldv);
85 STARSH_cluster *RC =
mpiF->row_cluster, *CC = RC;
86 void *RD = RC->data, *CD = RD;
97 AD = malloc(
sizeof(
double) * lda * n);
102 mpiF->problem->kernel(m, n, RC->pivot+RC->start[ii], CC->pivot+CC->start[jj],
116 LAPACK_dlacpy(&chall, &m, &n, AD, &lda, Dense, &lda);
120 int mn2 = maxrank+oversample;
124 size_t lwork = n, lwork_sdd = (4*mn2+7)*mn2;
125 if(lwork_sdd > lwork)
127 lwork += (size_t)mn2*(2*n+m+mn2+1);
128 size_t liwork = 8*mn2;
131 iwork = malloc(
sizeof(*iwork) * liwork);
133 fprintf(stderr,
"%s %s %d:\t Allocation failed. No memory! liwork:%d", __FILE__, __func__, __LINE__, liwork);
137 work = malloc(
sizeof(*work) * lwork);
139 fprintf(stderr,
"%s %s %d:\t Allocation failed. No memory! lwork:%d", __FILE__, __func__, __LINE__, lwork);
142 if (ii != jj || compress_diag == 1) {
146 starsh_dense_dlrrsdd(m, n, AD, lda, AU, ldu, AV, ldv, &rank, maxrank, oversample, tol, work, lwork, iwork);
151 CblasNoTrans, CblasTrans,
159 fprintf(stderr,
"%s %s %d: Dense off-diagonal block (%d,%d)\n", __FILE__, __func__, __LINE__, ii, jj);
164 assert(AD != saveAD);
171 printf(
"%d\tgytlr-UV-output\n", __LINE__);
179 printf(
"%d\tgytlr-DENSE-output\n", __LINE__);
198 gettimeofday (&tvalAfter, NULL);
199 fprintf(stderr,
"%d-GYTLR\t|(%d,%d) rk:%g m:%d n:%d lda:%d ldu:%d ldv:%d\t\t\t\t\tGYTLR: %.4f\n",MORSE_My_Mpi_Rank(),ii,jj,Ark[0],m, n, lda, ldu, ldv,
200 (tvalAfter.tv_sec - tvalBefore.tv_sec)
201 +(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000.0
int store_only_diagonal_tiles
void HCORE_zgytlr(int m, int n, double *AU, double *AV, double *AD, double *Ark, int lda, int ldu, int ldv, int bigM, int ii, int jj, unsigned long long int seed, int maxrank, double tol, int compress_diag, double *Dense)
void _printmat(double *A, int m, int n, int ld)
int global_always_fixed_rank