Commit 5ca2d21c authored by Philippe Virouleau

Use VLA instead of malloc for scratch

parent beadac2a
......@@ -97,8 +97,10 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
{
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *TAU = options->ws_worker;
CHAMELEON_Complex64_t *work = TAU + chameleon_max( m, n );
#pragma omp task firstprivate(m, n, ib, ptrA, lda, ptrT, ldt, work, TAU) depend(inout:ptrA[0]) depend(inout:ptrT[0])
CORE_zgelqt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work);
#pragma omp task firstprivate(m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0])
{
CHAMELEON_Complex64_t TAU[options->ws_wsize];
CHAMELEON_Complex64_t *work = TAU + chameleon_max( m, n );
CORE_zgelqt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work);
}
}
......@@ -98,8 +98,10 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
{
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *TAU = options->ws_worker;
CHAMELEON_Complex64_t *work = TAU + chameleon_max(m, n);
#pragma omp task firstprivate(m, n, ib, ptrA, lda, ptrT, ldt, work, TAU) depend(inout:ptrA[0]) depend(inout:ptrT[0])
CORE_zgeqrt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work);
#pragma omp task firstprivate(m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0])
{
CHAMELEON_Complex64_t TAU[options->ws_wsize];
CHAMELEON_Complex64_t *work = TAU + chameleon_max(m, n);
CORE_zgeqrt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work);
}
}
......@@ -34,6 +34,6 @@ void INSERT_TASK_zgetrf(const RUNTIME_option_t *options,
{
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
int info = 0;
#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(inout:ptrA[0:Am*An])
#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(inout:ptrA[0])
CORE_zgetrf( m, n, ptrA, lda, IPIV, &info );
}
......@@ -35,7 +35,9 @@ void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
CHAMELEON_Complex64_t *work = options->ws_worker;
#pragma omp task firstprivate(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt, work) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0])
CORE_zherfb(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb);
#pragma omp task firstprivate(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0])
{
CHAMELEON_Complex64_t work[options->ws_wsize];
CORE_zherfb(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb);
}
}
......@@ -33,9 +33,11 @@ void INSERT_TASK_zlange(const RUNTIME_option_t *options,
{
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
double *ptrB = RTBLKADDR(B, double, Bm, Bn);
double *work = options->ws_worker;
#pragma omp task firstprivate(M, N, ptrA, LDA, ptrB, options, work) depend(in:ptrA[0:Am*An]) depend(inout:ptrB[0:Bm*Bn])
CORE_zlange( norm, M, N, ptrA, LDA, work, ptrB);
#pragma omp task firstprivate(M, N, ptrA, LDA, ptrB, options) depend(in:ptrA[0]) depend(inout:ptrB[0])
{
double work[options->ws_wsize];
CORE_zlange( norm, M, N, ptrA, LDA, work, ptrB);
}
}
void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
......
......@@ -32,8 +32,10 @@ void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
const CHAM_desc_t *B, int Bm, int Bn)
{
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
double *work = options->ws_worker;
double *normA = RTBLKADDR(B, double, Bm, Bn);
#pragma omp task firstprivate(norm, uplo, N, ptrA, LDA, work, normA) depend(in:ptrA[0:Am*An]) depend(inout:normA[0:Bm*Bn])
CORE_zlanhe( norm, uplo, N, ptrA, LDA, work, normA);
#pragma omp task firstprivate(norm, uplo, N, ptrA, LDA, normA) depend(in:ptrA[0]) depend(inout:normA[0])
{
double work[options->ws_wsize];
CORE_zlanhe( norm, uplo, N, ptrA, LDA, work, normA);
}
}
......@@ -32,8 +32,10 @@ void INSERT_TASK_zlansy(const RUNTIME_option_t *options,
const CHAM_desc_t *B, int Bm, int Bn)
{
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
double *work = options->ws_worker;
double *normA = RTBLKADDR(B, double, Bm, Bn);
#pragma omp task firstprivate(norm, uplo, N, ptrA, LDA, work, normA) depend(in:ptrA[0:Am*An]) depend(inout:normA[0:Bm*Bn])
CORE_zlansy( norm, uplo, N, ptrA, LDA, work, normA);
#pragma omp task firstprivate(norm, uplo, N, ptrA, LDA, normA) depend(in:ptrA[0]) depend(inout:normA[0])
{
double work[options->ws_wsize];
CORE_zlansy( norm, uplo, N, ptrA, LDA, work, normA);
}
}
......@@ -32,7 +32,9 @@ void INSERT_TASK_zlantr(const RUNTIME_option_t *options,
{
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
double *ptrB = RTBLKADDR(B, double, Bm, Bn);
double *work = options->ws_worker;
#pragma omp task firstprivate(norm, uplo, diag, M, N, ptrA, LDA, work, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
CORE_zlantr(norm, uplo, diag, M, N, ptrA, LDA, work, ptrB);
#pragma omp task firstprivate(norm, uplo, diag, M, N, ptrA, LDA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
{
double work[options->ws_wsize];
CORE_zlantr(norm, uplo, diag, M, N, ptrA, LDA, work, ptrB);
}
}
......@@ -30,8 +30,10 @@ INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *work = options->ws_worker;
#pragma omp task firstprivate(M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt, work) depend(inout:ptrA[0], ptrB[0], ptrT[0])
CORE_ztplqt( M, N, L, ib,
ptrA, lda, ptrB, ldb, ptrT, ldt, work );
#pragma omp task firstprivate(M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0], ptrT[0])
{
CHAMELEON_Complex64_t work[options->ws_wsize];
CORE_ztplqt( M, N, L, ib,
ptrA, lda, ptrB, ldb, ptrT, ldt, work );
}
}
......@@ -30,8 +30,10 @@ INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
CHAMELEON_Complex64_t *work = options->ws_worker;
#pragma omp task firstprivate(side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0])
CORE_ztpmlqt( side, trans, M, N, K, L, ib,
ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work );
#pragma omp task firstprivate(side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0])
{
CHAMELEON_Complex64_t work[options->ws_wsize];
CORE_ztpmlqt( side, trans, M, N, K, L, ib,
ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work );
}
}
......@@ -30,8 +30,10 @@ INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
CHAMELEON_Complex64_t *work = options->ws_worker;
#pragma omp task firstprivate(side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0])
#pragma omp task firstprivate(side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0])
{
CHAMELEON_Complex64_t tmp[options->ws_wsize];
CORE_ztpmqrt( side, trans, M, N, K, L, ib,
ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work );
ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, tmp );
}
}
......@@ -29,8 +29,10 @@ INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *work = options->ws_worker;
#pragma omp task firstprivate(M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb, work) depend(in:ptrT[0]) depend(inout:ptrA[0], ptrB[0])
CORE_ztpqrt( M, N, L, ib,
ptrA, lda, ptrB, ldb, ptrT, ldt, work );
#pragma omp task firstprivate(M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrT[0]) depend(inout:ptrA[0], ptrB[0])
{
CHAMELEON_Complex64_t tmp[options->ws_wsize];
CORE_ztpqrt( M, N, L, ib,
ptrA, lda, ptrB, ldb, ptrT, ldt, tmp );
}
}
......@@ -109,8 +109,10 @@ void INSERT_TASK_ztslqt(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n);
CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *tau = options->ws_worker;
CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n );
#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work) depend(inout:ptrA1[0], ptrA2[0], ptrT[0])
CORE_ztslqt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work);
#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt) depend(inout:ptrA1[0], ptrA2[0], ptrT[0])
{
CHAMELEON_Complex64_t tau[options->ws_wsize];
CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n );
CORE_ztslqt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work);
}
}
......@@ -140,9 +140,11 @@ void INSERT_TASK_ztsmlq(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
CHAMELEON_Complex64_t *work = options->ws_worker;
int ldwork = side == ChamLeft ? ib : nb;
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
CORE_ztsmlq(side, trans, m1, n1, m2, n2, k, ib,
ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
{
CHAMELEON_Complex64_t work[options->ws_wsize];
CORE_ztsmlq(side, trans, m1, n1, m2, n2, k, ib,
ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
}
}
......@@ -39,9 +39,11 @@ void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
CHAMELEON_Complex64_t *work = options->ws_worker;
int ldwork = side == ChamLeft ? ib : nb;
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k,
ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
{
CHAMELEON_Complex64_t work[options->ws_wsize];
CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k,
ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
}
}
......@@ -140,9 +140,11 @@ void INSERT_TASK_ztsmqr(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
CHAMELEON_Complex64_t *work = options->ws_worker;
int ldwork = side == ChamLeft ? ib : nb;
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
CORE_ztsmqr(side, trans, m1, n1, m2, n2, k, ib,
ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
{
CHAMELEON_Complex64_t work[options->ws_wsize];
CORE_ztsmqr(side, trans, m1, n1, m2, n2, k, ib,
ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
}
}
......@@ -39,9 +39,11 @@ void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
CHAMELEON_Complex64_t *work = options->ws_worker;
int ldwork = side == ChamLeft ? ib : nb;
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k,
ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
{
CHAMELEON_Complex64_t work[options->ws_wsize];
CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k,
ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
}
}
......@@ -98,8 +98,10 @@ void INSERT_TASK_ztsqrt(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n);
CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *tau = options->ws_worker;
CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n );
#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, work, tau) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0])
CORE_ztsqrt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work);
#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0])
{
CHAMELEON_Complex64_t tau[options->ws_wsize];
CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n );
CORE_ztsqrt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work);
}
}
......@@ -104,7 +104,9 @@ void INSERT_TASK_ztstrf(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
CHAMELEON_Complex64_t *ptrU = RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un);
CHAMELEON_Complex64_t *ptrL = RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln);
CHAMELEON_Complex64_t *work = options->ws_worker;
#pragma omp task firstprivate(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, work, iinfo) depend(inout:ptrA[0], ptrU[0], ptrL[0])
CORE_ztstrf(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, work, nb, &iinfo);
#pragma omp task firstprivate(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, iinfo) depend(inout:ptrA[0], ptrU[0], ptrL[0])
{
CHAMELEON_Complex64_t work[options->ws_wsize];
CORE_ztstrf(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, work, nb, &iinfo);
}
}
......@@ -110,8 +110,10 @@ void INSERT_TASK_zttlqt(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n);
CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *tau = options->ws_worker;
CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n );
#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, work, tau) depend(inout:ptrA1[0], ptrA2[0], ptrT[0])
CORE_zttlqt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work);
#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt) depend(inout:ptrA1[0], ptrA2[0], ptrT[0])
{
CHAMELEON_Complex64_t tau[options->ws_wsize];
CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n );
CORE_zttlqt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work);
}
}
......@@ -132,9 +132,11 @@ void INSERT_TASK_zttmlq(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
CHAMELEON_Complex64_t *work = options->ws_worker;
int ldwork = side == ChamLeft ? ib : nb;
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
CORE_zttmlq(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1,
ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
{
CHAMELEON_Complex64_t work[options->ws_wsize];
CORE_zttmlq(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1,
ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
}
}
......@@ -138,9 +138,11 @@ void INSERT_TASK_zttmqr(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
CHAMELEON_Complex64_t *work = options->ws_worker;
int ldwork = side == ChamLeft ? ib : nb;
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
CORE_zttmqr(side, trans, m1, n1, m2, n2, k, ib,
ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
{
CHAMELEON_Complex64_t work[options->ws_wsize];
CORE_zttmqr(side, trans, m1, n1, m2, n2, k, ib,
ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
}
}
......@@ -121,8 +121,10 @@ void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
CHAMELEON_Complex64_t *work = options->ws_worker;
#pragma omp task firstprivate(side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0])
CORE_zunmlq(side, trans, m, n, k, ib,
ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb);
#pragma omp task firstprivate(side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0])
{
CHAMELEON_Complex64_t work[options->ws_wsize];
CORE_zunmlq(side, trans, m, n, k, ib,
ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb);
}
}
......@@ -121,8 +121,10 @@ void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
CHAMELEON_Complex64_t *work = options->ws_worker;
#pragma omp task firstprivate(side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0])
CORE_zunmqr(side, trans, m, n, k, ib,
ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb);
#pragma omp task firstprivate(side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0])
{
CHAMELEON_Complex64_t tmp[options->ws_wsize];
CORE_zunmqr(side, trans, m, n, k, ib,
ptrA, lda, ptrT, ldt, ptrC, ldc, tmp, nb);
}
}
......@@ -46,8 +46,10 @@ void RUNTIME_options_finalize( RUNTIME_option_t *option, CHAM_context_t *chamctx
int RUNTIME_options_ws_alloc( RUNTIME_option_t *options, size_t worker_size, size_t host_size )
{
if (worker_size > 0) {
// TODO used for scratch, maybe we can do better than malloc
options->ws_worker = malloc(worker_size* sizeof(char));
// NOTE: only the scratch size is recorded here. Instead of one malloc'd buffer
// shared by every worker, each codelet that needs scratch declares a VLA of
// ws_wsize elements inside its task body, so the scratch is private to that task.
options->ws_worker = NULL;
options->ws_wsize = worker_size;
}
// FIXME: handle ws_host if needed for omp target
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment