diff --git a/runtime/openmp/codelets/codelet_zgelqt.c b/runtime/openmp/codelets/codelet_zgelqt.c index 1d284caaab64f41ddbe2e398557a3bf6eded83ff..04786dd800d16e79cff677cbb3a5f06da8373e34 100644 --- a/runtime/openmp/codelets/codelet_zgelqt.c +++ b/runtime/openmp/codelets/codelet_zgelqt.c @@ -97,8 +97,10 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); - CHAMELEON_Complex64_t *TAU = options->ws_worker; - CHAMELEON_Complex64_t *work = TAU + chameleon_max( m, n ); -#pragma omp task firstprivate(m, n, ib, ptrA, lda, ptrT, ldt, work, TAU) depend(inout:ptrA[0]) depend(inout:ptrT[0]) - CORE_zgelqt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work); +#pragma omp task firstprivate(m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0]) + { + CHAMELEON_Complex64_t TAU[options->ws_wsize]; + CHAMELEON_Complex64_t *work = TAU + chameleon_max( m, n ); + CORE_zgelqt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work); + } } diff --git a/runtime/openmp/codelets/codelet_zgeqrt.c b/runtime/openmp/codelets/codelet_zgeqrt.c index aea7735dad39f8170b5a8f6ce336bb397de33a23..7b1b8e13ec5ad1b7de9e16eb477f389d93f3b0e4 100644 --- a/runtime/openmp/codelets/codelet_zgeqrt.c +++ b/runtime/openmp/codelets/codelet_zgeqrt.c @@ -98,8 +98,10 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); - CHAMELEON_Complex64_t *TAU = options->ws_worker; - CHAMELEON_Complex64_t *work = TAU + chameleon_max(m, n); -#pragma omp task firstprivate(m, n, ib, ptrA, lda, ptrT, ldt, work, TAU) depend(inout:ptrA[0]) depend(inout:ptrT[0]) - CORE_zgeqrt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work); +#pragma omp task firstprivate(m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0]) + { + CHAMELEON_Complex64_t TAU[options->ws_wsize]; + CHAMELEON_Complex64_t *work = TAU + chameleon_max(m, n); + CORE_zgeqrt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work); + } } diff --git a/runtime/openmp/codelets/codelet_zgetrf.c b/runtime/openmp/codelets/codelet_zgetrf.c index 4bb4173ac113566a90c8702cb1d08002142ec0e9..27c599ed16c998e416df0eef00813fac4a64ef0c 100644 --- a/runtime/openmp/codelets/codelet_zgetrf.c +++ b/runtime/openmp/codelets/codelet_zgetrf.c @@ -34,6 +34,6 @@ void INSERT_TASK_zgetrf(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); int info = 0; -#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(inout:ptrA[0]) CORE_zgetrf( m, n, ptrA, lda, IPIV, &info ); } diff --git a/runtime/openmp/codelets/codelet_zherfb.c b/runtime/openmp/codelets/codelet_zherfb.c index 2890651e836f95db86d0035f34fffac83bc5a141..3ed5263e649f2fedfd4d17b345cee38e8f957d3e 100644 --- a/runtime/openmp/codelets/codelet_zherfb.c +++ b/runtime/openmp/codelets/codelet_zherfb.c @@ -35,7 +35,9 @@ void INSERT_TASK_zherfb(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); - CHAMELEON_Complex64_t *work = options->ws_worker; -#pragma omp task firstprivate(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt, work) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0]) - CORE_zherfb(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb); +#pragma omp task firstprivate(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0]) + { + CHAMELEON_Complex64_t work[options->ws_wsize]; + CORE_zherfb(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb); + } } diff --git a/runtime/openmp/codelets/codelet_zlange.c b/runtime/openmp/codelets/codelet_zlange.c index 1358fc57d2216dc6cf44a9adb27357b23c711499..5c5c99dd99778deba5884a6fdb83a22c9192ef9b 100644 --- a/runtime/openmp/codelets/codelet_zlange.c +++ b/runtime/openmp/codelets/codelet_zlange.c @@ -33,9 +33,11 @@ void INSERT_TASK_zlange(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); double *ptrB = RTBLKADDR(B, double, Bm, Bn); - double *work = options->ws_worker; -#pragma omp task firstprivate(M, N, ptrA, LDA, ptrB, options, work) depend(in:ptrA[0:Am*An]) depend(inout:ptrB[0:Bm*Bn]) - CORE_zlange( norm, M, N, ptrA, LDA, work, ptrB); +#pragma omp task firstprivate(M, N, ptrA, LDA, ptrB, options) depend(in:ptrA[0]) depend(inout:ptrB[0]) + { + double work[options->ws_wsize]; + CORE_zlange( norm, M, N, ptrA, LDA, work, ptrB); + } } void INSERT_TASK_zlange_max(const RUNTIME_option_t *options, diff --git a/runtime/openmp/codelets/codelet_zlanhe.c b/runtime/openmp/codelets/codelet_zlanhe.c index f77acaad28dffa0abecc17a0f9308d009c0fea36..be4bc57f354ac7c1d4d0b045c88026458583b608 100644 --- a/runtime/openmp/codelets/codelet_zlanhe.c +++ b/runtime/openmp/codelets/codelet_zlanhe.c @@ -32,8 +32,10 @@ void INSERT_TASK_zlanhe(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); - double *work = options->ws_worker; double *normA = RTBLKADDR(B, double, Bm, Bn); -#pragma omp task firstprivate(norm, uplo, N, ptrA, LDA, work, normA) depend(in:ptrA[0:Am*An]) depend(inout:normA[0:Bm*Bn]) - CORE_zlanhe( norm, uplo, N, ptrA, LDA, work, normA); +#pragma omp task firstprivate(norm, uplo, N, ptrA, LDA, normA) depend(in:ptrA[0]) depend(inout:normA[0]) + { + double work[options->ws_wsize]; + CORE_zlanhe( norm, uplo, N, ptrA, LDA, work, normA); + } } diff --git a/runtime/openmp/codelets/codelet_zlansy.c b/runtime/openmp/codelets/codelet_zlansy.c index c3dd736faac2e267f1703ea63e29633d45dd5b02..3118f450f5f8bc2e88c6af7aefaa59553b81c9d7 100644 --- a/runtime/openmp/codelets/codelet_zlansy.c +++ b/runtime/openmp/codelets/codelet_zlansy.c @@ -32,8 +32,10 @@ void INSERT_TASK_zlansy(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); - double *work = options->ws_worker; double *normA = RTBLKADDR(B, double, Bm, Bn); -#pragma omp task firstprivate(norm, uplo, N, ptrA, LDA, work, normA) depend(in:ptrA[0:Am*An]) depend(inout:normA[0:Bm*Bn]) - CORE_zlansy( norm, uplo, N, ptrA, LDA, work, normA); +#pragma omp task firstprivate(norm, uplo, N, ptrA, LDA, normA) depend(in:ptrA[0]) depend(inout:normA[0]) + { + double work[options->ws_wsize]; + CORE_zlansy( norm, uplo, N, ptrA, LDA, work, normA); + } } diff --git a/runtime/openmp/codelets/codelet_zlantr.c b/runtime/openmp/codelets/codelet_zlantr.c index 00f1c3b7ded78b156d9370d20dff2b68fc8418bc..6b5ae11687980f9d0743245ba341ff15afea635e 100644 --- a/runtime/openmp/codelets/codelet_zlantr.c +++ b/runtime/openmp/codelets/codelet_zlantr.c @@ -32,7 +32,9 @@ void INSERT_TASK_zlantr(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); double *ptrB = RTBLKADDR(B, double, Bm, Bn); - double *work = options->ws_worker; -#pragma omp task firstprivate(norm, uplo, diag, M, N, ptrA, LDA, work, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) - CORE_zlantr(norm, uplo, diag, M, N, ptrA, LDA, work, ptrB); +#pragma omp task firstprivate(norm, uplo, diag, M, N, ptrA, LDA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + { + double work[options->ws_wsize]; + CORE_zlantr(norm, uplo, diag, M, N, ptrA, LDA, work, ptrB); + } } diff --git a/runtime/openmp/codelets/codelet_ztplqt.c b/runtime/openmp/codelets/codelet_ztplqt.c index 783a610a555be53315333fc9f8019f9d48436eca..b587f5d5ecfa1fcc0a951a1435169c51986fbfc6 100644 --- a/runtime/openmp/codelets/codelet_ztplqt.c +++ b/runtime/openmp/codelets/codelet_ztplqt.c @@ -30,8 +30,10 @@ INSERT_TASK_ztplqt( const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); - CHAMELEON_Complex64_t *work = options->ws_worker; -#pragma omp task firstprivate(M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt, work) depend(inout:ptrA[0], ptrB[0], ptrT[0]) - CORE_ztplqt( M, N, L, ib, - ptrA, lda, ptrB, ldb, ptrT, ldt, work ); +#pragma omp task firstprivate(M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0], ptrT[0]) + { + CHAMELEON_Complex64_t work[options->ws_wsize]; + CORE_ztplqt( M, N, L, ib, + ptrA, lda, ptrB, ldb, ptrT, ldt, work ); + } } diff --git a/runtime/openmp/codelets/codelet_ztpmlqt.c b/runtime/openmp/codelets/codelet_ztpmlqt.c index 769c66194b93c0cea3769b8671d2849fc242cd0c..3601df373d6f6b3691c2fa6b6624edad754a6a69 100644 --- a/runtime/openmp/codelets/codelet_ztpmlqt.c +++ b/runtime/openmp/codelets/codelet_ztpmlqt.c @@ -30,8 +30,10 @@ INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); - CHAMELEON_Complex64_t *work = options->ws_worker; -#pragma omp task firstprivate(side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) - CORE_ztpmlqt( side, trans, M, N, K, L, ib, - ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work ); +#pragma omp task firstprivate(side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) + { + CHAMELEON_Complex64_t work[options->ws_wsize]; + CORE_ztpmlqt( side, trans, M, N, K, L, ib, + ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work ); + } } diff --git a/runtime/openmp/codelets/codelet_ztpmqrt.c b/runtime/openmp/codelets/codelet_ztpmqrt.c index 526017942fd197be7d181500cf9206486fa58fed..3d5225a9ea4e01c45019bad52381622cfe2ace53 100644 --- a/runtime/openmp/codelets/codelet_ztpmqrt.c +++ b/runtime/openmp/codelets/codelet_ztpmqrt.c @@ -30,8 +30,10 @@ INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); - CHAMELEON_Complex64_t *work = options->ws_worker; -#pragma omp task firstprivate(side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) +#pragma omp task firstprivate(side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) + { + CHAMELEON_Complex64_t tmp[options->ws_wsize]; CORE_ztpmqrt( side, trans, M, N, K, L, ib, - ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work ); + ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, tmp ); + } } diff --git a/runtime/openmp/codelets/codelet_ztpqrt.c b/runtime/openmp/codelets/codelet_ztpqrt.c index 26dd08848c727fb61e3d0e5d7cf92f05378382b6..f25eb1684b2ecb05838eef5a1b63c144a8ba2c57 100644 --- a/runtime/openmp/codelets/codelet_ztpqrt.c +++ b/runtime/openmp/codelets/codelet_ztpqrt.c @@ -29,8 +29,10 @@ INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); - CHAMELEON_Complex64_t *work = options->ws_worker; -#pragma omp task firstprivate(M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb, work) depend(in:ptrT[0]) depend(inout:ptrA[0], ptrB[0]) - CORE_ztpqrt( M, N, L, ib, - ptrA, lda, ptrB, ldb, ptrT, ldt, work ); +#pragma omp task firstprivate(M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrT[0]) depend(inout:ptrA[0], ptrB[0]) + { + CHAMELEON_Complex64_t tmp[options->ws_wsize]; + CORE_ztpqrt( M, N, L, ib, + ptrA, lda, ptrB, ldb, ptrT, ldt, tmp ); + } } diff --git a/runtime/openmp/codelets/codelet_ztslqt.c b/runtime/openmp/codelets/codelet_ztslqt.c index d17db69228fb365eb47b74d80577c17d22965e0d..3d9b75e9c7b57d64a3be545bdce3abed3fe29599 100644 --- a/runtime/openmp/codelets/codelet_ztslqt.c +++ b/runtime/openmp/codelets/codelet_ztslqt.c @@ -109,8 +109,10 @@ void INSERT_TASK_ztslqt(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); - CHAMELEON_Complex64_t *tau = options->ws_worker; - CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n ); -#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work) depend(inout:ptrA1[0], ptrA2[0], ptrT[0]) - CORE_ztslqt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work); +#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt) depend(inout:ptrA1[0], ptrA2[0], ptrT[0]) + { + CHAMELEON_Complex64_t tau[options->ws_wsize]; + CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n ); + CORE_ztslqt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work); + } } diff --git a/runtime/openmp/codelets/codelet_ztsmlq.c b/runtime/openmp/codelets/codelet_ztsmlq.c index 83611cf1f5795f82a6536fe1b6ce4b950337acd2..6b11db9fb8a5ba99f7f541b9ed3d0a2437774741 100644 --- a/runtime/openmp/codelets/codelet_ztsmlq.c +++ b/runtime/openmp/codelets/codelet_ztsmlq.c @@ -140,9 +140,11 @@ void INSERT_TASK_ztsmlq(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); - CHAMELEON_Complex64_t *work = options->ws_worker; int ldwork = side == ChamLeft ? ib : nb; -#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) - CORE_ztsmlq(side, trans, m1, n1, m2, n2, k, ib, - ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + { + CHAMELEON_Complex64_t work[options->ws_wsize]; + CORE_ztsmlq(side, trans, m1, n1, m2, n2, k, ib, + ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); + } } diff --git a/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c b/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c index 57071465d08e31024798e7fdf2e303ac1b8c7723..2ba3f99f559de7866071c7153e9ad733c3d4abaa 100644 --- a/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c +++ b/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c @@ -39,9 +39,11 @@ void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); - CHAMELEON_Complex64_t *work = options->ws_worker; int ldwork = side == ChamLeft ? ib : nb; -#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) - CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k, - ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + { + CHAMELEON_Complex64_t work[options->ws_wsize]; + CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k, + ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); + } } diff --git a/runtime/openmp/codelets/codelet_ztsmqr.c b/runtime/openmp/codelets/codelet_ztsmqr.c index e6beea2243ab1445f7641e4897769d5d8e251d01..b75e91f3d59f133acdfe7d8fe89a0f194dffecf1 100644 --- a/runtime/openmp/codelets/codelet_ztsmqr.c +++ b/runtime/openmp/codelets/codelet_ztsmqr.c @@ -140,9 +140,11 @@ void INSERT_TASK_ztsmqr(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); - CHAMELEON_Complex64_t *work = options->ws_worker; int ldwork = side == ChamLeft ? ib : nb; -#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) - CORE_ztsmqr(side, trans, m1, n1, m2, n2, k, ib, - ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + { + CHAMELEON_Complex64_t work[options->ws_wsize]; + CORE_ztsmqr(side, trans, m1, n1, m2, n2, k, ib, + ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); + } } diff --git a/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c b/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c index 9a1fe799e6b38aae04256a0a569a882b23d73ff0..6ba9d43310730f9f6a77ea1fbc6cac1c2cc04cd9 100644 --- a/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c +++ b/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c @@ -39,9 +39,11 @@ void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); - CHAMELEON_Complex64_t *work = options->ws_worker; int ldwork = side == ChamLeft ? ib : nb; -#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) - CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, - ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + { + CHAMELEON_Complex64_t work[options->ws_wsize]; + CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, + ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); + } } diff --git a/runtime/openmp/codelets/codelet_ztsqrt.c b/runtime/openmp/codelets/codelet_ztsqrt.c index b7561d8a965201923e33a3ece17052b630e06a84..8ee1cba55d9e8bc9b58f5ac430150490023ac68a 100644 --- a/runtime/openmp/codelets/codelet_ztsqrt.c +++ b/runtime/openmp/codelets/codelet_ztsqrt.c @@ -98,8 +98,10 @@ void INSERT_TASK_ztsqrt(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); - CHAMELEON_Complex64_t *tau = options->ws_worker; - CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n ); -#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, work, tau) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0]) - CORE_ztsqrt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work); +#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0]) + { + CHAMELEON_Complex64_t tau[options->ws_wsize]; + CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n ); + CORE_ztsqrt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work); + } } diff --git a/runtime/openmp/codelets/codelet_ztstrf.c b/runtime/openmp/codelets/codelet_ztstrf.c index 2748674e5aee0fda795eb34ba3cf1a271fdb01f8..75d2920ce7d378e559ad6d2d9b42d7e079203f95 100644 --- a/runtime/openmp/codelets/codelet_ztstrf.c +++ b/runtime/openmp/codelets/codelet_ztstrf.c @@ -104,7 +104,9 @@ void INSERT_TASK_ztstrf(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrU = RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un); CHAMELEON_Complex64_t *ptrL = RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln); - CHAMELEON_Complex64_t *work = options->ws_worker; -#pragma omp task firstprivate(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, work, iinfo) depend(inout:ptrA[0], ptrU[0], ptrL[0]) - CORE_ztstrf(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, work, nb, &iinfo); +#pragma omp task firstprivate(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, iinfo) depend(inout:ptrA[0], ptrU[0], ptrL[0]) + { + CHAMELEON_Complex64_t work[options->ws_wsize]; + CORE_ztstrf(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, work, nb, &iinfo); + } } diff --git a/runtime/openmp/codelets/codelet_zttlqt.c b/runtime/openmp/codelets/codelet_zttlqt.c index c8567b1f18afd6614dbf6b0dafaf47af55f22549..fb37bdfaa48351868f59cf54cf4e83e2eec4652b 100644 --- a/runtime/openmp/codelets/codelet_zttlqt.c +++ b/runtime/openmp/codelets/codelet_zttlqt.c @@ -110,8 +110,10 @@ void INSERT_TASK_zttlqt(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); - CHAMELEON_Complex64_t *tau = options->ws_worker; - CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n ); -#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, work, tau) depend(inout:ptrA1[0], ptrA2[0], ptrT[0]) - CORE_zttlqt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work); +#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt) depend(inout:ptrA1[0], ptrA2[0], ptrT[0]) + { + CHAMELEON_Complex64_t tau[options->ws_wsize]; + CHAMELEON_Complex64_t *work = tau + chameleon_max( m, n ); + CORE_zttlqt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work); + } } diff --git a/runtime/openmp/codelets/codelet_zttmlq.c b/runtime/openmp/codelets/codelet_zttmlq.c index e5093489c58e6a1811b388da81de89d180b72103..687131351f8c388503dda5ad8e78c6a69de2d87e 100644 --- a/runtime/openmp/codelets/codelet_zttmlq.c +++ b/runtime/openmp/codelets/codelet_zttmlq.c @@ -132,9 +132,11 @@ void INSERT_TASK_zttmlq(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); - CHAMELEON_Complex64_t *work = options->ws_worker; int ldwork = side == ChamLeft ? ib : nb; -#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) - CORE_zttmlq(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, - ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + { + CHAMELEON_Complex64_t work[options->ws_wsize]; + CORE_zttmlq(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, + ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); + } } diff --git a/runtime/openmp/codelets/codelet_zttmqr.c b/runtime/openmp/codelets/codelet_zttmqr.c index b28b47eb31543b4dd2aa4323043a5ec814fce3c8..7a2a10d6815f253279b2237b81ff4632e6e9535e 100644 --- a/runtime/openmp/codelets/codelet_zttmqr.c +++ b/runtime/openmp/codelets/codelet_zttmqr.c @@ -138,9 +138,11 @@ void INSERT_TASK_zttmqr(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); - CHAMELEON_Complex64_t *work = options->ws_worker; int ldwork = side == ChamLeft ? ib : nb; -#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) - CORE_zttmqr(side, trans, m1, n1, m2, n2, k, ib, - ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + { + CHAMELEON_Complex64_t work[options->ws_wsize]; + CORE_zttmqr(side, trans, m1, n1, m2, n2, k, ib, + ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); + } } diff --git a/runtime/openmp/codelets/codelet_zunmlq.c b/runtime/openmp/codelets/codelet_zunmlq.c index fc16b5e13d3ab386940b562c319f28c5ca7e16dc..c9852c457d06b93d9eda19438da8ba339188d64d 100644 --- a/runtime/openmp/codelets/codelet_zunmlq.c +++ b/runtime/openmp/codelets/codelet_zunmlq.c @@ -121,8 +121,10 @@ void INSERT_TASK_zunmlq(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); - CHAMELEON_Complex64_t *work = options->ws_worker; -#pragma omp task firstprivate(side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0]) - CORE_zunmlq(side, trans, m, n, k, ib, - ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb); +#pragma omp task firstprivate(side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0]) + { + CHAMELEON_Complex64_t work[options->ws_wsize]; + CORE_zunmlq(side, trans, m, n, k, ib, + ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb); + } } diff --git a/runtime/openmp/codelets/codelet_zunmqr.c b/runtime/openmp/codelets/codelet_zunmqr.c index 207469b5d2b08e157f9cd2d2346d5195465ef90a..1254dbec58bbb8fb79d5df2320fa94039244e4fe 100644 --- a/runtime/openmp/codelets/codelet_zunmqr.c +++ b/runtime/openmp/codelets/codelet_zunmqr.c @@ -121,8 +121,10 @@ void INSERT_TASK_zunmqr(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); - CHAMELEON_Complex64_t *work = options->ws_worker; -#pragma omp task firstprivate(side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0]) - CORE_zunmqr(side, trans, m, n, k, ib, - ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb); +#pragma omp task firstprivate(side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0]) + { + CHAMELEON_Complex64_t tmp[options->ws_wsize]; + CORE_zunmqr(side, trans, m, n, k, ib, + ptrA, lda, ptrT, ldt, ptrC, ldc, tmp, nb); + } } diff --git a/runtime/openmp/control/runtime_options.c b/runtime/openmp/control/runtime_options.c index ca81fd63c96644d4a533e6b72770ac0803d5e822..81e2cd206b6f5f8888ffe8c4115e93723a2ecbd3 100644 --- a/runtime/openmp/control/runtime_options.c +++ b/runtime/openmp/control/runtime_options.c @@ -46,8 +46,10 @@ void RUNTIME_options_finalize( RUNTIME_option_t *option, CHAM_context_t *chamctx int RUNTIME_options_ws_alloc( RUNTIME_option_t *options, size_t worker_size, size_t host_size ) { if (worker_size > 0) { - // TODO used for scratch, maybe we can do better than malloc - options->ws_worker = malloc(worker_size* sizeof(char)); + // NOTE: we set the size, but instead of doing a malloc shared by multiple workers, + // we just create a VLA in the relevant codelets, within the task's body. + // This way we ensure the "scratch" is thread local and not shared by multiple threads. + options->ws_worker = NULL; options->ws_wsize = worker_size; } // FIXME: handle ws_host if needed for omp target