diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c index 98312113fb288cd692b363919bc1410210bcdea3..2f66d1abb861d6619795e35e86704b2f68c04c7b 100644 --- a/compute/pzgelqf.c +++ b/compute/pzgelqf.c @@ -32,7 +32,11 @@ #define A(m,n) A, m, n #define T(m,n) T, m, n +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(k) DIAG, k, 0 +#else +#define DIAG(k) A, k, k +#endif /***************************************************************************//** * Parallel tile LQ factorization - dynamic scheduling @@ -112,11 +116,13 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, A(k, k), ldak, T(k, k), T->mb); if ( k < (A->mt-1) ) { +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, A->mb, A->nb, A->nb, A(k, k), ldak, - DIAG(k), A->mb ); + DIAG(k), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c index 04d389694c0ed92c898150452656d296b893b4b7..987105ba739e44d6fc54d2cdd1659c239e80b1cf 100644 --- a/compute/pzgelqfrh.c +++ b/compute/pzgelqfrh.c @@ -36,7 +36,11 @@ #define A(m,n) A, (m), (n) #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+A->nt +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(m,n) DIAG, ((n)/BS), 0 +#else +#define DIAG(m,n) A, (m), (n) +#endif /***************************************************************************//** * Parallel tile LQ factorization (reduction Householder) - dynamic scheduling @@ -117,11 +121,13 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, A(k, N), ldak, T(k, N), T->mb); if ( k < (A->mt-1) ) { +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, tempkm, tempNn, A->nb, A(k, N), ldak, DIAG(k, N), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c index 88f274636efbf2869e24fc07b52f83ed4652d583..4928d45770a24852bb0c290f26b7f0defe74ffc4 100644 --- a/compute/pzgeqrf.c +++ b/compute/pzgeqrf.c @@ -32,7 +32,11 @@ #define A(m,n) A, m, n #define T(m,n) T, m, n +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(k) DIAG, k, 0 +#else +#define DIAG(k) A, k, k +#endif /***************************************************************************//** * Parallel tile QR factorization - dynamic scheduling @@ -107,11 +111,13 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, A(k, k), ldak, T(k, k), T->mb); if ( k < (A->nt-1) ) { +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseLower, A->mb, A->nb, A->nb, A(k, k), ldak, DIAG(k), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c index bbcb6414c636851b6c4ab3ede20bcedbb53fc4cd..143b66109a03ec230169ba5086080661b6401bc8 100644 --- a/compute/pzgeqrfrh.c +++ b/compute/pzgeqrfrh.c @@ -34,7 +34,11 @@ #define A(m,n) A, (m), (n) #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), ((n)+A->nt) +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(m,n) DIAG, ((m)/BS), 0 +#else +#define DIAG(m,n) A, (m), (n) +#endif /***************************************************************************//** * Parallel tile QR factorization (reduction Householder) - dynamic scheduling @@ -116,11 +120,13 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, A(M, k), ldaM, T(M, k), T->mb); if ( k < (A->nt-1) ) { - MORSE_TASK_zlacpy( - &options, - MorseLower, tempMm, A->nb, A->nb, - A(M, k), ldaM, - DIAG(M, k), ldaM ); +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseLower, tempMm, A->nb, A->nb, + A(M, k), ldaM, + DIAG(M, k), ldaM ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/compute/pzgetrf_incpiv.c b/compute/pzgetrf_incpiv.c index 08b90104ab9d97356d68c654979a2853aa10d4c2..6931064ac80f77380b881991b37c4e247e56d991 100644 --- a/compute/pzgetrf_incpiv.c +++ b/compute/pzgetrf_incpiv.c @@ -33,7 +33,11 @@ #include "common.h" #define A(_m_,_n_) A, _m_, _n_ +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(_k_) DIAG, _k_, 0 +#else +#define DIAG(_k_) A, _k_, _k_ +#endif #define L(_m_,_n_) L, _m_, _n_ #define IPIV(_m_,_n_) &(IPIV[(int64_t)A->mb*((int64_t)(_m_)+(int64_t)A->mt*(int64_t)(_n_))]) @@ -86,11 +90,13 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, k == A->mt-1, A->nb*k); if ( k < (minMNT-1) ) { +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpperLower, tempkm, tempkn, A->nb, A(k, k), ldak, DIAG(k), ldak); +#endif } for (n = k+1; n < A->nt; n++) { diff --git a/compute/pzunglq.c b/compute/pzunglq.c index 8372ddaf02d525ae302582c09b025227c7b9e096..1d2c7ba0a7bfff03503b975038183b1ca6f3849e 100644 --- a/compute/pzunglq.c +++ b/compute/pzunglq.c @@ -33,7 +33,11 @@ #define A(m,n) A, m, n #define Q(m,n) Q, m, n #define T(m,n) T, m, n +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(k) DIAG, k, 0 +#else +#define DIAG(k) A, k, k +#endif /***************************************************************************//** * Parallel construction of Q using tile V (application to identity) - dynamic scheduling @@ -112,11 +116,13 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, T(k, n), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, DIAG(k), A->mb ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c index 1991b531f6d2d34f5de629c5cc20247ec22d0b85..96f98f3103a3a74c7cba18c37385c18fda6f9240 100644 --- a/compute/pzunglqrh.c +++ b/compute/pzunglqrh.c @@ -33,7 +33,11 @@ #define Q(m,n) Q, (m), (n) #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+(A->nt) +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(m,n) DIAG, ((n)/BS), 0 +#else +#define DIAG(m,n) A, (m), (n) +#endif /** * Parallel construction of Q using tile V (application to identity; @@ -135,11 +139,13 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, T(k, n), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, DIAG(k, N), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/compute/pzungqr.c b/compute/pzungqr.c index a67a27696bc6a44b8ef0c9241cdeaab0fe1b3c90..92b429f42eb823bc0b8da32ec382228416b7943f 100644 --- a/compute/pzungqr.c +++ b/compute/pzungqr.c @@ -33,7 +33,11 @@ #define A(m,n) A, m, n #define Q(m,n) Q, m, n #define T(m,n) T, m, n +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(k) DIAG, k, 0 +#else +#define DIAG(k) A, k, k +#endif /***************************************************************************//** * Parallel construction of Q using tile V (application to identity) - dynamic scheduling @@ -108,11 +112,13 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, T(m, k), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c index c3fc863a3f8115cda1a8d48f2f5b35efc93efac2..fa78dea0375627932d1b25a77f6bf5b81231458f 100644 --- a/compute/pzungqrrh.c +++ b/compute/pzungqrrh.c @@ -35,7 +35,11 @@ #define Q(m,n) Q, (m), (n) #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+(A->nt) +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(m,n) DIAG, ((m)/BS), 0 +#else +#define DIAG(m,n) A, (m), (n) +#endif /** * Parallel construction of Q using tile V (application to identity; @@ -141,11 +145,13 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, T(m, k), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c index 4592ab327288c40ab0cab0370b79a5cc573f55d5..244d6cd4c831aa1de9ce944806a932009ce7bf80 100644 --- a/compute/pzunmlq.c +++ b/compute/pzunmlq.c @@ -34,7 +34,11 @@ #define A(m,n) A, m, n #define B(m,n) B, m, n #define T(m,n) T, m, n +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(k) DIAG, k, 0 +#else +#define DIAG(k) A, k, k +#endif /***************************************************************************//** * Parallel application of Q using tile V - LQ factorization - dynamic scheduling @@ -103,11 +107,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb; ldak = BLKLDD(A, k); ldbk = BLKLDD(B, k); +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, tempkmin, tempkm, A->nb, A(k, k), ldak, DIAG(k), A->mb ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -166,11 +172,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, T(k, m), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, tempkmin, tempkm, A->nb, A(k, k), ldak, DIAG(k), A->mb ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -215,11 +223,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, T(k, n), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, DIAG(k), A->mb ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -248,11 +258,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, tempkn = k == B->nt -1 ? B->n -k*B->nb : B->nb; tempkmin = k == minMT-1 ? minM-k*A->mb : A->mb; ldak = BLKLDD(A, k); +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, DIAG(k), A->mb ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c index e2ad8eb63cf65b601f6235313d063142ba84ccc3..a17259921f8371a485c11ae9c5ab41a29e219c10 100644 --- a/compute/pzunmlqrh.c +++ b/compute/pzunmlqrh.c @@ -35,7 +35,11 @@ #define B(m,n) B, (m), (n) #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+A->nt +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(m,n) DIAG, ((n)/BS), 0 +#else +#define DIAG(m,n) A, (m), (n) +#endif /***************************************************************************//** * Parallel application of Q using tile V - LQ factorization (reduction @@ -106,11 +110,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, tempkmin = min(tempkm,tempNn); ldaN = BLKLDD(A, N); ldbN = BLKLDD(B, N); +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, DIAG(k, N), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -216,11 +222,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, T(k, m), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, DIAG(k, N), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -291,11 +299,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, T(k, n), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, DIAG(k, N), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -328,11 +338,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb; tempkmin = min(tempkm,tempNn); ldaN = BLKLDD(A, N); +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldaN, DIAG(k, N), ldaN ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c index d7c426415a2edb3fd542de5ba4c31177504a92b3..b71fef55cc527341325b84f47ed76b05ded32016 100644 --- a/compute/pzunmqr.c +++ b/compute/pzunmqr.c @@ -34,7 +34,11 @@ #define A(m,n) A, m, n #define B(m,n) B, m, n #define T(m,n) T, m, n +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(k) DIAG, k, 0 +#else +#define DIAG(k) A, k, k +#endif /***************************************************************************//** * Parallel application of Q using tile V - QR factorization - dynamic scheduling @@ -103,11 +107,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb; ldak = BLKLDD(A, k); ldbk = BLKLDD(B, k); +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -168,11 +174,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, T(m, k), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -219,11 +227,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, T(n, k), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseLower, tempkn, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -252,11 +262,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb; tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb; ldak = BLKLDD(A, k); +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseLower, tempkn, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c index fbfb4496c4d7744164de0c36fd4c3337f7ec5385..5b1dbc3afc6085eb49312decb8056a350663197f 100644 --- a/compute/pzunmqrrh.c +++ b/compute/pzunmqrrh.c @@ -35,7 +35,11 @@ #define B(m,n) B, (m), (n) #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), ((n)+A->nt) +#if defined(CHAMELEON_COPY_DIAG) #define DIAG(m,n) DIAG, ((m)/BS), 0 +#else +#define DIAG(m,n) A, (m), (n) +#endif /***************************************************************************//** * Parallel application of Q using tile V - QR factorization (reduction @@ -105,11 +109,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, tempkmin = min(tempMm, tempkn); ldaM = BLKLDD(A, M); ldbM = BLKLDD(B, M); +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -218,11 +224,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, T(m, k), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -295,11 +303,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, T(n, k), T->mb); } } +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, @@ -331,11 +341,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb; tempkmin = min(tempMm, tempkn); ldaM = BLKLDD(A, M); +#if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); +#endif #if defined(CHAMELEON_USE_MAGMA) MORSE_TASK_zlaset( &options, diff --git a/control/common.h b/control/common.h index 8be1ac62403a74600dc7838e4527ad46b4a22808..193a3dd3950171c39f84ff791ea4763fc1640429 100644 --- a/control/common.h +++ b/control/common.h @@ -136,6 +136,12 @@ #define MORSE_MPI_SIZE morse->mpi_comm_size #endif +/******************************************************************************* + * Activate copy of diagonal tile (StarPU only) for some tile algorithms (pz) + **/ +#if defined(CHAMELEON_SCHED_STARPU) +#define CHAMELEON_COPY_DIAG +#endif /******************************************************************************* * IPT internal define