diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c index 7d59288a91657a6923f6612e09596cc8fd06a909..416f043e8b2a169138a5d801823e5bae4620b40f 100644 --- a/compute/pzgelqf.c +++ b/compute/pzgelqf.c @@ -83,7 +83,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, * zunmqr = A->nb * ib * ztsmqr = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif #if defined(CHAMELEON_USE_MAGMA) @@ -92,15 +92,15 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) */ - ws_worker = max( ws_worker, ib * (ib + A->nb) ); + ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); /* Host space * * zgelqt = ib * A->nb + 3 * ib * ib + A->nb * ztslqt = 3 * ib * A->nb + ib * ib + A->nb */ - ws_host = max( ws_host, ib * A->nb + 3 * ib * ib + A->nb ); - ws_host = max( ws_host, 3 * ib * A->nb + ib * ib + A->nb ); + ws_host = chameleon_max( ws_host, ib * A->nb + 3 * ib * ib + A->nb ); + ws_host = chameleon_max( ws_host, 3 * ib * A->nb + ib * ib + A->nb ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -111,7 +111,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, #if defined(CHAMELEON_COPY_DIAG) /* necessary to avoid dependencies between tslqt and unmlq tasks regarding the diag tile */ DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q); + morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); #endif for (k = 0; k < minMNT; k++) { diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c index c358e06c9c2845768d3a97e9f934d6fdc1e486b3..caefaf5c82e53145047e6c07c57dfedb7622c6db 100644 --- a/compute/pzgelqfrh.c +++ b/compute/pzgelqfrh.c @@ -84,7 +84,7 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, * zunmqr = A->nb * ib * ztsmqr = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif #if defined(CHAMELEON_USE_MAGMA) @@ -93,15 +93,15 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) */ - ws_worker = max( ws_worker, ib * (ib + A->nb) ); + ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); /* Host space * * zgelqt = ib * A->nb + 3 * ib * ib + A->nb * ztslqt = 3 * ib * A->nb + ib * ib + A->nb */ - ws_host = max( ws_host, ib * A->nb + 3 * ib * ib + A->nb ); - ws_host = max( ws_host, 3 * ib * A->nb + ib * ib + A->nb ); + ws_host = chameleon_max( ws_host, ib * A->nb + 3 * ib * ib + A->nb ); + ws_host = chameleon_max( ws_host, 3 * ib * A->nb + ib * ib + A->nb ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -118,12 +118,12 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, } #endif - for (k = 0; k < min(A->mt, A->nt); k++) { + for (k = 0; k < chameleon_min(A->mt, A->nt); k++) { tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; ldak = BLKLDD(A, k); for (N = k; N < A->nt; N += BS) { tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb; - tempkmin = min(tempkm, tempNn); + tempkmin = chameleon_min(tempkm, tempNn); MORSE_TASK_zgelqt( &options, tempkm, tempNn, ib, T->nb, @@ -156,7 +156,7 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, T(k, N), T->mb, A(m, N), ldam); } - for (n = N+1; n < min(N+BS, A->nt); n++) { + for (n = N+1; n < chameleon_min(N+BS, A->nt); n++) { tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; MORSE_TASK_ztslqt( &options, diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c index ad141a97b1adb407457161e82c0889ee7e4759fd..ca0dfb2e9e3573b4e52cb573eff41710b1cdf7db 100644 --- a/compute/pzgeqrf.c +++ b/compute/pzgeqrf.c @@ -54,7 +54,7 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, int ldak, ldam; int tempkm, tempkn, tempnn, tempmm; int ib; - int minMNT = min(A->mt, A->nt); + int minMNT = chameleon_min(A->mt, A->nt); morse = morse_context_self(); if (sequence->status != MORSE_SUCCESS) @@ -78,7 +78,7 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, * zunmqr = A->nb * ib * ztsmqr = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif #if defined(CHAMELEON_USE_MAGMA) @@ -87,15 +87,15 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) */ - ws_worker = max( ws_worker, ib * (ib + A->nb) ); + ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); /* Host space * * zgeqrt = ib * (A->mb+3*ib) + A->mb ) * ztsqrt = 2 * ib * (A->nb+ib) + A->nb */ - ws_host = max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); - ws_host = max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); + ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); + ws_host = chameleon_max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -106,7 +106,7 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, #if defined(CHAMELEON_COPY_DIAG) /* necessary to avoid dependencies between tsqrt and unmqr tasks regarding the diag tile */ DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q); + morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); #endif for (k = 0; k < minMNT; k++) { diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c index c95013f89f661bf18f3a8ac88cd5fe7beeea1c2f..fd002fc64cfba3750172019861fa05477e2c4bc1 100644 --- a/compute/pzgeqrfrh.c +++ b/compute/pzgeqrfrh.c @@ -82,7 +82,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, * zunmqr = A->nb * ib * ztsmqr = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif #if defined(CHAMELEON_USE_MAGMA) @@ -91,15 +91,15 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) */ - ws_worker = max( ws_worker, ib * (ib + A->nb) ); + ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); /* Host space * * zgeqrt = ib * (A->nb+3*ib) + A->nb ) * ztsqrt = 2 * ib * (A->nb+ib) + A->nb */ - ws_host = max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); - ws_host = max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); + ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); + ws_host = chameleon_max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -116,12 +116,12 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, } #endif - K = min(A->mt, A->nt); + K = chameleon_min(A->mt, A->nt); for (k = 0; k < K; k++) { tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; for (M = k; M < A->mt; M += BS) { tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb; - tempkmin = min(tempMm, tempkn); + tempkmin = chameleon_min(tempMm, tempkn); ldaM = BLKLDD(A, M); MORSE_TASK_zgeqrt( &options, @@ -154,7 +154,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, T(M, k), T->mb, A(M, n), ldaM); } - for (m = M+1; m < min(M+BS, A->mt); m++) { + for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m); MORSE_TASK_ztsqrt( diff --git a/compute/pzgetrf_incpiv.c b/compute/pzgetrf_incpiv.c index b9c47e84fddcca51b996d31e214d567834da05ba..43bd1628703ee9a3681057838036cf44f4558452 100644 --- a/compute/pzgetrf_incpiv.c +++ b/compute/pzgetrf_incpiv.c @@ -56,7 +56,7 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, int ldak, ldam; int tempkm, tempkn, tempmm, tempnn; int ib; - int minMNT = min(A->mt, A->nt); + int minMNT = chameleon_min(A->mt, A->nt); morse = morse_context_self(); if (sequence->status != MORSE_SUCCESS) @@ -75,7 +75,7 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, /* necessary to avoid dependencies between tasks regarding the diag tile */ DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q); + morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); for (k = 0; k < minMNT; k++) { tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; diff --git a/compute/pzgetrf_nopiv.c b/compute/pzgetrf_nopiv.c index 76525b0d88ab0f107c6ddece6efc8909e1a1f6e4..5eea46d894f76cac504ee4232fd2168f2872ada4 100644 --- a/compute/pzgetrf_nopiv.c +++ b/compute/pzgetrf_nopiv.c @@ -53,7 +53,7 @@ void morse_pzgetrf_nopiv(MORSE_desc_t *A, ib = MORSE_IB; - for (k = 0; k < min(A->mt, A->nt); k++) { + for (k = 0; k < chameleon_min(A->mt, A->nt); k++) { tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; ldak = BLKLDD(A, k); diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c index 88e673c59e76c7f962e142c644cf26ccfe1d05e4..e1bf3dc84c3f53915aba49308378181d9056ef3f 100644 --- a/compute/pzhetrd_he2hb.c +++ b/compute/pzhetrd_he2hb.c @@ -82,7 +82,7 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, * ztsmqr = 2 * A->nb * ib * zherfb = A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif #if defined(CHAMELEON_USE_MAGMA) @@ -91,15 +91,15 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) */ - ws_worker = max( ws_worker, ib * (ib + A->nb) ); + ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); /* Host space * * zgeqrt = ib * (A->mb+3*ib) + A->mb ) * ztsqrt = 2 * ib * (A->nb+ib) + A->nb */ - ws_host = max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); - ws_host = max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); + ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); + ws_host = chameleon_max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -110,17 +110,17 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, #if defined(CHAMELEON_COPY_DIAG) /* Copy of the extra-diagonal to generate more parallelism by releasing anti-dependencies on UNMQR/TSMQR triangle conflict */ E = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*E, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q); + morse_zdesc_alloc_diag(*E, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); #endif /* Copy of the diagonal tiles to keep the general version of the tile all along the computation */ D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*D, A->mb, A->nb, min(A->m, A->n) - A->mb, A->nb, 0, 0, min(A->m, A->n) - A->mb, A->nb, A->p, A->q); + morse_zdesc_alloc_diag(*D, A->mb, A->nb, chameleon_min(A->m, A->n) - A->mb, A->nb, 0, 0, chameleon_min(A->m, A->n) - A->mb, A->nb, A->p, A->q); AT = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); *AT = morse_desc_init( MorseComplexDouble, A->mb, A->nb, (A->mb*A->nb), - min(A->mt, A->nt) * A->mb, A->nb, 0, 0, min(A->mt, A->nt) * A->mb, A->nb, 1, 1); + chameleon_min(A->mt, A->nt) * A->mb, A->nb, 0, 0, chameleon_min(A->mt, A->nt) * A->mb, A->nb, 1, 1); morse_desc_mat_alloc( AT ); /* Let's extract the diagonal in a temporary copy that contains A and A' */ diff --git a/compute/pzlacpy.c b/compute/pzlacpy.c index 50e5ba5745a8e788a5932183ea26174ac5a718ba..1f88ed10aa4b60d0e13f8421e6a369d2ce8fdda8 100644 --- a/compute/pzlacpy.c +++ b/compute/pzlacpy.c @@ -98,7 +98,7 @@ void morse_pzlacpy(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, A(m, m), ldam, B(m, m), ldbm); } - for (n = 0; n < min(m, A->nt); n++) { + for (n = 0; n < chameleon_min(m, A->nt); n++) { Y = n == A->nt-1 ? A->n-n*A->nb : A->nb; MORSE_TASK_zlacpy( &options, diff --git a/compute/pzlange.c b/compute/pzlange.c index 3f9c445cb691910c678b5b92fc7cc7145cd9aec8..ff9ccd9fc0395241b006e5330280556300406242 100644 --- a/compute/pzlange.c +++ b/compute/pzlange.c @@ -70,7 +70,7 @@ void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, /* Init workspace handle for the call to zlange but unused */ RUNTIME_options_ws_alloc( &options, 1, 0 ); - workm = max( A->mt, A->p ); + workm = chameleon_max( A->mt, A->p ); workn = A->n; MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, A->nb, A->nb, workm, workn, 0, 0, workm, workn, A->p, A->q); @@ -169,7 +169,7 @@ void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, RUNTIME_options_ws_alloc( &options, A->mb, 0 ); workm = A->m; - workn = max( A->nt, A->q ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, A->mb, 1, A->mb, workm, workn, 0, 0, workm, workn, A->p, A->q); @@ -283,8 +283,8 @@ void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, */ case MorseFrobeniusNorm: - workm = max( A->mt, A->p ); - workn = max( A->nt, A->q ); + workm = chameleon_max( A->mt, A->p ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 2, 2, workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q); @@ -353,8 +353,8 @@ void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, /* Init workspace handle for the call to zlange but unused */ RUNTIME_options_ws_alloc( &options, 1, 0 ); - workm = max( A->mt, A->p ); - workn = max( A->nt, A->q ); + workm = chameleon_max( A->mt, A->p ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 1, 1, workm, workn, 0, 0, workm, workn, A->p, A->q); diff --git a/compute/pzlanhe.c b/compute/pzlanhe.c index 773ccc6d79a43a30289e6e3f5300f3ee69952310..8e0bb19e0b97e449ebe76e1bbe268c3d2839a1dd 100644 --- a/compute/pzlanhe.c +++ b/compute/pzlanhe.c @@ -76,7 +76,7 @@ void morse_pzlanhe(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *re RUNTIME_options_ws_alloc( &options, A->mb, 0 ); workm = A->m; - workn = max( A->nt, A->q ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, A->mb, 1, A->mb, workm, workn, 0, 0, workm, workn, A->p, A->q); @@ -212,8 +212,8 @@ void morse_pzlanhe(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *re * MorseFrobeniusNorm */ case MorseFrobeniusNorm: - workm = max( A->mt, A->p ); - workn = max( A->nt, A->q ); + workm = chameleon_max( A->mt, A->p ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 2, 2, workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q); @@ -352,8 +352,8 @@ void morse_pzlanhe(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *re /* Init workspace handle for the call to zlange but unused */ RUNTIME_options_ws_alloc( &options, 1, 0 ); - workm = max( A->mt, A->p ); - workn = max( A->nt, A->q ); + workm = chameleon_max( A->mt, A->p ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 1, 1, workm, workn, 0, 0, workm, workn, A->p, A->q); diff --git a/compute/pzlansy.c b/compute/pzlansy.c index 3044c44022dfd49b3258c71b65f4290fd463d2f6..4fc9c6bdee52e7b563b778eabd452272e41b4d78 100644 --- a/compute/pzlansy.c +++ b/compute/pzlansy.c @@ -76,7 +76,7 @@ void morse_pzlansy(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *re RUNTIME_options_ws_alloc( &options, A->mb, 0 ); workm = A->m; - workn = max( A->nt, A->q ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, A->mb, 1, A->mb, workm, workn, 0, 0, workm, workn, A->p, A->q); @@ -212,8 +212,8 @@ void morse_pzlansy(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *re * MorseFrobeniusNorm */ case MorseFrobeniusNorm: - workm = max( A->mt, A->p ); - workn = max( A->nt, A->q ); + workm = chameleon_max( A->mt, A->p ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 2, 2, workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q); @@ -361,8 +361,8 @@ void morse_pzlansy(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *re /* Init workspace handle for the call to zlange but unused */ RUNTIME_options_ws_alloc( &options, 1, 0 ); - workm = max( A->mt, A->p ); - workn = max( A->nt, A->q ); + workm = chameleon_max( A->mt, A->p ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 1, 1, workm, workn, 0, 0, workm, workn, A->p, A->q); diff --git a/compute/pzlantr.c b/compute/pzlantr.c index 89a72def273415fb77e9bd224cdc014eb6c259fa..c9dc86e776ed8c4b0773deb3a8ed57fb45fa6eaa 100644 --- a/compute/pzlantr.c +++ b/compute/pzlantr.c @@ -51,7 +51,7 @@ void morse_pzlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, int m, n, minMNT; /* int part_p, part_q; */ - minMNT = min( A->mt, A->nt ); + minMNT = chameleon_min( A->mt, A->nt ); /* part_p = A->myrank / A->q; */ /* part_q = A->myrank % A->q; */ @@ -70,7 +70,7 @@ void morse_pzlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, /* Init workspace handle for the call to zlange but unused */ RUNTIME_options_ws_alloc( &options, 1, 0 ); - workm = max( A->mt, A->p ); + workm = chameleon_max( A->mt, A->p ); workn = A->n; MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, A->nb, A->nb, workm, workn, 0, 0, workm, workn, A->p, A->q); @@ -239,7 +239,7 @@ void morse_pzlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, RUNTIME_options_ws_alloc( &options, A->mb, 0 ); workm = A->m; - workn = max( A->nt, A->q ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, A->mb, 1, A->mb, workm, workn, 0, 0, workm, workn, A->p, A->q); @@ -404,8 +404,8 @@ void morse_pzlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, * MorseFrobeniusNorm */ case MorseFrobeniusNorm: - workm = max( A->mt, A->p ); - workn = max( A->nt, A->q ); + workm = chameleon_max( A->mt, A->p ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 2, 2, workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q); @@ -548,8 +548,8 @@ void morse_pzlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, /* Init workspace handle for the call to zlange but unused */ RUNTIME_options_ws_alloc( &options, 1, 0 ); - workm = max( A->mt, A->p ); - workn = max( A->nt, A->q ); + workm = chameleon_max( A->mt, A->p ); + workn = chameleon_max( A->nt, A->q ); MORSE_Desc_Create(&(VECNORMS_STEP1), NULL, MorseRealDouble, 1, 1, 1, workm, workn, 0, 0, workm, workn, A->p, A->q); diff --git a/compute/pzlascal.c b/compute/pzlascal.c index 524c113022d68ea916d744087e8d3444ff25d64a..9829782dcdab832516b73cd4e484d8ae6783b26e 100644 --- a/compute/pzlascal.c +++ b/compute/pzlascal.c @@ -27,7 +27,7 @@ void morse_pzlascal(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, int tempmm, tempnn, tempmn, tempnm; int m, n; int ldam, ldan; - int minmnt = min(A->mt, A->nt); + int minmnt = chameleon_min(A->mt, A->nt); morse = morse_context_self(); if (sequence->status != MORSE_SUCCESS) diff --git a/compute/pzlaset.c b/compute/pzlaset.c index 93ae3f462797078f78dc31a6c22855f8dc9098cf..314acf23f64c7bac143e56915f40af63849a4fb4 100644 --- a/compute/pzlaset.c +++ b/compute/pzlaset.c @@ -46,7 +46,7 @@ void morse_pzlaset(MORSE_enum uplo, int ldai, ldaj; int tempim; int tempjm, tempjn; - int minmn = min(A->mt, A->nt); + int minmn = chameleon_min(A->mt, A->nt); morse = morse_context_self(); if (sequence->status != MORSE_SUCCESS) @@ -77,7 +77,7 @@ void morse_pzlaset(MORSE_enum uplo, else if (uplo == MorseUpper) { for (j = 1; j < A->nt; j++){ tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; - for (i = 0; i < min(j, A->mt); i++){ + for (i = 0; i < chameleon_min(j, A->mt); i++){ tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; ldai = BLKLDD(A, i); MORSE_TASK_zlaset( diff --git a/compute/pzlaset2.c b/compute/pzlaset2.c index 045325be4f31a3e2d52d087375c425b926901b43..7c0ce6306653431cae8832cd054f5e731771568a 100644 --- a/compute/pzlaset2.c +++ b/compute/pzlaset2.c @@ -45,7 +45,7 @@ void morse_pzlaset2(MORSE_enum uplo, MORSE_Complex64_t alpha, int ldai, ldaj; int tempim; int tempjm, tempjn; - int minmn = min(A->mt, A->nt); + int minmn = chameleon_min(A->mt, A->nt); morse = morse_context_self(); if (sequence->status != MORSE_SUCCESS) @@ -76,7 +76,7 @@ void morse_pzlaset2(MORSE_enum uplo, MORSE_Complex64_t alpha, else if (uplo == MorseUpper) { for (j = 1; j < A->nt; j++){ tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; - for (i = 0; i < min(j, A->mt); i++){ + for (i = 0; i < chameleon_min(j, A->mt); i++){ tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; ldai = BLKLDD(A, i); MORSE_TASK_zlaset2( diff --git a/compute/pztile2band.c b/compute/pztile2band.c index 4611d2adce9a7e2679e82d8ec83d3b59aff33063..a5efb27771ac4c206f53ea1f03f659e50eb8b07c 100644 --- a/compute/pztile2band.c +++ b/compute/pztile2band.c @@ -42,7 +42,7 @@ void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, int j; int ldaj, ldx; int tempjm, tempjn; - int minmnt = min(A->mt, A->nt); + int minmnt = chameleon_min(A->mt, A->nt); morse = morse_context_self(); if (sequence->status != MORSE_SUCCESS) @@ -56,7 +56,7 @@ void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, */ if ( uplo == MorseLower ) { for (j = 0; j < minmnt; j++){ - /* Compute dimension on N with B since it is dimensioned with min(A->m, A->n) */ + /* Compute dimension on N with B since it is dimensioned with chameleon_min(A->m, A->n) */ assert( A->m == B->n ); assert( A->n >= B->n ); @@ -89,7 +89,7 @@ void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, } else if ( uplo == MorseUpper ) { for (j = 0; j < minmnt; j++){ - /* Compute dimension on M with B since it is dimensioned with min(A->m, A->n) */ + /* Compute dimension on M with B since it is dimensioned with chameleon_min(A->m, A->n) */ assert( A->n == B->n ); assert( A->m >= B->n ); tempjn = j == A->nt-1 ? A->n - j * A->nb : A->nb; diff --git a/compute/pztpgqrt.c b/compute/pztpgqrt.c index 723dbf369753f5e4e8287ff062a95ff97efc221b..e6a9dd7aae8e09548b32c836563ef1cc26bea0ce 100644 --- a/compute/pztpgqrt.c +++ b/compute/pztpgqrt.c @@ -58,7 +58,7 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M ib = MORSE_IB; /* - * ztsmqr = A->nb * ib + * ztpmqrt = A->nb * ib */ ws_worker = A->nb * ib; @@ -66,9 +66,9 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M #if defined(CHAMELEON_USE_CUDA) /* Worker space * - * ztsmqr = 2 * A->nb * ib + * ztpmqrt = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -80,7 +80,7 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; ldak = BLKLDD(A, k); - maxmtk = min( B->mt, maxmt+k ) - 1; + maxmtk = chameleon_min( B->mt, maxmt+k ) - 1; for (m = maxmtk; m > -1; m--) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; templm = m == maxmtk ? tempmm : 0; diff --git a/compute/pztpqrt.c b/compute/pztpqrt.c index 0b825d5c222858c76380a79d359a483a1b7f9c2a..9b60ef164a62cb22288894358fa050262d05c06f 100644 --- a/compute/pztpqrt.c +++ b/compute/pztpqrt.c @@ -56,10 +56,8 @@ void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, ib = MORSE_IB; /* - * zgeqrt = A->nb * (ib+1) - * zunmqr = A->nb * ib - * ztsqrt = A->nb * (ib+1) - * ztsmqr = A->nb * ib + * ztsqrt = A->nb * (ib+1) + * ztpmqrt = A->nb * ib */ ws_worker = A->nb * (ib+1); @@ -67,27 +65,24 @@ void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, #if defined(CHAMELEON_USE_CUDA) /* Worker space * - * zunmqr = A->nb * ib - * ztsmqr = 2 * A->nb * ib + * ztpmqrt = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif #if defined(CHAMELEON_USE_MAGMA) /* Worker space * - * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) + * ztpqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) */ - ws_worker = max( ws_worker, ib * (ib + A->nb) ); + ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); /* Host space * - * zgeqrt = ib * (A->mb+3*ib) + A->mb ) - * ztsqrt = 2 * ib * (A->nb+ib) + A->nb + * ztpqrt = 2 * ib * (A->nb+ib) + A->nb */ - ws_host = max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); - ws_host = max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); + ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); + ws_host = chameleon_max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -124,7 +119,7 @@ void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, } } - maxmt = min( B->mt, maxmt+1 ); + maxmt = chameleon_min( B->mt, maxmt+1 ); } RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); diff --git a/compute/pztradd.c b/compute/pztradd.c index 6e80ab3160e9bf7e1b82dd701844e66389208f7f..152960e9a5811f25c581a9e4f405959e07d24b90 100644 --- a/compute/pztradd.c +++ b/compute/pztradd.c @@ -53,7 +53,7 @@ void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, switch(uplo){ case MorseLower: if (trans == MorseNoTrans) { - for (n = 0; n < min(B->mt,B->nt); n++) { + for (n = 0; n < chameleon_min(B->mt,B->nt); n++) { tempnm = n == B->mt-1 ? B->m-n*B->mb : B->mb; tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldan = BLKLDD(A, n); @@ -79,7 +79,7 @@ void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, } } else { - for (n = 0; n < min(B->mt,B->nt); n++) { + for (n = 0; n < chameleon_min(B->mt,B->nt); n++) { tempnm = n == B->mt-1 ? B->m-n*B->mb : B->mb; tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldan = BLKLDD(A, n); @@ -106,7 +106,7 @@ void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, break; case MorseUpper: if (trans == MorseNoTrans) { - for (m = 0; m < min(B->mt,B->nt); m++) { + for (m = 0; m < chameleon_min(B->mt,B->nt); m++) { tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; tempmn = m == B->nt-1 ? B->n-m*B->nb : B->nb; ldam = BLKLDD(A, m); @@ -130,7 +130,7 @@ void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, } } else { - for (m = 0; m < min(B->mt,B->nt); m++) { + for (m = 0; m < chameleon_min(B->mt,B->nt); m++) { tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; tempmn = m == B->nt-1 ? B->n-m*B->nb : B->nb; ldam = BLKLDD(A, m); diff --git a/compute/pztrsmpl.c b/compute/pztrsmpl.c index 403cd0f4dec894aee12674fc56ead5111ffa61fe..4c8631b3d2f9d1fed197a8bdb4e074091b35fd6d 100644 --- a/compute/pztrsmpl.c +++ b/compute/pztrsmpl.c @@ -54,10 +54,10 @@ void morse_pztrsmpl(MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *L, int *IPIV RUNTIME_options_init(&options, morse, sequence, request); ib = MORSE_IB; - for (k = 0; k < min(A->mt, A->nt); k++) { + for (k = 0; k < chameleon_min(A->mt, A->nt); k++) { tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - tempkmin = k == min(A->mt, A->nt)-1 ? min(A->m, A->n)-k*A->mb : A->mb; + tempkmin = k == chameleon_min(A->mt, A->nt)-1 ? chameleon_min(A->m, A->n)-k*A->mb : A->mb; ldak = BLKLDD(A, k); ldbk = BLKLDD(B, k); for (n = 0; n < B->nt; n++) { diff --git a/compute/pzunglq.c b/compute/pzunglq.c index 2356ff6707dde66993eefb9fd228f9f2ecb0dd1a..37ecea8efb8b0d2d37aad2672123b1df868a0d2a 100644 --- a/compute/pzunglq.c +++ b/compute/pzunglq.c @@ -83,7 +83,7 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, * zunmlq = A->nb * ib * ztsmlq = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -100,7 +100,7 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, for (k = minMT-1; k >= 0; k--) { tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - tempkmin = min( tempAkn, tempAkm ); + tempkmin = chameleon_min( tempAkn, tempAkm ); tempkn = k == Q->nt-1 ? Q->n-k*Q->nb : Q->nb; ldak = BLKLDD(A, k); for (n = Q->nt-1; n > k; n--) { diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c index 30288e5d255567859ead0c4c41592dd112739408..59717f431e43a9c333f899e85cf28be8f3de8b1d 100644 --- a/compute/pzunglqrh.c +++ b/compute/pzunglqrh.c @@ -80,7 +80,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, * zunmqr = A->nb * ib * ztsmqr = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -97,7 +97,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, } #endif - K = min(A->mt, A->nt); + K = chameleon_min(A->mt, A->nt); for (k = K-1; k >= 0; k--) { tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; ldak = BLKLDD(A, k); @@ -124,8 +124,8 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, } for (N = k; N < A->nt; N += BS) { tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb; - tempkmin = min(tempkm, tempNn); - for (n = min(N+BS, A->nt)-1; n > N; n--) { + tempkmin = chameleon_min(tempkm, tempNn); + for (n = chameleon_min(N+BS, A->nt)-1; n > N; n--) { tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; for (m = k; m < Q->mt; m++) { diff --git a/compute/pzungqr.c b/compute/pzungqr.c index 80e7fd82bc6140056ecfe8e4774d34d998e55fa9..23f33b7e816e74b5b727655441ab839bec18cf03 100644 --- a/compute/pzungqr.c +++ b/compute/pzungqr.c @@ -83,7 +83,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, * zunmqr = A->nb * ib * ztsmqr = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -100,7 +100,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, for (k = minMT-1; k >= 0; k--) { tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - tempkmin = min( tempAkn, tempAkm ); + tempkmin = chameleon_min( tempAkn, tempAkm ); tempkm = k == Q->mt-1 ? Q->m-k*Q->mb : Q->mb; ldak = BLKLDD(A, k); ldqk = BLKLDD(Q, k); diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c index bd08779f2631654e54b7adbb72416d44469dd845..1a2470eceb767f9c7d40f269c222d84b9b17cbdc 100644 --- a/compute/pzungqrrh.c +++ b/compute/pzungqrrh.c @@ -82,7 +82,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, * zunmqr = A->nb * ib * ztsmqr = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -99,7 +99,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, } #endif - K = min(A->mt, A->nt); + K = chameleon_min(A->mt, A->nt); for (k = K-1; k >= 0; k--) { tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; lastRD = 0; @@ -127,10 +127,10 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, } for (M = k; M < A->mt; M += BS) { tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb; - tempkmin = min(tempMm, tempkn); + tempkmin = chameleon_min(tempMm, tempkn); ldaM = BLKLDD(A, M); ldqM = BLKLDD(Q, M); - for (m = min(M+BS, A->mt)-1; m > M; m--) { + for (m = chameleon_min(M+BS, A->mt)-1; m > M; m--) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldqm = BLKLDD(Q, m); ldam = BLKLDD(A, m); diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c index 46e35e596e5468e7a12e03129478fee54a9fb92b..3cb7d8591eeed71062aaa4456372918dd7b1f962 100644 --- a/compute/pzunmlq.c +++ b/compute/pzunmlq.c @@ -85,7 +85,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, * zunmlq = A->mb * ib * ztsmlq = 2 * A->mb * ib */ - ws_worker = max( ws_worker, ib * A->mb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->mb * 2 ); #endif ws_worker *= sizeof(MORSE_Complex64_t); diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c index e9a5b3689dd2a9d9bf4b3ea48f36eb0044727618..2963bf5537c7b35157d1dba86b640d6db8bc1086 100644 --- a/compute/pzunmlqrh.c +++ b/compute/pzunmlqrh.c @@ -81,7 +81,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, * zunmlq = A->nb * ib * ztsmlq = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -98,7 +98,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, } #endif - K = min(A->mt, A->nt); + K = chameleon_min(A->mt, A->nt); if (side == MorseLeft ) { if (trans == MorseNoTrans) { /* @@ -109,7 +109,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, ldak = BLKLDD(A, k); for (N = k; N < A->nt; N += BS) { tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb; - tempkmin = min(tempkm,tempNn); + tempkmin = chameleon_min(tempkm,tempNn); ldbN = BLKLDD(B, N); #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -136,7 +136,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, T(k, N), T->mb, B(N, n), ldbN); } - for (m = N+1; m < min(N+BS, A->nt); m++) { + for (m = N+1; m < chameleon_min(N+BS, A->nt); m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); for (n = 0; n < B->nt; n++) { @@ -204,9 +204,9 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, } for (N = k; N < A->nt; N += BS) { tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb; - tempkmin = min(tempkm,tempNn); + tempkmin = chameleon_min(tempkm,tempNn); ldbN = BLKLDD(B, N); - for (m = min(N+BS, A->nt)-1; m > N; m--) { + for (m = chameleon_min(N+BS, A->nt)-1; m > N; m--) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); for (n = 0; n < B->nt; n++) { @@ -282,8 +282,8 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, } for (N = k; N < A->nt; N += BS) { tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb; - tempkmin = min(tempkm,tempNn); - for (n = min(N+BS, A->nt)-1; n > N; n--) { + tempkmin = chameleon_min(tempkm,tempNn); + for (n = chameleon_min(N+BS, A->nt)-1; n > N; n--) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; @@ -336,7 +336,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, ldak = BLKLDD(A, k); for (N = k; N < A->nt; N += BS) { tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb; - tempkmin = min(tempkm,tempNn); + tempkmin = chameleon_min(tempkm,tempNn); #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, @@ -363,7 +363,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, T(k, N), T->mb, B(m, N), ldbm); } - for (n = N+1; n < min(N+BS, A->nt); n++) { + for (n = N+1; n < chameleon_min(N+BS, A->nt); n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c index 7d359f059546d4853771e4bd808a5d7101ea6aad..f860f7f04b795bcf6f8151084afe49a94ea575f5 100644 --- a/compute/pzunmqr.c +++ b/compute/pzunmqr.c @@ -85,7 +85,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, * zunmqr = A->nb * ib * ztsmqr = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif ws_worker *= sizeof(MORSE_Complex64_t); diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c index 71bb3d6bd60cb2b8c78fa077b201adcc054c208a..8ef8cb2dc8426db7183ea8b8d5e5d6dd5b76f3a6 100644 --- a/compute/pzunmqrrh.c +++ b/compute/pzunmqrrh.c @@ -82,7 +82,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, * zunmqr = A->nb * ib * ztsmqr = 2 * A->nb * ib */ - ws_worker = max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -100,7 +100,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, } #endif - K = min(A->mt, A->nt); + K = chameleon_min(A->mt, A->nt); if (side == MorseLeft ) { if (trans == MorseConjTrans) { /* @@ -110,7 +110,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; for (M = k; M < A->mt; M += BS) { tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb; - tempkmin = min(tempMm, tempkn); + tempkmin = chameleon_min(tempMm, tempkn); ldaM = BLKLDD(A, M); ldbM = BLKLDD(B, M); #if defined(CHAMELEON_COPY_DIAG) @@ -138,7 +138,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, T(M, k), T->mb, B(M, n), ldbM); } - for (m = M+1; m < min(M+BS, A->mt); m++) { + for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldbm = BLKLDD(B, m); ldam = BLKLDD(A, m); @@ -208,10 +208,10 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, } for (M = k; M < A->mt; M += BS) { tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb; - tempkmin = min(tempMm, tempkn); + tempkmin = chameleon_min(tempMm, tempkn); ldaM = BLKLDD(A, M); ldbM = BLKLDD(B, M); - for (m = min(M+BS, A->mt)-1; m > M; m--) { + for (m = chameleon_min(M+BS, A->mt)-1; m > M; m--) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldbm = BLKLDD(B, m); ldam = BLKLDD(A, m); @@ -287,10 +287,10 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, } for (M = k; M < A->mt; M += BS) { tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb; - tempkmin = min(tempMm, tempkn); + tempkmin = chameleon_min(tempMm, tempkn); ldaM = BLKLDD(A, M); ldbM = BLKLDD(B, M); - for (n = min(M+BS, A->mt)-1; n > M; n--) { + for (n = chameleon_min(M+BS, A->mt)-1; n > M; n--) { ldan = BLKLDD(A, n); tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; for (m = 0; m < B->mt; m++) { @@ -343,7 +343,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; for (M = k; M < A->mt; M += BS) { tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb; - tempkmin = min(tempMm, tempkn); + tempkmin = chameleon_min(tempMm, tempkn); ldaM = BLKLDD(A, M); #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -371,7 +371,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, T(M, k), T->mb, B(m, M), ldbm); } - for (n = M+1; n < min(M+BS, A->mt); n++) { + for (n = M+1; n < chameleon_min(M+BS, A->mt); n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldan = BLKLDD(A, n); for (m = 0; m < B->mt; m++) { diff --git a/compute/zbuild.c b/compute/zbuild.c index f37758be36a41ec35fd45de21195ee1b458ed822..8262d017bb1ebd8aad84d98a41395c034688f3b2 100644 --- a/compute/zbuild.c +++ b/compute/zbuild.c @@ -110,12 +110,12 @@ int MORSE_zbuild( MORSE_enum uplo, int M, int N, morse_error("MORSE_zbuild", "illegal value of N"); return -2; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zbuild", "illegal value of LDA"); return -4; } /* Quick return */ - if (min(M, N) == 0) + if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -280,7 +280,7 @@ int MORSE_zbuild_Tile_Async( MORSE_enum uplo, MORSE_desc_t *A, } /* Quick return */ - if (min( A->m, A->n ) == 0) + if (chameleon_min( A->m, A->n ) == 0) return MORSE_SUCCESS; morse_pzbuild(uplo, A, user_data, user_build_callback, sequence, request); diff --git a/compute/zgeadd.c b/compute/zgeadd.c index ef87bb7728f2041db5ec12599603a591a83ac1ef..9aea3d4c73bed941982ed456415e7b093a6da422 100644 --- a/compute/zgeadd.c +++ b/compute/zgeadd.c @@ -126,11 +126,11 @@ int MORSE_zgeadd(MORSE_enum trans, int M, int N, morse_error("MORSE_zgeadd", "illegal value of N"); return -3; } - if (LDA < max(1, Am)) { + if (LDA < chameleon_max(1, Am)) { morse_error("MORSE_zgeadd", "illegal value of LDA"); return -6; } - if (LDB < max(1, M)) { + if (LDB < chameleon_max(1, M)) { morse_error("MORSE_zgeadd", "illegal value of LDB"); return -9; } diff --git a/compute/zgelqf.c b/compute/zgelqf.c index 214067ab6b1aad46e716f9b4a6754f7a1d7a801a..95fe0544f6fcbd4098e3c44ef98b443f9d711e19 100644 --- a/compute/zgelqf.c +++ b/compute/zgelqf.c @@ -100,13 +100,13 @@ int MORSE_zgelqf(int M, int N, morse_error("MORSE_zgelqf", "illegal value of N"); return -2; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zgelqf", "illegal value of LDA"); return -4; } /* Quick return */ - if (min(M, N) == 0) + if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ @@ -120,7 +120,7 @@ int MORSE_zgelqf(int M, int N, NB = MORSE_NB; morse_sequence_create(morse, &sequence); - + /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, morse_desc_mat_free(&(descA)) ); @@ -200,7 +200,7 @@ int MORSE_zgelqf_Tile(MORSE_desc_t *A, MORSE_desc_t *T) MORSE_zgelqf_Tile_Async(A, T, sequence, &request); morse_sequence_wait(morse, sequence); RUNTIME_desc_getoncpu(A); - + status = sequence->status; morse_sequence_destroy(morse, sequence); return status; @@ -274,7 +274,7 @@ int MORSE_zgelqf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, } /* Quick return */ /* - if (min(M, N) == 0) + if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { diff --git a/compute/zgelqs.c b/compute/zgelqs.c index 3620d8c1187a007dd0f323f5241a99dede514943..c78fb2d92736de7b3107ff9d8ba0044ddfcd08d5 100644 --- a/compute/zgelqs.c +++ b/compute/zgelqs.c @@ -110,16 +110,16 @@ int MORSE_zgelqs(int M, int N, int NRHS, morse_error("MORSE_zgelqs", "illegal value of N"); return -3; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zgelqs", "illegal value of LDA"); return -5; } - if (LDB < max(1, max(1, N))) { + if (LDB < chameleon_max(1, chameleon_max(1, N))) { morse_error("MORSE_zgelqs", "illegal value of LDB"); return -8; } /* Quick return */ - if (min(M, min(N, NRHS)) == 0) { + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { return MORSE_SUCCESS; } @@ -303,7 +303,7 @@ int MORSE_zgelqs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, } /* Quick return */ /* - if (min(M, min(N, NRHS)) == 0) { + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { return MORSE_SUCCESS; } */ diff --git a/compute/zgels.c b/compute/zgels.c index 0b0e4c979607a39118fdba181764bc9ffe448f23..8b5093369e3d088a4359d89a322cd8ddef0d9ae5 100644 --- a/compute/zgels.c +++ b/compute/zgels.c @@ -142,17 +142,17 @@ int MORSE_zgels(MORSE_enum trans, int M, int N, int NRHS, morse_error("MORSE_zgels", "illegal value of NRHS"); return -4; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zgels", "illegal value of LDA"); return -6; } - if (LDB < max(1, max(M, N))) { + if (LDB < chameleon_max(1, chameleon_max(M, N))) { morse_error("MORSE_zgels", "illegal value of LDB"); return -9; } /* Quick return */ - if (min(M, min(N, NRHS)) == 0) { - for (i = 0; i < max(M, N); i++) + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + for (i = 0; i < chameleon_max(M, N); i++) for (j = 0; j < NRHS; j++) B[j*LDB+i] = 0.0; return MORSE_SUCCESS; @@ -371,8 +371,8 @@ int MORSE_zgels_Tile_Async(MORSE_enum trans, MORSE_desc_t *A, return morse_request_fail(sequence, request, MORSE_ERR_NOT_SUPPORTED); } /* Quick return - currently NOT equivalent to LAPACK's: - if (min(M, min(N, NRHS)) == 0) { - for (i = 0; i < max(M, N); i++) + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + for (i = 0; i < chameleon_max(M, N); i++) for (j = 0; j < NRHS; j++) B[j*LDB+i] = 0.0; return MORSE_SUCCESS; diff --git a/compute/zgemm.c b/compute/zgemm.c index bc0be5dcc6540a947fad4f95a0f5a4c3c535b150..0f931d8dc4636d67112d7e452b522738c789731b 100644 --- a/compute/zgemm.c +++ b/compute/zgemm.c @@ -179,15 +179,15 @@ int MORSE_zgemm(MORSE_enum transA, MORSE_enum transB, int M, int N, int K, morse_error("MORSE_zgemm", "illegal value of N"); return -5; } - if (LDA < max(1, Am)) { + if (LDA < chameleon_max(1, Am)) { morse_error("MORSE_zgemm", "illegal value of LDA"); return -8; } - if (LDB < max(1, Bm)) { + if (LDB < chameleon_max(1, Bm)) { morse_error("MORSE_zgemm", "illegal value of LDB"); return -10; } - if (LDC < max(1, M)) { + if (LDC < chameleon_max(1, M)) { morse_error("MORSE_zgemm", "illegal value of LDC"); return -13; } diff --git a/compute/zgeqrf.c b/compute/zgeqrf.c index f6162ca88350accd16620d720047a459d5728816..51182770c768571843efe8e89e990e2c4e6b2478 100644 --- a/compute/zgeqrf.c +++ b/compute/zgeqrf.c @@ -99,13 +99,13 @@ int MORSE_zgeqrf(int M, int N, morse_error("MORSE_zgeqrf", "illegal value of N"); return -2; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zgeqrf", "illegal value of LDA"); return -4; } /* Quick return */ - if (min(M, N) == 0) + if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ @@ -119,7 +119,7 @@ int MORSE_zgeqrf(int M, int N, NB = MORSE_NB; morse_sequence_create(morse, &sequence); - + /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, morse_desc_mat_free(&(descA)) ); @@ -273,7 +273,7 @@ int MORSE_zgeqrf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, } /* Quick return */ /* - if (min(M, N) == 0) + if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { diff --git a/compute/zgeqrs.c b/compute/zgeqrs.c index b881bcbe519aa614b0df01d2187e90a2c0cecb6a..360d13ae92960bac2f4396b70abbe06265ca63ad 100644 --- a/compute/zgeqrs.c +++ b/compute/zgeqrs.c @@ -110,16 +110,16 @@ int MORSE_zgeqrs(int M, int N, int NRHS, morse_error("MORSE_zgeqrs", "illegal value of N"); return -3; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zgeqrs", "illegal value of LDA"); return -5; } - if (LDB < max(1, max(1, M))) { + if (LDB < chameleon_max(1, chameleon_max(1, M))) { morse_error("MORSE_zgeqrs", "illegal value of LDB"); return -8; } /* Quick return */ - if (min(M, min(N, NRHS)) == 0) { + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { return MORSE_SUCCESS; } @@ -303,7 +303,7 @@ int MORSE_zgeqrs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, } /* Quick return */ /* - if (min(M, min(N, NRHS)) == 0) { + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { return MORSE_SUCCESS; } */ diff --git a/compute/zgesv_incpiv.c b/compute/zgesv_incpiv.c index 07da2243d7cba3c8db3b9357e8d632824d2195c1..02b1129c26bc3254c380fe70da57de57ae03add3 100644 --- a/compute/zgesv_incpiv.c +++ b/compute/zgesv_incpiv.c @@ -111,16 +111,16 @@ int MORSE_zgesv_incpiv(int N, int NRHS, morse_error("MORSE_zgesv_incpiv", "illegal value of NRHS"); return -2; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zgesv_incpiv", "illegal value of LDA"); return -4; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_zgesv_incpiv", "illegal value of LDB"); return -8; } /* Quick return */ - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ @@ -306,7 +306,7 @@ int MORSE_zgesv_incpiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, M } /* Quick return */ /* - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; */ diff --git a/compute/zgesv_nopiv.c b/compute/zgesv_nopiv.c index 60f3a69d3a22502496663190f8c36477b1101b95..31735f19c856c8c77c8b03a6df813cd41d5e9310 100644 --- a/compute/zgesv_nopiv.c +++ b/compute/zgesv_nopiv.c @@ -110,16 +110,16 @@ int MORSE_zgesv_nopiv(int N, int NRHS, morse_error("MORSE_zgesv_nopiv", "illegal value of NRHS"); return -2; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zgesv_nopiv", "illegal value of LDA"); return -4; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_zgesv_nopiv", "illegal value of LDB"); return -8; } /* Quick return */ - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ @@ -294,7 +294,7 @@ int MORSE_zgesv_nopiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *B, } /* Quick return */ /* - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; */ diff --git a/compute/zgesvd.c b/compute/zgesvd.c index 6c5263e4c27e2a008d375da266abb54dec0d80bf..571b9fa90a299fed8cf13c5de469bf03d52810e3 100644 --- a/compute/zgesvd.c +++ b/compute/zgesvd.c @@ -179,7 +179,7 @@ int MORSE_zgesvd(MORSE_enum jobu, MORSE_enum jobvt, morse_error("MORSE_zgesvd", "illegal value of N"); return -4; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zgesvd", "illegal value of LDA"); return -6; } @@ -192,7 +192,7 @@ int MORSE_zgesvd(MORSE_enum jobu, MORSE_enum jobvt, return -11; } /* Quick return */ - if (min(M, N) == 0) { + if (chameleon_min(M, N) == 0) { return MORSE_SUCCESS; } @@ -455,7 +455,7 @@ int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, M = descA.m; N = descA.n; - MINMN = min(M, N); + MINMN = chameleon_min(M, N); NB = descA.mb; LDAB = NB + 1; uplo = M >= N ? MorseUpper : MorseLower; diff --git a/compute/zgetrf_incpiv.c b/compute/zgetrf_incpiv.c index 6ab6f822aa2f3a2c798f1a3a9e4e84f9f31681af..169fb67f05fd55e87abbdbe6e377d85fb1f48237 100644 --- a/compute/zgetrf_incpiv.c +++ b/compute/zgetrf_incpiv.c @@ -102,12 +102,12 @@ int MORSE_zgetrf_incpiv(int M, int N, morse_error("MORSE_zgetrf_incpiv", "illegal value of N"); return -2; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zgetrf_incpiv", "illegal value of LDA"); return -4; } /* Quick return */ - if (min(M, N) == 0) + if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ @@ -278,7 +278,7 @@ int MORSE_zgetrf_incpiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, } /* Quick return */ /* - if (min(M, N) == 0) + if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ diff --git a/compute/zgetrf_nopiv.c b/compute/zgetrf_nopiv.c index e8c2f01510d283575dfbddedbfebce70e8bc2dce..ea987f595f701c29cf171e573311d2782f66b990 100644 --- a/compute/zgetrf_nopiv.c +++ b/compute/zgetrf_nopiv.c @@ -95,12 +95,12 @@ int MORSE_zgetrf_nopiv(int M, int N, morse_error("MORSE_zgetrf_nopiv", "illegal value of N"); return -2; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zgetrf_nopiv", "illegal value of LDA"); return -4; } /* Quick return */ - if (min(M, N) == 0) + if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ diff --git a/compute/zgetrs_incpiv.c b/compute/zgetrs_incpiv.c index 22542be07c9355ddc040aaea9f6b9e247a344f4c..63f9564a2fca9fa0a01d874abd33874343dfc420 100644 --- a/compute/zgetrs_incpiv.c +++ b/compute/zgetrs_incpiv.c @@ -117,16 +117,16 @@ int MORSE_zgetrs_incpiv(MORSE_enum trans, int N, int NRHS, morse_error("MORSE_zgetrs_incpiv", "illegal value of NRHS"); return -3; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zgetrs_incpiv", "illegal value of LDA"); return -5; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_zgetrs_incpiv", "illegal value of LDB"); return -9; } /* Quick return */ - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */ @@ -309,7 +309,7 @@ int MORSE_zgetrs_incpiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, } /* Quick return */ /* - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; */ morse_pztrsmpl(A, B, L, IPIV, sequence, request); diff --git a/compute/zgetrs_nopiv.c b/compute/zgetrs_nopiv.c index 89791493c8bc8b474b297335d1412f010cec2b96..02b029146f903eba98da6b6b0a4722102a57046d 100644 --- a/compute/zgetrs_nopiv.c +++ b/compute/zgetrs_nopiv.c @@ -111,16 +111,16 @@ int MORSE_zgetrs_nopiv(MORSE_enum trans, int N, int NRHS, morse_error("MORSE_zgetrs_nopiv", "illegal value of NRHS"); return -3; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zgetrs_nopiv", "illegal value of LDA"); return -5; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_zgetrs_nopiv", "illegal value of LDB"); return -9; } /* Quick return */ - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */ @@ -293,7 +293,7 @@ int MORSE_zgetrs_nopiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *B, } /* Quick return */ /* - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; */ morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseUnit, (MORSE_Complex64_t)1.0, A, B, sequence, request); diff --git a/compute/zheevd.c b/compute/zheevd.c index 7272b0c243065b70712c0f5a64b27a3626479764..a873de7436be9d9629b038403248ee8f1c0b7f67 100644 --- a/compute/zheevd.c +++ b/compute/zheevd.c @@ -126,7 +126,7 @@ int MORSE_zheevd(MORSE_enum jobz, MORSE_enum uplo, int N, morse_error("MORSE_zheevd", "illegal value of N"); return -3; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zheevd", "illegal value of LDA"); return -5; } @@ -393,7 +393,7 @@ int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, } N = descA.m; - NB = min(descA.mb,descA.m); + NB = chameleon_min(descA.mb,descA.m); /* Allocate data structures for reduction to tridiagonal form */ E = malloc( (N - 1) * sizeof(double) ); diff --git a/compute/zhemm.c b/compute/zhemm.c index 1a333118d1ddef0b08e846ec7f7dd81bf0970e56..a9581e25a62a77ca307ee2250b95384aebfd9bcb 100644 --- a/compute/zhemm.c +++ b/compute/zhemm.c @@ -142,15 +142,15 @@ int MORSE_zhemm(MORSE_enum side, MORSE_enum uplo, int M, int N, morse_error("MORSE_zhemm", "illegal value of N"); return -4; } - if (LDA < max(1, Am)) { + if (LDA < chameleon_max(1, Am)) { morse_error("MORSE_zhemm", "illegal value of LDA"); return -7; } - if (LDB < max(1, M)) { + if (LDB < chameleon_max(1, M)) { morse_error("MORSE_zhemm", "illegal value of LDB"); return -9; } - if (LDC < max(1, M)) { + if (LDC < chameleon_max(1, M)) { morse_error("MORSE_zhemm", "illegal value of LDC"); return -12; } diff --git a/compute/zher2k.c b/compute/zher2k.c index f0215d6e2334048d638bed2e6478f218ed977219..6dd8448041eb2c5bdc1de9aec24e3fadb65acc2c 100644 --- a/compute/zher2k.c +++ b/compute/zher2k.c @@ -147,15 +147,15 @@ int MORSE_zher2k(MORSE_enum uplo, MORSE_enum trans, int N, int K, morse_error("MORSE_zher2k", "illegal value of K"); return -4; } - if (LDA < max(1, Am)) { + if (LDA < chameleon_max(1, Am)) { morse_error("MORSE_zher2k", "illegal value of LDA"); return -7; } - if (LDB < max(1, Am)) { + if (LDB < chameleon_max(1, Am)) { morse_error("MORSE_zher2k", "illegal value of LDB"); return -9; } - if (LDC < max(1, N)) { + if (LDC < chameleon_max(1, N)) { morse_error("MORSE_zher2k", "illegal value of LDC"); return -12; } diff --git a/compute/zherk.c b/compute/zherk.c index 35e69bd5f5e064ca19a92ac733208d7cd9156aed..b52cdff217dd0e6297c40ab209bf58241614b1cb 100644 --- a/compute/zherk.c +++ b/compute/zherk.c @@ -137,11 +137,11 @@ int MORSE_zherk(MORSE_enum uplo, MORSE_enum trans, int N, int K, morse_error("MORSE_zherk", "illegal value of K"); return -4; } - if (LDA < max(1, Am)) { + if (LDA < chameleon_max(1, Am)) { morse_error("MORSE_zherk", "illegal value of LDA"); return -7; } - if (LDC < max(1, N)) { + if (LDC < chameleon_max(1, N)) { morse_error("MORSE_zherk", "illegal value of LDC"); return -10; } diff --git a/compute/zhetrd.c b/compute/zhetrd.c index 6e11245aa99f5e73c98dfc28cf30647c26c6dde2..40c8dde894083346456d3a8330ac1dd77f7f07d5 100644 --- a/compute/zhetrd.c +++ b/compute/zhetrd.c @@ -143,7 +143,7 @@ int MORSE_zhetrd(MORSE_enum jobz, MORSE_enum uplo, int N, morse_error("MORSE_zhetrd", "illegal value of N"); return -2; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zhetrd", "illegal value of LDA"); return -4; } diff --git a/compute/zlacpy.c b/compute/zlacpy.c index db7c293f890be8e2973034803bd6382862d82a60..c19edef30700232549a7caa946cc7085bb5f8e7b 100644 --- a/compute/zlacpy.c +++ b/compute/zlacpy.c @@ -44,7 +44,7 @@ * = MorseLower: Lower triangular part * * @param[in] M - * The number of rows of the matrix A. M >= 0. + * The number of rows of the matrix A. M >= 0. * * @param[in] N * The number of columns of the matrix A. N >= 0. @@ -90,7 +90,7 @@ int MORSE_zlacpy(MORSE_enum uplo, int M, int N, return MORSE_ERR_NOT_INITIALIZED; } /* Check input arguments */ - if ( (uplo != MorseUpperLower) && + if ( (uplo != MorseUpperLower) && (uplo != MorseUpper) && (uplo != MorseLower) ) { morse_error("MORSE_zlacpy", "illegal value of uplo"); @@ -104,17 +104,17 @@ int MORSE_zlacpy(MORSE_enum uplo, int M, int N, morse_error("MORSE_zlacpy", "illegal value of N"); return -3; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zlacpy", "illegal value of LDA"); return -5; } - if (LDB < max(1, M)) { + if (LDB < chameleon_max(1, M)) { morse_error("MORSE_zlacpy", "illegal value of LDB"); return -7; } /* Quick return */ - if (min(N, M) == 0) + if (chameleon_min(N, M) == 0) return (double)0.0; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -281,14 +281,14 @@ int MORSE_zlacpy_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } /* Check input arguments */ - if ( (uplo != MorseUpperLower) && + if ( (uplo != MorseUpperLower) && (uplo != MorseUpper) && (uplo != MorseLower) ) { morse_error("MORSE_zlacpy_Tile_Async", "illegal value of uplo"); return -1; } /* Quick return */ - if (min(A->m, A->n) == 0) { + if (chameleon_min(A->m, A->n) == 0) { return MORSE_SUCCESS; } diff --git a/compute/zlange.c b/compute/zlange.c index b3bfe288ad60d0faa6b3d4c1c17fb3ad992e2da5..c48e992e2a01a3344e6a40863e4364323164e344 100644 --- a/compute/zlange.c +++ b/compute/zlange.c @@ -112,13 +112,13 @@ double MORSE_zlange(MORSE_enum norm, int M, int N, morse_error("MORSE_zlange", "illegal value of N"); return -3; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zlange", "illegal value of LDA"); return -5; } /* Quick return */ - if (min(N, M) == 0) + if (chameleon_min(N, M) == 0) return (double)0.0; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -281,7 +281,7 @@ int MORSE_zlange_Tile_Async(MORSE_enum norm, MORSE_desc_t *A, double *value, return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } /* Quick return */ - if (min(A->m, A->n) == 0) { + if (chameleon_min(A->m, A->n) == 0) { *value = 0.0; return MORSE_SUCCESS; } diff --git a/compute/zlanhe.c b/compute/zlanhe.c index fcbd5decdd339633502a6777fbc627e4d8e26e07..e03146bc65b14abd3c2e0c0d85a75539ea2501d8 100644 --- a/compute/zlanhe.c +++ b/compute/zlanhe.c @@ -112,7 +112,7 @@ double MORSE_zlanhe(MORSE_enum norm, MORSE_enum uplo, int N, morse_error("MORSE_zlanhe", "illegal value of N"); return -3; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zlanhe", "illegal value of LDA"); return -5; } diff --git a/compute/zlansy.c b/compute/zlansy.c index 15b09950f14a1cb582c9bc1fcb06b05753542f13..5a141e1937c61f6fcab82419ba8fb455ace72cb0 100644 --- a/compute/zlansy.c +++ b/compute/zlansy.c @@ -112,7 +112,7 @@ double MORSE_zlansy(MORSE_enum norm, MORSE_enum uplo, int N, morse_error("MORSE_zlansy", "illegal value of N"); return -3; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zlansy", "illegal value of LDA"); return -5; } diff --git a/compute/zlantr.c b/compute/zlantr.c index f1718da7b283ac1db4d6befa635fc61a2c3de922..69563ac643ac00e1c2d08f7082eab70d1182bd9f 100644 --- a/compute/zlantr.c +++ b/compute/zlantr.c @@ -132,13 +132,13 @@ double MORSE_zlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, morse_error("MORSE_zlantr", "illegal value of N"); return -5; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zlantr", "illegal value of LDA"); return -7; } /* Quick return */ - if (min(N, M) == 0) + if (chameleon_min(N, M) == 0) return (double)0.0; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -315,7 +315,7 @@ int MORSE_zlantr_Tile_Async(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, } /* Quick return */ - if (min(A->m, A->n) == 0) { + if (chameleon_min(A->m, A->n) == 0) { *value = 0.0; return MORSE_SUCCESS; } diff --git a/compute/zlascal.c b/compute/zlascal.c index 8c7d2f36224eee1e22e46ebad2e2c99606c94080..7fba2b6ee94e810165747545fe309fef2018f860 100644 --- a/compute/zlascal.c +++ b/compute/zlascal.c @@ -90,7 +90,7 @@ int MORSE_zlascal(MORSE_enum uplo, int M, int N, morse_error("MORSE_zlascal", "illegal value of N"); return -3; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zlascal", "illegal value of LDA"); return -6; } diff --git a/compute/zlaset.c b/compute/zlaset.c index 5ae298d251a4351ce435ed8483ba97d5b36a22f2..c90ba7526368db6e7438813d7b7ebfd08b39ad56 100644 --- a/compute/zlaset.c +++ b/compute/zlaset.c @@ -105,13 +105,13 @@ int MORSE_zlaset(MORSE_enum uplo, int M, int N, morse_error("MORSE_zlaset", "illegal value of N"); return -3; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zlaset", "illegal value of LDA"); return -5; } /* Quick return */ - if (min(N, M) == 0) + if (chameleon_min(N, M) == 0) return (double)0.0; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -275,7 +275,7 @@ int MORSE_zlaset_Tile_Async(MORSE_enum uplo, return -1; } /* Quick return */ - if (min(A->m, A->n) == 0) { + if (chameleon_min(A->m, A->n) == 0) { return MORSE_SUCCESS; } diff --git a/compute/zlauum.c b/compute/zlauum.c index a79827ce2d245fc322400fc6fbf6999ea6eeb438..f96cab0c6b36d7894df2181d0fa5cdc6bddcdcaa 100644 --- a/compute/zlauum.c +++ b/compute/zlauum.c @@ -100,12 +100,12 @@ int MORSE_zlauum(MORSE_enum uplo, int N, morse_error("MORSE_zlauum", "illegal value of N"); return -2; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zlauum", "illegal value of LDA"); return -4; } /* Quick return */ - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -277,7 +277,7 @@ int MORSE_zlauum_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, } /* Quick return */ /* - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; */ morse_pzlauum(uplo, A, sequence, request); diff --git a/compute/zplghe.c b/compute/zplghe.c index 9929737c35dc010137338ab57ef343dda02ad8c9..d7c884989890c5bc30ea21ab762242c004a3a123 100644 --- a/compute/zplghe.c +++ b/compute/zplghe.c @@ -95,12 +95,12 @@ int MORSE_zplghe( double bump, MORSE_enum uplo, int N, morse_error("MORSE_zplghe", "illegal value of N"); return -2; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zplghe", "illegal value of LDA"); return -4; } /* Quick return */ - if (max(0, N) == 0) + if (chameleon_max(0, N) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -258,7 +258,7 @@ int MORSE_zplghe_Tile_Async( double bump, } /* Quick return */ - if (min( A->m, A->n ) == 0) + if (chameleon_min( A->m, A->n ) == 0) return MORSE_SUCCESS; morse_pzplghe(bump, uplo, A, seed, sequence, request); diff --git a/compute/zplgsy.c b/compute/zplgsy.c index f61ee0d83bc1eab2c7278d582c3c0215a4cc2edf..3684d39dca07057d90d9d58cf09edcbd2b4c837a 100644 --- a/compute/zplgsy.c +++ b/compute/zplgsy.c @@ -95,12 +95,12 @@ int MORSE_zplgsy( MORSE_Complex64_t bump, MORSE_enum uplo, int N, morse_error("MORSE_zplgsy", "illegal value of N"); return -2; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zplgsy", "illegal value of LDA"); return -4; } /* Quick return */ - if (max(0, N) == 0) + if (chameleon_max(0, N) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -260,7 +260,7 @@ int MORSE_zplgsy_Tile_Async( MORSE_Complex64_t bump, } /* Quick return */ - if (min( A->m, A->n ) == 0) + if (chameleon_min( A->m, A->n ) == 0) return MORSE_SUCCESS; morse_pzplgsy(bump, uplo, A, seed, sequence, request); diff --git a/compute/zplrnt.c b/compute/zplrnt.c index 5410719a6966e833a7973be438fb22d66cfbf6c9..707469c9eb10c99f4ffead2d911280dcfff218e1 100644 --- a/compute/zplrnt.c +++ b/compute/zplrnt.c @@ -93,12 +93,12 @@ int MORSE_zplrnt( int M, int N, morse_error("MORSE_zplrnt", "illegal value of N"); return -2; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zplrnt", "illegal value of LDA"); return -4; } /* Quick return */ - if (min(M, N) == 0) + if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -248,7 +248,7 @@ int MORSE_zplrnt_Tile_Async( MORSE_desc_t *A, } /* Quick return */ - if (min( A->m, A->n ) == 0) + if (chameleon_min( A->m, A->n ) == 0) return MORSE_SUCCESS; morse_pzplrnt(A, seed, sequence, request); diff --git a/compute/zposv.c b/compute/zposv.c index 5cba21f4929c7469e9cf5954df547274730d118d..b0c9cb25d7a8b551c16419cba70aa33ce9e7ecf6 100644 --- a/compute/zposv.c +++ b/compute/zposv.c @@ -123,17 +123,17 @@ int MORSE_zposv(MORSE_enum uplo, int N, int NRHS, morse_error("MORSE_zposv", "illegal value of NRHS"); return -3; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zposv", "illegal value of LDA"); return -5; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_zposv", "illegal value of LDB"); return -7; } /* Quick return - currently NOT equivalent to LAPACK's * LAPACK does not have such check for DPOSV */ - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */ @@ -324,7 +324,7 @@ int MORSE_zposv_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, /* Quick return - currently NOT equivalent to LAPACK's * LAPACK does not have such check for DPOSV */ /* - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; */ morse_pzpotrf(uplo, A, sequence, request); diff --git a/compute/zpotrf.c b/compute/zpotrf.c index 1ccae89b1d5d8a39db3dfa6382ba1d9453bbe7ce..e69c04b8605ca4df2a1f2e77ecc42123e22082e3 100644 --- a/compute/zpotrf.c +++ b/compute/zpotrf.c @@ -106,12 +106,12 @@ int MORSE_zpotrf(MORSE_enum uplo, int N, morse_error("MORSE_zpotrf", "illegal value of N"); return -2; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zpotrf", "illegal value of LDA"); return -4; } /* Quick return */ - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -288,7 +288,7 @@ int MORSE_zpotrf_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, } /* Quick return */ /* - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; */ morse_pzpotrf(uplo, A, sequence, request); diff --git a/compute/zpotri.c b/compute/zpotri.c index 7ee8c9d3ec8ae1d1942cc36a7a5c597a9ffbce5f..5581e027475583c814e6f1693ad944c2870d27e8 100644 --- a/compute/zpotri.c +++ b/compute/zpotri.c @@ -97,12 +97,12 @@ int MORSE_zpotri(MORSE_enum uplo, int N, morse_error("MORSE_zpotri", "illegal value of N"); return -2; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zpotri", "illegal value of LDA"); return -4; } /* Quick return */ - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -277,7 +277,7 @@ int MORSE_zpotri_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, } /* Quick return */ /* - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; */ morse_pztrtri(uplo, MorseNonUnit, A, sequence, request); diff --git a/compute/zpotrimm.c b/compute/zpotrimm.c index 39347010e6402523b1fc6eb0718f3e713e97cd3d..c779d47c6c9d285de923c001ea9930e98ccd6e8a 100644 --- a/compute/zpotrimm.c +++ b/compute/zpotrimm.c @@ -99,20 +99,20 @@ int MORSE_zpotrimm(MORSE_enum uplo, int N, morse_error("MORSE_zpotrimm", "illegal value of N"); return -2; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zpotrimm", "illegal value of LDA"); return -4; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_zpotrimm", "illegal value of LDB"); return -6; } - if (LDC < max(1, N)) { + if (LDC < chameleon_max(1, N)) { morse_error("MORSE_zpotrimm", "illegal value of LDC"); return -8; } /* Quick return */ - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -311,7 +311,7 @@ int MORSE_zpotrimm_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, } /* Quick return */ /* - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; */ morse_pzpotrimm(uplo, A, B, C, sequence, request); diff --git a/compute/zpotrs.c b/compute/zpotrs.c index bb2de06d5bc89001ff0e21d3403a6809d7575c17..13309e1c524623bdaa8b87b3d783ab9e616acba3 100644 --- a/compute/zpotrs.c +++ b/compute/zpotrs.c @@ -108,16 +108,16 @@ int MORSE_zpotrs(MORSE_enum uplo, int N, int NRHS, morse_error("MORSE_zpotrs", "illegal value of NRHS"); return -3; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zpotrs", "illegal value of LDA"); return -5; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_zpotrs", "illegal value of LDB"); return -7; } /* Quick return */ - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -299,7 +299,7 @@ int MORSE_zpotrs_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, } /* Quick return */ /* - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; */ morse_pztrsm(MorseLeft, uplo, uplo == MorseUpper ? MorseConjTrans : MorseNoTrans, MorseNonUnit, 1.0, A, B, sequence, request); diff --git a/compute/zsymm.c b/compute/zsymm.c index 6787a29bbedc22cff785cb2c8841e3a6c2316bcc..3bd567a9e486c3eb9f319d6cec1f35e26e332dcd 100644 --- a/compute/zsymm.c +++ b/compute/zsymm.c @@ -142,15 +142,15 @@ int MORSE_zsymm(MORSE_enum side, MORSE_enum uplo, int M, int N, morse_error("MORSE_zsymm", "illegal value of N"); return -4; } - if (LDA < max(1, Am)) { + if (LDA < chameleon_max(1, Am)) { morse_error("MORSE_zsymm", "illegal value of LDA"); return -7; } - if (LDB < max(1, M)) { + if (LDB < chameleon_max(1, M)) { morse_error("MORSE_zsymm", "illegal value of LDB"); return -9; } - if (LDC < max(1, M)) { + if (LDC < chameleon_max(1, M)) { morse_error("MORSE_zsymm", "illegal value of LDC"); return -12; } diff --git a/compute/zsyr2k.c b/compute/zsyr2k.c index f028ab5c95afd9208534b5fa7efba84124b31e3d..bc098a162414536c726763b6f341f0b5b826eb5f 100644 --- a/compute/zsyr2k.c +++ b/compute/zsyr2k.c @@ -147,15 +147,15 @@ int MORSE_zsyr2k(MORSE_enum uplo, MORSE_enum trans, int N, int K, morse_error("MORSE_zsyr2k", "illegal value of K"); return -4; } - if (LDA < max(1, Am)) { + if (LDA < chameleon_max(1, Am)) { morse_error("MORSE_zsyr2k", "illegal value of LDA"); return -7; } - if (LDB < max(1, Am)) { + if (LDB < chameleon_max(1, Am)) { morse_error("MORSE_zsyr2k", "illegal value of LDB"); return -9; } - if (LDC < max(1, N)) { + if (LDC < chameleon_max(1, N)) { morse_error("MORSE_zsyr2k", "illegal value of LDC"); return -12; } diff --git a/compute/zsyrk.c b/compute/zsyrk.c index 1f7aaebccb2a1d88a88e4e221966482e9225db0e..3623506f22dba7f484f5e63f5b0399a62b2f0af4 100644 --- a/compute/zsyrk.c +++ b/compute/zsyrk.c @@ -137,11 +137,11 @@ int MORSE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int N, int K, morse_error("MORSE_zsyrk", "illegal value of K"); return -4; } - if (LDA < max(1, Am)) { + if (LDA < chameleon_max(1, Am)) { morse_error("MORSE_zsyrk", "illegal value of LDA"); return -7; } - if (LDC < max(1, N)) { + if (LDC < chameleon_max(1, N)) { morse_error("MORSE_zsyrk", "illegal value of LDC"); return -10; } diff --git a/compute/zsysv.c b/compute/zsysv.c index 1d8eaaebb87b9b7b522ec752bfc0ed2ca85b2278..db51e5c515299716402007d979312469279f75f1 100644 --- a/compute/zsysv.c +++ b/compute/zsysv.c @@ -120,17 +120,17 @@ int MORSE_zsysv(MORSE_enum uplo, int N, int NRHS, morse_error("MORSE_zsysv", "illegal value of NRHS"); return -3; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zsysv", "illegal value of LDA"); return -5; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_zsysv", "illegal value of LDB"); return -7; } /* Quick return - currently NOT equivalent to LAPACK's * LAPACK does not have such check for Dsysv */ - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */ @@ -315,7 +315,7 @@ int MORSE_zsysv_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, /* Quick return - currently NOT equivalent to LAPACK's * LAPACK does not have such check for Dsysv */ /* - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; */ morse_pzsytrf(uplo, A, sequence, request); diff --git a/compute/zsytrf.c b/compute/zsytrf.c index dd1ea44f5b019dec8354bc5c7dd353c32d36f6f8..c157d1248004068d209507dac66451ce7122fb9a 100644 --- a/compute/zsytrf.c +++ b/compute/zsytrf.c @@ -101,12 +101,12 @@ int MORSE_zsytrf(MORSE_enum uplo, int N, morse_error("MORSE_zsytrf", "illegal value of N"); return -2; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zsytrf", "illegal value of LDA"); return -4; } /* Quick return */ - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -274,7 +274,7 @@ int MORSE_zsytrf_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, } /* Quick return */ /* - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; */ morse_pzsytrf(uplo, A, sequence, request); diff --git a/compute/zsytrs.c b/compute/zsytrs.c index e8b934b2f064cca769e9355845cbba8711ce0024..508d404c634dbb376946ea5591b18021c34563fb 100644 --- a/compute/zsytrs.c +++ b/compute/zsytrs.c @@ -107,16 +107,16 @@ int MORSE_zsytrs(MORSE_enum uplo, int N, int NRHS, morse_error("MORSE_zsytrs", "illegal value of NRHS"); return -3; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_zsytrs", "illegal value of LDA"); return -5; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_zsytrs", "illegal value of LDB"); return -7; } /* Quick return */ - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -292,7 +292,7 @@ int MORSE_zsytrs_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, } /* Quick return */ /* - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; */ morse_pztrsm(MorseLeft, uplo, uplo == MorseUpper ? MorseTrans : MorseNoTrans, MorseNonUnit, 1.0, A, B, sequence, request); diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c index 6a4ef6d7c68cf879e30bd14c55a18aecdc14b0ba..6a81d5cd53ea8a830da38ac54e66fa43c74dbdb1 100644 --- a/compute/ztpgqrt.c +++ b/compute/ztpgqrt.c @@ -128,7 +128,7 @@ int MORSE_ztpgqrt( int M, int N, int K, int L, MORSE_sequence_t *sequence = NULL; MORSE_request_t request = MORSE_REQUEST_INITIALIZER; MORSE_desc_t descA, descB, descV; - int minMK = min( M, K ); + int minMK = chameleon_min( M, K ); morse = morse_context_self(); if (morse == NULL) { @@ -153,15 +153,15 @@ int MORSE_ztpgqrt( int M, int N, int K, int L, morse_error("MORSE_ztpgqrt", "illegal value of N"); return -4; } - if (LDV < max(1, M)) { + if (LDV < chameleon_max(1, M)) { morse_error("MORSE_ztpgqrt", "illegal value of LDV"); return -6; } - if (LDA < max(1, K)) { + if (LDA < chameleon_max(1, K)) { morse_error("MORSE_ztpgqrt", "illegal value of LDA"); return -9; } - if (LDB < max(1, M)) { + if (LDB < chameleon_max(1, M)) { morse_error("MORSE_ztpgqrt", "illegal value of LDB"); return -11; } diff --git a/compute/ztpqrt.c b/compute/ztpqrt.c index 6efe33bbb728ecfdaa714abd3176720236be9a7f..f7c95881575bad2bc97965eb056c6854f59f5c13 100644 --- a/compute/ztpqrt.c +++ b/compute/ztpqrt.c @@ -139,7 +139,7 @@ int MORSE_ztpqrt( int M, int N, int L, MORSE_sequence_t *sequence = NULL; MORSE_request_t request = MORSE_REQUEST_INITIALIZER; MORSE_desc_t descA, descB; - int minMN = min( M, N ); + int minMN = chameleon_min( M, N ); morse = morse_context_self(); if (morse == NULL) { @@ -160,11 +160,11 @@ int MORSE_ztpqrt( int M, int N, int L, morse_error("MORSE_ztpqrt", "illegal value of N"); return -3; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_ztpqrt", "illegal value of LDA"); return -5; } - if (LDB < max(1, M)) { + if (LDB < chameleon_max(1, M)) { morse_error("MORSE_ztpqrt", "illegal value of LDB"); return -7; } diff --git a/compute/ztradd.c b/compute/ztradd.c index 5e8855189034c8c72685e1aaccc8701d361cdef0..d35eb823c13d08cfb25848e724d2054036aa8532 100644 --- a/compute/ztradd.c +++ b/compute/ztradd.c @@ -136,11 +136,11 @@ int MORSE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, morse_error("MORSE_ztradd", "illegal value of N"); return -4; } - if (LDA < max(1, Am)) { + if (LDA < chameleon_max(1, Am)) { morse_error("MORSE_ztradd", "illegal value of LDA"); return -7; } - if (LDB < max(1, M)) { + if (LDB < chameleon_max(1, M)) { morse_error("MORSE_ztradd", "illegal value of LDB"); return -10; } diff --git a/compute/ztrmm.c b/compute/ztrmm.c index 029142851afffad8708e04fd89684fd719b68e87..17e5d44039b514938445003df3a9c13ed7b125ba 100644 --- a/compute/ztrmm.c +++ b/compute/ztrmm.c @@ -133,9 +133,9 @@ int MORSE_ztrmm(MORSE_enum side, MORSE_enum uplo, morse_error("MORSE_ztrmm", "illegal value of uplo"); return -2; } - if (transA != MorseConjTrans && - transA != MorseNoTrans && - transA != MorseTrans ) + if (transA != MorseConjTrans && + transA != MorseNoTrans && + transA != MorseTrans ) { morse_error("MORSE_ztrmm", "illegal value of transA"); return -3; @@ -152,16 +152,16 @@ int MORSE_ztrmm(MORSE_enum side, MORSE_enum uplo, morse_error("MORSE_ztrmm", "illegal value of NRHS"); return -6; } - if (LDA < max(1, NA)) { + if (LDA < chameleon_max(1, NA)) { morse_error("MORSE_ztrmm", "illegal value of LDA"); return -8; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_ztrmm", "illegal value of LDB"); return -10; } /* Quick return */ - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -288,7 +288,7 @@ int MORSE_ztrmm_Tile(MORSE_enum side, MORSE_enum uplo, morse_sequence_wait(morse, sequence); RUNTIME_desc_getoncpu(A); RUNTIME_desc_getoncpu(B); - + status = sequence->status; morse_sequence_destroy(morse, sequence); return status; diff --git a/compute/ztrsm.c b/compute/ztrsm.c index 82b8ada02c890d94b0b090c79c508756b2c6854f..5a6b21980c2166cc4f9cd083074b2c59a4a566d7 100644 --- a/compute/ztrsm.c +++ b/compute/ztrsm.c @@ -150,16 +150,16 @@ int MORSE_ztrsm(MORSE_enum side, MORSE_enum uplo, morse_error("MORSE_ztrsm", "illegal value of NRHS"); return -6; } - if (LDA < max(1, NA)) { + if (LDA < chameleon_max(1, NA)) { morse_error("MORSE_ztrsm", "illegal value of LDA"); return -8; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_ztrsm", "illegal value of LDB"); return -10; } /* Quick return */ - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ diff --git a/compute/ztrsmpl.c b/compute/ztrsmpl.c index dd18ea9e58043047cf10b36c3cfa5ba664e4ac18..86ef9bb4d3bffa2f0d228fe147e79927bb0d82de 100644 --- a/compute/ztrsmpl.c +++ b/compute/ztrsmpl.c @@ -105,16 +105,16 @@ int MORSE_ztrsmpl(int N, int NRHS, morse_error("MORSE_ztrsmpl", "illegal value of NRHS"); return -2; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_ztrsmpl", "illegal value of LDA"); return -4; } - if (LDB < max(1, N)) { + if (LDB < chameleon_max(1, N)) { morse_error("MORSE_ztrsmpl", "illegal value of LDB"); return -8; } /* Quick return */ - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on N & NRHS; Set NBNB */ @@ -294,7 +294,7 @@ int MORSE_ztrsmpl_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_ } /* Quick return */ /* - if (min(N, NRHS) == 0) + if (chameleon_min(N, NRHS) == 0) return MORSE_SUCCESS; */ morse_pztrsmpl(A, B, L, IPIV, sequence, request); diff --git a/compute/ztrtri.c b/compute/ztrtri.c index 628cd063585222ff61080285ac352b845ee969d0..28cab09ef59ac5d24e8642972c909a57cf8c0e9d 100644 --- a/compute/ztrtri.c +++ b/compute/ztrtri.c @@ -109,12 +109,12 @@ int MORSE_ztrtri(MORSE_enum uplo, MORSE_enum diag, int N, morse_error("MORSE_ztrtri", "illegal value of N"); return -3; } - if (LDA < max(1, N)) { + if (LDA < chameleon_max(1, N)) { morse_error("MORSE_ztrtri", "illegal value of LDA"); return -5; } /* Quick return */ - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ @@ -299,7 +299,7 @@ int MORSE_ztrtri_Tile_Async(MORSE_enum uplo, MORSE_enum diag, MORSE_desc_t *A, } /* Quick return */ /* - if (max(N, 0) == 0) + if (chameleon_max(N, 0) == 0) return MORSE_SUCCESS; */ morse_pztrtri(uplo, diag, A, sequence, request); diff --git a/compute/zunglq.c b/compute/zunglq.c index af628c28d114e81f093971e7fc330e377437c4da..78865dd0228bf36a3e672f5fcbff6ab8657b535f 100644 --- a/compute/zunglq.c +++ b/compute/zunglq.c @@ -110,17 +110,17 @@ int MORSE_zunglq(int M, int N, int K, morse_error("MORSE_zunglq", "illegal value of K"); return -3; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zunglq", "illegal value of LDA"); return -5; } - if (LDQ < max(1, M)) { + if (LDQ < chameleon_max(1, M)) { morse_error("MORSE_zunglq", "illegal value of LDQ"); return -8; } /* Quick return - currently NOT equivalent to LAPACK's: * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDQ ) */ - if (min(M, min(N, K)) == 0) + if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ @@ -295,7 +295,7 @@ int MORSE_zunglq_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q, /* Quick return - currently NOT equivalent to LAPACK's: * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, Q, LDQ ) */ /* - if (min(M, N) == 0) + if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); diff --git a/compute/zungqr.c b/compute/zungqr.c index 074f8eef35fbc561fe0156210b1ba260cfb09800..cb136d4176f92d0ebffcbc451428efe7187ee053 100644 --- a/compute/zungqr.c +++ b/compute/zungqr.c @@ -111,15 +111,15 @@ int MORSE_zungqr(int M, int N, int K, morse_error("MORSE_zungqr", "illegal value of K"); return -3; } - if (LDA < max(1, M)) { + if (LDA < chameleon_max(1, M)) { morse_error("MORSE_zungqr", "illegal value of LDA"); return -5; } - if (LDQ < max(1, M)) { + if (LDQ < chameleon_max(1, M)) { morse_error("MORSE_zungqr", "illegal value of LDQ"); return -8; } - if (min(M, min(N, K)) == 0) + if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on M & N; Set NBNB */ diff --git a/compute/zunmlq.c b/compute/zunmlq.c index fcf8fbb4eb2cc59fc1b8ec5f729f390e6a7e2e39..ca6d7806ad610272cfae956b8a100f0cb7f080b4 100644 --- a/compute/zunmlq.c +++ b/compute/zunmlq.c @@ -36,16 +36,16 @@ * @ingroup MORSE_Complex64_t * * MORSE_zunmlq - Overwrites the general complex M-by-N matrix C with - * + * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q * TRANS = 'C': Q**H * C C * Q**H - * + * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors - * + * * Q = H(1) H(2) . . . H(k) - * + * * as returned by MORSE_zgeqrf. Q is of order M if SIDE = MorseLeft * and of order N if SIDE = MorseRight. * @@ -124,7 +124,7 @@ int MORSE_zunmlq(MORSE_enum side, MORSE_enum trans, int M, int N, int K, if (side == MorseLeft) An = M; - else + else An = N; /* Check input arguments */ @@ -148,17 +148,17 @@ int MORSE_zunmlq(MORSE_enum side, MORSE_enum trans, int M, int N, int K, morse_error("MORSE_zunmlq", "illegal value of K"); return -5; } - if (LDA < max(1, K)) { + if (LDA < chameleon_max(1, K)) { morse_error("MORSE_zunmlq", "illegal value of LDA"); return -7; } - if (LDC < max(1, M)) { + if (LDC < chameleon_max(1, M)) { morse_error("MORSE_zunmlq", "illegal value of LDC"); return -10; } /* Quick return - currently NOT equivalent to LAPACK's: * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, C, LDC ) */ - if (min(M, min(N, K)) == 0) + if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ @@ -270,7 +270,7 @@ int MORSE_zunmlq_Tile(MORSE_enum side, MORSE_enum trans, morse_sequence_wait(morse, sequence); RUNTIME_desc_getoncpu(A); RUNTIME_desc_getoncpu(C); - + status = sequence->status; morse_sequence_destroy(morse, sequence); return status; @@ -355,7 +355,7 @@ int MORSE_zunmlq_Tile_Async(MORSE_enum side, MORSE_enum trans, /* Quick return - currently NOT equivalent to LAPACK's: * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, C, LDC ) */ /* - if (min(M, min(N, K)) == 0) + if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; */ if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { diff --git a/compute/zunmqr.c b/compute/zunmqr.c index b344058cbc57599b6110d8c4c655f31e09ff44f8..709947e87ccbe27cf26f30283026ba39b9aea3d6 100644 --- a/compute/zunmqr.c +++ b/compute/zunmqr.c @@ -35,16 +35,16 @@ * @ingroup MORSE_Complex64_t * * MORSE_zunmqr - Overwrites the general complex M-by-N matrix C with - * + * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q * TRANS = 'C': Q**H * C C * Q**H - * + * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors - * + * * Q = H(1) H(2) . . . H(k) - * + * * as returned by MORSE_zgeqrf. Q is of order M if SIDE = MorseLeft * and of order N if SIDE = MorseRight. * @@ -76,7 +76,7 @@ * Details of the QR factorization of the original matrix A as returned by MORSE_zgeqrf. * * @param[in] LDA - * The leading dimension of the array A. + * The leading dimension of the array A. * If side == MorseLeft, LDA >= max(1,M). * If side == MorseRight, LDA >= max(1,N). * @@ -151,17 +151,17 @@ int MORSE_zunmqr(MORSE_enum side, MORSE_enum trans, int M, int N, int K, morse_error("MORSE_zunmqr", "illegal value of K"); return -5; } - if (LDA < max(1, Am)) { + if (LDA < chameleon_max(1, Am)) { morse_error("MORSE_zunmqr", "illegal value of LDA"); return -7; } - if (LDC < max(1, M)) { + if (LDC < chameleon_max(1, M)) { morse_error("MORSE_zunmqr", "illegal value of LDC"); return -10; } /* Quick return - currently NOT equivalent to LAPACK's: * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, C, LDC ) */ - if (min(M, min(N, K)) == 0) + if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; /* Tune NB & IB depending on M, K & N; Set NBNB */ @@ -274,7 +274,7 @@ int MORSE_zunmqr_Tile(MORSE_enum side, MORSE_enum trans, morse_sequence_wait(morse, sequence); RUNTIME_desc_getoncpu(A); RUNTIME_desc_getoncpu(C); - + status = sequence->status; morse_sequence_destroy(morse, sequence); return status; @@ -359,7 +359,7 @@ int MORSE_zunmqr_Tile_Async(MORSE_enum side, MORSE_enum trans, /* Quick return - currently NOT equivalent to LAPACK's: * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, C, LDC ) */ /* - if (min(M, min(N, K)) == 0) + if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; */ if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { diff --git a/control/context.c b/control/context.c index e39b7b3dfdcea5c648e620a83b9da0e1001786d0..483630782bad68a5b11c50d5a1942653c4c9be77 100644 --- a/control/context.c +++ b/control/context.c @@ -301,7 +301,7 @@ int MORSE_Set(MORSE_enum param, int value) morse_warning("MORSE_Set", "autotuning has been automatically disable\n"); } /* Limit ib to nb */ - morse->ib = min( morse->nb, morse->ib ); + morse->ib = chameleon_min( morse->nb, morse->ib ); break; case MORSE_INNER_BLOCK_SIZE: if (value <= 0) { diff --git a/control/workspace.c b/control/workspace.c index d66577de4821c8be8e1bf73d9fbefd598ea2eb9b..097a26711706660712005db66c7e858a2af827e9 100644 --- a/control/workspace.c +++ b/control/workspace.c @@ -143,7 +143,7 @@ int morse_alloc_ipiv(int M, int N, MORSE_enum func, int type, MORSE_desc_t **des lm = IB * MT; ln = NB * NT; - size = (size_t)(min(MT, NT) * NB * NT * sizeof(int)); + size = (size_t)(chameleon_min(MT, NT) * NB * NT * sizeof(int)); if (size <= 0) { *IPIV = NULL; return MORSE_SUCCESS; diff --git a/coreblas/compute/core_zgeadd.c b/coreblas/compute/core_zgeadd.c index f7714d50ca9006028575daad5abb4f9d2df7fc84..9714410f6e76186deeb8cfc4fc3817c40e80a097 100644 --- a/coreblas/compute/core_zgeadd.c +++ b/coreblas/compute/core_zgeadd.c @@ -109,13 +109,13 @@ int CORE_zgeadd(MORSE_enum trans, int M, int N, coreblas_error(3, "Illegal value of N"); return -3; } - if ( ((trans == MorseNoTrans) && (LDA < max(1,M)) && (M > 0)) || - ((trans != MorseNoTrans) && (LDA < max(1,N)) && (N > 0)) ) + if ( ((trans == MorseNoTrans) && (LDA < chameleon_max(1,M)) && (M > 0)) || + ((trans != MorseNoTrans) && (LDA < chameleon_max(1,N)) && (N > 0)) ) { coreblas_error(6, "Illegal value of LDA"); return -6; } - if ( (LDB < max(1,M)) && (M > 0) ) { + if ( (LDB < chameleon_max(1,M)) && (M > 0) ) { coreblas_error(8, "Illegal value of LDB"); return -8; } diff --git a/coreblas/compute/core_zgelqt.c b/coreblas/compute/core_zgelqt.c index 8268be84117d9f19fc705a0f3c9c288dd0270ace..e0c5b8ff68f821d59694a8134a2c6985e1ed3042 100644 --- a/coreblas/compute/core_zgelqt.c +++ b/coreblas/compute/core_zgelqt.c @@ -114,11 +114,11 @@ int CORE_zgelqt(int M, int N, int IB, coreblas_error(3, "Illegal value of IB"); return -3; } - if ((LDA < max(1,M)) && (M > 0)) { + if ((LDA < chameleon_max(1,M)) && (M > 0)) { coreblas_error(5, "Illegal value of LDA"); return -5; } - if ((LDT < max(1,IB)) && (IB > 0)) { + if ((LDT < chameleon_max(1,IB)) && (IB > 0)) { coreblas_error(7, "Illegal value of LDT"); return -7; } @@ -127,10 +127,10 @@ int CORE_zgelqt(int M, int N, int IB, if ((M == 0) || (N == 0) || (IB == 0)) return MORSE_SUCCESS; - k = min(M, N); + k = chameleon_min(M, N); for(i = 0; i < k; i += IB) { - sb = min(IB, k-i); + sb = chameleon_min(IB, k-i); LAPACKE_zgelq2_work(LAPACK_COL_MAJOR, sb, N-i, &A[LDA*i+i], LDA, &TAU[i], WORK); diff --git a/coreblas/compute/core_zgeqrt.c b/coreblas/compute/core_zgeqrt.c index 4feb0c75f3d8d99963c117f5ece303f70378b766..0c984732d78d183a64026c5910d7597d75b53fb2 100644 --- a/coreblas/compute/core_zgeqrt.c +++ b/coreblas/compute/core_zgeqrt.c @@ -115,11 +115,11 @@ int CORE_zgeqrt(int M, int N, int IB, coreblas_error(3, "Illegal value of IB"); return -3; } - if ((LDA < max(1,M)) && (M > 0)) { + if ((LDA < chameleon_max(1,M)) && (M > 0)) { coreblas_error(5, "Illegal value of LDA"); return -5; } - if ((LDT < max(1,IB)) && (IB > 0)) { + if ((LDT < chameleon_max(1,IB)) && (IB > 0)) { coreblas_error(7, "Illegal value of LDT"); return -7; } @@ -128,10 +128,10 @@ int CORE_zgeqrt(int M, int N, int IB, if ((M == 0) || (N == 0) || (IB == 0)) return MORSE_SUCCESS; - k = min(M, N); + k = chameleon_min(M, N); for(i = 0; i < k; i += IB) { - sb = min(IB, k-i); + sb = chameleon_min(IB, k-i); LAPACKE_zgeqr2_work(LAPACK_COL_MAJOR, M-i, sb, &A[LDA*i+i], LDA, &TAU[i], WORK); diff --git a/coreblas/compute/core_zgesplit.c b/coreblas/compute/core_zgesplit.c index 6bd4a3a7006605030d257a09ee12a270006b4351..4c62e55c5e2574356e955e86f993612d0ca674c0 100644 --- a/coreblas/compute/core_zgesplit.c +++ b/coreblas/compute/core_zgesplit.c @@ -82,11 +82,11 @@ int CORE_zgesplit(MORSE_enum side, MORSE_enum diag, coreblas_error(2, "Illegal value of N"); return -2; } - if ( (LDA < max(1,M)) && (M > 0) ) { + if ( (LDA < chameleon_max(1,M)) && (M > 0) ) { coreblas_error(5, "Illegal value of LDA"); return -5; } - if ( (LDB < max(1,M)) && (M > 0) ) { + if ( (LDB < chameleon_max(1,M)) && (M > 0) ) { coreblas_error(7, "Illegal value of LDB"); return -7; } @@ -105,5 +105,6 @@ int CORE_zgesplit(MORSE_enum side, MORSE_enum diag, morse_lapack_const(uplo), M, N, 0., 1., A, LDA); + (void)diag; return MORSE_SUCCESS; } diff --git a/coreblas/compute/core_zgessm.c b/coreblas/compute/core_zgessm.c index 14e0a142b51c38270d958e889fb5acfb9efb8ac0..2694092b26bbf45e9ee60ac2411fa0bce4fb2dc7 100644 --- a/coreblas/compute/core_zgessm.c +++ b/coreblas/compute/core_zgessm.c @@ -107,11 +107,11 @@ int CORE_zgessm(int M, int N, int K, int IB, coreblas_error(4, "Illegal value of IB"); return -4; } - if ((LDL < max(1,M)) && (M > 0)) { + if ((LDL < chameleon_max(1,M)) && (M > 0)) { coreblas_error(7, "Illegal value of LDL"); return -7; } - if ((LDA < max(1,M)) && (M > 0)) { + if ((LDA < chameleon_max(1,M)) && (M > 0)) { coreblas_error(9, "Illegal value of LDA"); return -9; } @@ -121,7 +121,7 @@ int CORE_zgessm(int M, int N, int K, int IB, return MORSE_SUCCESS; for(i = 0; i < K; i += IB) { - sb = min(IB, K-i); + sb = chameleon_min(IB, K-i); /* * Apply interchanges to columns I*IB+1:IB*( I+1 )+1. */ diff --git a/coreblas/compute/core_zgetf2_nopiv.c b/coreblas/compute/core_zgetf2_nopiv.c index 85990a5fd82b868522c84a9a1cf9c82c4702894f..d2bd6031c7e32f96540a9f7d0a78bd28926ab91f 100644 --- a/coreblas/compute/core_zgetf2_nopiv.c +++ b/coreblas/compute/core_zgetf2_nopiv.c @@ -92,7 +92,7 @@ CORE_zgetf2_nopiv(int M, int N, coreblas_error(2, "Illegal value of N"); return -2; } - if ((LDA < max(1,M)) && (M > 0)) { + if ((LDA < chameleon_max(1,M)) && (M > 0)) { coreblas_error(5, "Illegal value of LDA"); return -5; } @@ -102,7 +102,7 @@ CORE_zgetf2_nopiv(int M, int N, return MORSE_SUCCESS; sfmin = LAPACKE_dlamch_work('S'); - k = min(M, N); + k = chameleon_min(M, N); for(i=0 ; i < k; i++) { alpha = A[i*LDA+i]; if ( alpha != (MORSE_Complex64_t)0.0 ) { diff --git a/coreblas/compute/core_zgetrf_incpiv.c b/coreblas/compute/core_zgetrf_incpiv.c index ab02423ff6bf9c37908745e1cf91079e43694194..a59b84a1dddbd85416e4b09e79ec1df652cbd57f 100644 --- a/coreblas/compute/core_zgetrf_incpiv.c +++ b/coreblas/compute/core_zgetrf_incpiv.c @@ -107,7 +107,7 @@ int CORE_zgetrf_incpiv(int M, int N, int IB, coreblas_error(3, "Illegal value of IB"); return -3; } - if ((LDA < max(1,M)) && (M > 0)) { + if ((LDA < chameleon_max(1,M)) && (M > 0)) { coreblas_error(5, "Illegal value of LDA"); return -5; } @@ -116,10 +116,10 @@ int CORE_zgetrf_incpiv(int M, int N, int IB, if ((M == 0) || (N == 0) || (IB == 0)) return MORSE_SUCCESS; - k = min(M, N); + k = chameleon_min(M, N); for(i =0 ; i < k; i += IB) { - sb = min(IB, k-i); + sb = chameleon_min(IB, k-i); /* * Factor diagonal and subdiagonal blocks and test for exact singularity. */ diff --git a/coreblas/compute/core_zgetrf_nopiv.c b/coreblas/compute/core_zgetrf_nopiv.c index 4df186dc9b9cb647dac75b6678aa4396ca7911ed..a7fd6c4994ca6016f1fd229f5d71ff81564781ca 100644 --- a/coreblas/compute/core_zgetrf_nopiv.c +++ b/coreblas/compute/core_zgetrf_nopiv.c @@ -98,7 +98,7 @@ int CORE_zgetrf_nopiv(int M, int N, int IB, coreblas_error(3, "Illegal value of IB"); return -3; } - if ((LDA < max(1,M)) && (M > 0)) { + if ((LDA < chameleon_max(1,M)) && (M > 0)) { coreblas_error(5, "Illegal value of LDA"); return -5; } @@ -107,9 +107,9 @@ int CORE_zgetrf_nopiv(int M, int N, int IB, if ((M == 0) || (N == 0) || (IB == 0)) return MORSE_SUCCESS; - k = min(M, N); + k = chameleon_min(M, N); for(i =0 ; i < k; i += IB) { - sb = min(IB, k-i); + sb = chameleon_min(IB, k-i); /* * Factor diagonal and subdiagonal blocks and test for exact singularity. */ diff --git a/coreblas/compute/core_zherfb.c b/coreblas/compute/core_zherfb.c index 3f2079c20b711e2a3fea1fd05918cf80448a833c..933e302c109a28d1859413423557866efec0c804 100644 --- a/coreblas/compute/core_zherfb.c +++ b/coreblas/compute/core_zherfb.c @@ -141,15 +141,15 @@ int CORE_zherfb( MORSE_enum uplo, int n, coreblas_error(5, "Illegal value of nb"); return -5; } - if ( (lda < max(1,n)) && (n > 0) ) { + if ( (lda < chameleon_max(1,n)) && (n > 0) ) { coreblas_error(7, "Illegal value of lda"); return -7; } - if ( (ldt < max(1,ib)) && (ib > 0) ) { + if ( (ldt < chameleon_max(1,ib)) && (ib > 0) ) { coreblas_error(9, "Illegal value of ldt"); return -9; } - if ( (ldc < max(1,n)) && (n > 0) ) { + if ( (ldc < chameleon_max(1,n)) && (n > 0) ) { coreblas_error(11, "Illegal value of ldc"); return -11; } diff --git a/coreblas/compute/core_zlantr.c b/coreblas/compute/core_zlantr.c index 376bde3b9f12af56f224f519682f357600f75166..d100feb12bef5b12a14a149be99847f83339cade 100644 --- a/coreblas/compute/core_zlantr.c +++ b/coreblas/compute/core_zlantr.c @@ -108,7 +108,7 @@ void CORE_zlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, int i, j, imax; int idiag = (diag == MorseUnit) ? 1 : 0; - if ( min(M, N) == 0 ) { + if ( chameleon_min(M, N) == 0 ) { *normA = 0; return; } @@ -122,10 +122,10 @@ void CORE_zlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, } if ( uplo == MorseUpper ) { - M = min(M, N); + M = chameleon_min(M, N); for (j = 0; j < N; j++) { tmpA = A+(j*LDA); - imax = min(j+1-idiag, M); + imax = chameleon_min(j+1-idiag, M); for (i = 0; i < imax; i++) { value = cabs( *tmpA ); @@ -134,7 +134,7 @@ void CORE_zlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, } } } else { - N = min(M, N); + N = chameleon_min(M, N); for (j = 0; j < N; j++) { tmpA = A + j * (LDA+1) + idiag; @@ -151,7 +151,7 @@ void CORE_zlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, CORE_ztrasm( MorseColumnwise, uplo, diag, M, N, A, LDA, work ); if ( uplo == MorseLower ) - N = min(M,N); + N = chameleon_min(M,N); *normA = 0; for (i = 0; i < N; i++) { @@ -163,7 +163,7 @@ void CORE_zlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, CORE_ztrasm( MorseRowwise, uplo, diag, M, N, A, LDA, work ); if ( uplo == MorseUpper ) - M = min(M,N); + M = chameleon_min(M,N); *normA = 0; for (i = 0; i < M; i++) { diff --git a/coreblas/compute/core_zlascal.c b/coreblas/compute/core_zlascal.c index bc17934d3b06cec9c58fe7dc95456f7d6cd5460c..3fc2101d8213ca3789ecdb4fe4510dafa7930131 100644 --- a/coreblas/compute/core_zlascal.c +++ b/coreblas/compute/core_zlascal.c @@ -72,7 +72,7 @@ CORE_zlascal( MORSE_enum uplo, int m, int n, coreblas_error(3, "Illegal value of n"); return -3; } - if ( (lda < max(1,m)) && (m > 0) ) { + if ( (lda < chameleon_max(1,m)) && (m > 0) ) { coreblas_error(6, "Illegal value of lda"); return -6; } @@ -80,13 +80,13 @@ CORE_zlascal( MORSE_enum uplo, int m, int n, switch ( uplo ) { case MorseUpper: for(i=0; i<n; i++) { - cblas_zscal( min( i+1, m ), CBLAS_SADDR(alpha), A+i*lda, 1 ); + cblas_zscal( chameleon_min( i+1, m ), CBLAS_SADDR(alpha), A+i*lda, 1 ); } break; case MorseLower: for(i=0; i<n; i++) { - cblas_zscal( max( m, m-i ), CBLAS_SADDR(alpha), A+i*lda, 1 ); + cblas_zscal( chameleon_max( m, m-i ), CBLAS_SADDR(alpha), A+i*lda, 1 ); } break; default: diff --git a/coreblas/compute/core_zlatro.c b/coreblas/compute/core_zlatro.c index e7449e0ab8a1861283a60bbc9fc08fcd7dace45a..b8547cb0c185f7ad580675e816c6261f062524b8 100644 --- a/coreblas/compute/core_zlatro.c +++ b/coreblas/compute/core_zlatro.c @@ -111,11 +111,11 @@ int CORE_zlatro(MORSE_enum uplo, MORSE_enum trans, coreblas_error(4, "Illegal value of N"); return -4; } - if ( (LDA < max(1,M)) && (M > 0) ) { + if ( (LDA < chameleon_max(1,M)) && (M > 0) ) { coreblas_error(6, "Illegal value of LDA"); return -6; } - if ( (LDB < max(1,N)) && (N > 0) ) { + if ( (LDB < chameleon_max(1,N)) && (N > 0) ) { coreblas_error(8, "Illegal value of LDB"); return -8; } @@ -127,7 +127,7 @@ int CORE_zlatro(MORSE_enum uplo, MORSE_enum trans, if (trans == MorseConjTrans) { if(uplo == MorseUpper) { for(j=0; j<N; j++) - for(i=0; i<min(j+1,M); i++) + for(i=0; i<chameleon_min(j+1,M); i++) B[j+i*LDB] = conj(A[i+j*LDA]); } else if(uplo == MorseLower) { @@ -144,7 +144,7 @@ int CORE_zlatro(MORSE_enum uplo, MORSE_enum trans, else { if(uplo==MorseUpper) { for(j=0;j<N;j++) - for(i=0;i<min(j+1,M);i++) + for(i=0;i<chameleon_min(j+1,M);i++) B[j+i*LDB] = A[i+j*LDA]; } else if(uplo==MorseLower) { diff --git a/coreblas/compute/core_zpemv.c b/coreblas/compute/core_zpemv.c index 32c8782728b7b0c7d0d2ba75d471bd93f595d6c1..32d63ae48b56c26d0b4c6a20b63b83200af05b05 100644 --- a/coreblas/compute/core_zpemv.c +++ b/coreblas/compute/core_zpemv.c @@ -167,11 +167,11 @@ int CORE_zpemv(MORSE_enum trans, int storev, coreblas_error(4, "Illegal value of N"); return -4; } - if (L > min(M ,N)) { + if (L > chameleon_min(M ,N)) { coreblas_error(5, "Illegal value of L"); return -5; } - if (LDA < max(1,M)) { + if (LDA < chameleon_max(1,M)) { coreblas_error(8, "Illegal value of LDA"); return -8; } diff --git a/coreblas/compute/core_zssssm.c b/coreblas/compute/core_zssssm.c index f9a61e1c4f399cb2e346e12d53b6adf68f91a10a..2094c6d25333396fc5c5c67386ec29ea9cc83099 100644 --- a/coreblas/compute/core_zssssm.c +++ b/coreblas/compute/core_zssssm.c @@ -140,19 +140,19 @@ int CORE_zssssm(int M1, int N1, int M2, int N2, int K, int IB, coreblas_error(6, "Illegal value of IB"); return -6; } - if (LDA1 < max(1,M1)) { + if (LDA1 < chameleon_max(1,M1)) { coreblas_error(8, "Illegal value of LDA1"); return -8; } - if (LDA2 < max(1,M2)) { + if (LDA2 < chameleon_max(1,M2)) { coreblas_error(10, "Illegal value of LDA2"); return -10; } - if (LDL1 < max(1,IB)) { + if (LDL1 < chameleon_max(1,IB)) { coreblas_error(12, "Illegal value of LDL1"); return -12; } - if (LDL2 < max(1,M2)) { + if (LDL2 < chameleon_max(1,M2)) { coreblas_error(14, "Illegal value of LDL2"); return -14; } @@ -164,7 +164,7 @@ int CORE_zssssm(int M1, int N1, int M2, int N2, int K, int IB, ip = 0; for(ii = 0; ii < K; ii += IB) { - sb = min(K-ii, IB); + sb = chameleon_min(K-ii, IB); for(i = 0; i < sb; i++) { im = IPIV[ip]-1; diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c index b9addb167660efbf2578a8fcaed907bd6bb4b107..8744ed8a1577ff65b361dc50f312cef47f53dc8d 100644 --- a/coreblas/compute/core_ztpmqrt.c +++ b/coreblas/compute/core_ztpmqrt.c @@ -181,8 +181,8 @@ int CORE_ztpmqrt( MORSE_enum side, MORSE_enum trans, } else { //LAPACKE_ztpmqrt_work( LAPACK_COL_MAJOR, M, N, K, L, IB, V, LDV, T, LDT, A, LDA, B, LDB, WORK ); - coreblas_error( 3, "Illegal value of L (only 0 or M handled for now)"); - return -3; + coreblas_error( 6, "Illegal value of L (only 0 or M handled for now)"); + return -6; } return MORSE_SUCCESS; diff --git a/coreblas/compute/core_ztpqrt.c b/coreblas/compute/core_ztpqrt.c index 504951a0c6c5fd942760b21bad9fb94b9c2f6483..c12051a16acad1415cb83791b11553f7e64253a3 100644 --- a/coreblas/compute/core_ztpqrt.c +++ b/coreblas/compute/core_ztpqrt.c @@ -112,7 +112,7 @@ int CORE_ztpqrt( int M, int N, int L, int IB, coreblas_error(2, "Illegal value of N"); return -2; } - if( (L < 0) || ((L > min(M, N)) && (min(M,N) > 0))) { + if( (L < 0) || ((L > chameleon_min(M, N)) && (chameleon_min(M,N) > 0))) { coreblas_error(3, "Illegal value of L"); return -3; } @@ -120,15 +120,15 @@ int CORE_ztpqrt( int M, int N, int L, int IB, coreblas_error(4, "Illegal value of IB"); return -4; } - if ((LDA < max(1,N)) && (N > 0)) { + if ((LDA < chameleon_max(1,N)) && (N > 0)) { coreblas_error(6, "Illegal value of LDA"); return -6; } - if ((LDB < max(1,M)) && (M > 0)) { + if ((LDB < chameleon_max(1,M)) && (M > 0)) { coreblas_error(6, "Illegal value of LDB"); return -8; } - if ((LDT < max(1,IB)) && (IB > 0)) { + if ((LDT < chameleon_max(1,IB)) && (IB > 0)) { coreblas_error(6, "Illegal value of LDT"); return -10; } diff --git a/coreblas/compute/core_ztradd.c b/coreblas/compute/core_ztradd.c index 6334a874231fabfbec63af57f9107510335e8290..6d87ccbb9e88f1498d1a1a76f128b0265236cf48 100644 --- a/coreblas/compute/core_ztradd.c +++ b/coreblas/compute/core_ztradd.c @@ -134,13 +134,13 @@ int CORE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, coreblas_error(4, "Illegal value of N"); return -4; } - if ( ((trans == MorseNoTrans) && (LDA < max(1,M)) && (M > 0)) || - ((trans != MorseNoTrans) && (LDA < max(1,N)) && (N > 0)) ) + if ( ((trans == MorseNoTrans) && (LDA < chameleon_max(1,M)) && (M > 0)) || + ((trans != MorseNoTrans) && (LDA < chameleon_max(1,N)) && (N > 0)) ) { coreblas_error(7, "Illegal value of LDA"); return -7; } - if ( (LDB < max(1,M)) && (M > 0) ) { + if ( (LDB < chameleon_max(1,M)) && (M > 0) ) { coreblas_error(9, "Illegal value of LDB"); return -9; } @@ -189,7 +189,7 @@ int CORE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, #if defined(PRECISION_z) || defined(PRECISION_c) case MorseConjTrans: for (j=0; j<N; j++, A++) { - int mm = min( j+1, M ); + int mm = chameleon_min( j+1, M ); for(i=0; i<mm; i++, B++) { *B = beta * (*B) + alpha * conj(A[LDA*i]); } @@ -200,7 +200,7 @@ int CORE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, case MorseTrans: for (j=0; j<N; j++, A++) { - int mm = min( j+1, M ); + int mm = chameleon_min( j+1, M ); for(i=0; i<mm; i++, B++) { *B = beta * (*B) + alpha * (A[LDA*i]); } @@ -211,7 +211,7 @@ int CORE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, case MorseNoTrans: default: for (j=0; j<N; j++) { - int mm = min( j+1, M ); + int mm = chameleon_min( j+1, M ); for(i=0; i<mm; i++, B++, A++) { *B = beta * (*B) + alpha * (*A); } diff --git a/coreblas/compute/core_ztrasm.c b/coreblas/compute/core_ztrasm.c index dcaf6231a6759cb38bbf1be75a0b6c4d3e2024fb..940458aece604159c295bdb7c19d2db91e49af79 100644 --- a/coreblas/compute/core_ztrasm.c +++ b/coreblas/compute/core_ztrasm.c @@ -84,12 +84,12 @@ void CORE_ztrasm(MORSE_enum storev, MORSE_enum uplo, MORSE_enum diag, * MorseUpper / MorseColumnwise */ if (uplo == MorseUpper ) { - M = min(M, N); + M = chameleon_min(M, N); if (storev == MorseColumnwise) { for (j = 0; j < N; j++) { tmpA = A+(j*lda); - imax = min(j+1-idiag, M); + imax = chameleon_min(j+1-idiag, M); if ( j < M ) work[j] += idiag; @@ -111,7 +111,7 @@ void CORE_ztrasm(MORSE_enum storev, MORSE_enum uplo, MORSE_enum diag, } for (j = 0; j < N; j++) { tmpA = A+(j*lda); - imax = min(j+1-idiag, M); + imax = chameleon_min(j+1-idiag, M); for (i = 0; i < imax; i++) { work[i] += cabs(*tmpA); @@ -120,7 +120,7 @@ void CORE_ztrasm(MORSE_enum storev, MORSE_enum uplo, MORSE_enum diag, } } } else { - N = min(M, N); + N = chameleon_min(M, N); /* * MorseLower / MorseColumnwise diff --git a/coreblas/compute/core_ztrssq.c b/coreblas/compute/core_ztrssq.c index fd0e46a75dc3ba1e0fb399a1063faa875a88352f..154091c38b1e0285b47686d27865a602775edfcf 100644 --- a/coreblas/compute/core_ztrssq.c +++ b/coreblas/compute/core_ztrssq.c @@ -105,16 +105,16 @@ int CORE_ztrssq(MORSE_enum uplo, MORSE_enum diag, int M, int N, double *ptr; if ( diag == MorseUnit ){ - tmp = sqrt( min(M, N) ); + tmp = sqrt( chameleon_min(M, N) ); UPDATE( 1., tmp ); } if (uplo == MorseUpper ) { - M = min(M, N); + M = chameleon_min(M, N); for(j=0; j<N; j++) { ptr = (double*) ( A + j * LDA ); - imax = min(j+1-idiag, M); + imax = chameleon_min(j+1-idiag, M); for(i=0; i<imax; i++, ptr++) { tmp = fabs(*ptr); @@ -129,7 +129,7 @@ int CORE_ztrssq(MORSE_enum uplo, MORSE_enum diag, int M, int N, } } else { - N = min(M, N); + N = chameleon_min(M, N); for(j=0; j<N; j++) { ptr = (double*) ( A + j * (LDA+1) + idiag ); diff --git a/coreblas/compute/core_ztslqt.c b/coreblas/compute/core_ztslqt.c index 14436675df0f29b2357e7f555fb59e2fad2e5606..dd188ad6f90a22c883b795df87a3ce68329bf03e 100644 --- a/coreblas/compute/core_ztslqt.c +++ b/coreblas/compute/core_ztslqt.c @@ -132,7 +132,7 @@ int CORE_ztslqt(int M, int N, int IB, coreblas_error(3, "Illegal value of IB"); return -3; } - if ((LDA2 < max(1,M)) && (M > 0)) { + if ((LDA2 < chameleon_max(1,M)) && (M > 0)) { coreblas_error(8, "Illegal value of LDA2"); return -8; } @@ -142,7 +142,7 @@ int CORE_ztslqt(int M, int N, int IB, return MORSE_SUCCESS; for(ii = 0; ii < M; ii += IB) { - sb = min(M-ii, IB); + sb = chameleon_min(M-ii, IB); for(i = 0; i < sb; i++) { /* * Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:N ). diff --git a/coreblas/compute/core_ztsmlq.c b/coreblas/compute/core_ztsmlq.c index 47a4f09c6348a51a108253cd0d289a58a0fad0fc..d12b66707e4e1eab5ba7f158750613b7534cb570 100644 --- a/coreblas/compute/core_ztsmlq.c +++ b/coreblas/compute/core_ztsmlq.c @@ -194,23 +194,23 @@ int CORE_ztsmlq(MORSE_enum side, MORSE_enum trans, coreblas_error(8, "Illegal value of IB"); return -8; } - if (LDA1 < max(1,M1)){ + if (LDA1 < chameleon_max(1,M1)){ coreblas_error(10, "Illegal value of LDA1"); return -10; } - if (LDA2 < max(1,M2)){ + if (LDA2 < chameleon_max(1,M2)){ coreblas_error(12, "Illegal value of LDA2"); return -12; } - if (LDV < max(1,K)){ + if (LDV < chameleon_max(1,K)){ coreblas_error(14, "Illegal value of LDV"); return -14; } - if (LDT < max(1,IB)){ + if (LDT < chameleon_max(1,IB)){ coreblas_error(16, "Illegal value of LDT"); return -16; } - if (LDWORK < max(1,NW)){ + if (LDWORK < chameleon_max(1,NW)){ coreblas_error(18, "Illegal value of LDWORK"); return -18; } @@ -237,7 +237,7 @@ int CORE_ztsmlq(MORSE_enum side, MORSE_enum trans, } for(i = i1; (i > -1) && (i < K); i += i3) { - kb = min(IB, K-i); + kb = chameleon_min(IB, K-i); if (side == MorseLeft) { /* diff --git a/coreblas/compute/core_ztsmqr.c b/coreblas/compute/core_ztsmqr.c index 10d4f2d03a98a135c8062c0618996572d8489829..d3473600b4f59945806782f638219c3083ac1495 100644 --- a/coreblas/compute/core_ztsmqr.c +++ b/coreblas/compute/core_ztsmqr.c @@ -196,23 +196,23 @@ int CORE_ztsmqr(MORSE_enum side, MORSE_enum trans, coreblas_error(8, "Illegal value of IB"); return -8; } - if (LDA1 < max(1,M1)){ + if (LDA1 < chameleon_max(1,M1)){ coreblas_error(10, "Illegal value of LDA1"); return -10; } - if (LDA2 < max(1,M2)){ + if (LDA2 < chameleon_max(1,M2)){ coreblas_error(12, "Illegal value of LDA2"); return -12; } - if (LDV < max(1,NQ)){ + if (LDV < chameleon_max(1,NQ)){ coreblas_error(14, "Illegal value of LDV"); return -14; } - if (LDT < max(1,IB)){ + if (LDT < chameleon_max(1,IB)){ coreblas_error(16, "Illegal value of LDT"); return -16; } - if (LDWORK < max(1,NW)){ + if (LDWORK < chameleon_max(1,NW)){ coreblas_error(18, "Illegal value of LDWORK"); return -18; } @@ -232,7 +232,7 @@ int CORE_ztsmqr(MORSE_enum side, MORSE_enum trans, } for(i = i1; (i > -1) && (i < K); i += i3) { - kb = min(IB, K-i); + kb = chameleon_min(IB, K-i); if (side == MorseLeft) { /* diff --git a/coreblas/compute/core_ztsqrt.c b/coreblas/compute/core_ztsqrt.c index f937018339a41e8ce02c33f8bff8d71a2627d374..9323e5b9178e8496045baaff59c08edabcd932ae 100644 --- a/coreblas/compute/core_ztsqrt.c +++ b/coreblas/compute/core_ztsqrt.c @@ -122,7 +122,7 @@ int CORE_ztsqrt(int M, int N, int IB, coreblas_error(3, "Illegal value of IB"); return -3; } - if ((LDA2 < max(1,M)) && (M > 0)) { + if ((LDA2 < chameleon_max(1,M)) && (M > 0)) { coreblas_error(8, "Illegal value of LDA2"); return -8; } @@ -132,7 +132,7 @@ int CORE_ztsqrt(int M, int N, int IB, return MORSE_SUCCESS; for(ii = 0; ii < N; ii += IB) { - sb = min(N-ii, IB); + sb = chameleon_min(N-ii, IB); for(i = 0; i < sb; i++) { /* * Generate elementary reflector H( II*IB+I ) to annihilate diff --git a/coreblas/compute/core_ztstrf.c b/coreblas/compute/core_ztstrf.c index 272d7850cea4e3de4361c6c59085ced1594ad48e..c235d33880deee1bf4fcc260cbd94bb646d46328 100644 --- a/coreblas/compute/core_ztstrf.c +++ b/coreblas/compute/core_ztstrf.c @@ -129,15 +129,15 @@ int CORE_ztstrf(int M, int N, int IB, int NB, coreblas_error(3, "Illegal value of IB"); return -3; } - if ((LDU < max(1,NB)) && (NB > 0)) { + if ((LDU < chameleon_max(1,NB)) && (NB > 0)) { coreblas_error(6, "Illegal value of LDU"); return -6; } - if ((LDA < max(1,M)) && (M > 0)) { + if ((LDA < chameleon_max(1,M)) && (M > 0)) { coreblas_error(8, "Illegal value of LDA"); return -8; } - if ((LDL < max(1,IB)) && (IB > 0)) { + if ((LDL < chameleon_max(1,IB)) && (IB > 0)) { coreblas_error(10, "Illegal value of LDL"); return -10; } @@ -151,7 +151,7 @@ int CORE_ztstrf(int M, int N, int IB, int NB, ip = 0; for (ii = 0; ii < N; ii += IB) { - sb = min(N-ii, IB); + sb = chameleon_min(N-ii, IB); for (i = 0; i < sb; i++) { im = cblas_izamax(M, &A[LDA*(ii+i)], 1); diff --git a/coreblas/compute/core_zttlqt.c b/coreblas/compute/core_zttlqt.c index 9dc6f75f21b8633443056f38b60f5b3ba06aaebc..6a3551aa0aeaf9ba75295406514f2c709a84b62a 100644 --- a/coreblas/compute/core_zttlqt.c +++ b/coreblas/compute/core_zttlqt.c @@ -136,7 +136,7 @@ int CORE_zttlqt(int M, int N, int IB, coreblas_error(3, "Illegal value of IB"); return -3; } - if ((LDA2 < max(1,M)) && (M > 0)) { + if ((LDA2 < chameleon_max(1,M)) && (M > 0)) { coreblas_error(7, "Illegal value of LDA2"); return -7; } @@ -151,11 +151,11 @@ int CORE_zttlqt(int M, int N, int IB, 0., 0., T, LDT); for(ii = 0; ii < M; ii += IB) { - sb = min(M-ii, IB); + sb = chameleon_min(M-ii, IB); for(i = 0; i < sb; i++) { j = ii + i; mi = sb-i-1; - ni = min( j + 1, N); + ni = chameleon_min( j + 1, N); /* * Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:M ). */ @@ -200,12 +200,12 @@ int CORE_zttlqt(int M, int N, int IB, if (i > 0 ) { - l = min(i, max(0, N-ii)); + l = chameleon_min(i, chameleon_max(0, N-ii)); alpha = -(TAU[j]); CORE_zpemv( MorseNoTrans, MorseRowwise, - i , min(j, N), l, + i , chameleon_min(j, N), l, alpha, &A2[ii], LDA2, &A2[j], LDA2, zzero, &T[LDT*j], 1, @@ -232,8 +232,8 @@ int CORE_zttlqt(int M, int N, int IB, /* Apply Q to the rest of the matrix to the right */ if (M > ii+sb) { mi = M-(ii+sb); - ni = min(ii+sb, N); - l = min(sb, max(0, ni-ii)); + ni = chameleon_min(ii+sb, N); + l = chameleon_min(sb, chameleon_max(0, ni-ii)); CORE_zparfb( MorseRight, MorseNoTrans, MorseForward, MorseRowwise, diff --git a/coreblas/compute/core_zttmlq.c b/coreblas/compute/core_zttmlq.c index f51aabd5c7d8e85b8a355834cf62f03355b18ea4..24d5dfa796c126d4f50a02912ebf3b2b05400be1 100644 --- a/coreblas/compute/core_zttmlq.c +++ b/coreblas/compute/core_zttmlq.c @@ -190,23 +190,23 @@ int CORE_zttmlq(MORSE_enum side, MORSE_enum trans, coreblas_error(8, "Illegal value of IB"); return -8; } - if (LDA1 < max(1,M1)){ + if (LDA1 < chameleon_max(1,M1)){ coreblas_error(10, "Illegal value of LDA1"); return -10; } - if (LDA2 < max(1,M2)){ + if (LDA2 < chameleon_max(1,M2)){ coreblas_error(12, "Illegal value of LDA2"); return -12; } - if (LDV < max(1,NQ)){ + if (LDV < chameleon_max(1,NQ)){ coreblas_error(14, "Illegal value of LDV"); return -14; } - if (LDT < max(1,IB)){ + if (LDT < chameleon_max(1,IB)){ coreblas_error(16, "Illegal value of LDT"); return -16; } - if (LDWORK < max(1,NW)){ + if (LDWORK < chameleon_max(1,NW)){ coreblas_error(18, "Illegal value of LDWORK"); return -18; } @@ -234,18 +234,18 @@ int CORE_zttmlq(MORSE_enum side, MORSE_enum trans, } for (i = i1; (i > -1) && (i < K); i+=i3) { - kb = min(IB, K-i); + kb = chameleon_min(IB, K-i); if (side == MorseLeft) { mi1 = kb; // M1 - i; - mi2 = min(i+kb, M2); - l = min(kb, max(0, M2-i)); + mi2 = chameleon_min(i+kb, M2); + l = chameleon_min(kb, chameleon_max(0, M2-i)); ic = i; } else { ni1 = kb; - ni2 = min(i+kb, N2); - l = min(kb, max(0, N2-i)); + ni2 = chameleon_min(i+kb, N2); + l = chameleon_min(kb, chameleon_max(0, N2-i)); jc = i; } diff --git a/coreblas/compute/core_zttmqr.c b/coreblas/compute/core_zttmqr.c index d718bff75d3d092f4410b28076d06ee25ba26a8d..95b8aa2df576d45ecbc8bb8895dc4327889e31c9 100644 --- a/coreblas/compute/core_zttmqr.c +++ b/coreblas/compute/core_zttmqr.c @@ -190,23 +190,23 @@ int CORE_zttmqr(MORSE_enum side, MORSE_enum trans, coreblas_error(8, "Illegal value of IB"); return -8; } - if (LDA1 < max(1,M1)){ + if (LDA1 < chameleon_max(1,M1)){ coreblas_error(10, "Illegal value of LDA1"); return -10; } - if (LDA2 < max(1,M2)){ + if (LDA2 < chameleon_max(1,M2)){ coreblas_error(12, "Illegal value of LDA2"); return -12; } - if (LDV < max(1,NQ)){ + if (LDV < chameleon_max(1,NQ)){ coreblas_error(14, "Illegal value of LDV"); return -14; } - if (LDT < max(1,IB)){ + if (LDT < chameleon_max(1,IB)){ coreblas_error(16, "Illegal value of LDT"); return -16; } - if (LDWORK < max(1,NW)){ + if (LDWORK < chameleon_max(1,NW)){ coreblas_error(18, "Illegal value of LDWORK"); return -18; } @@ -226,18 +226,18 @@ int CORE_zttmqr(MORSE_enum side, MORSE_enum trans, } for (i = i1; (i > -1) && (i < K); i+=i3) { - kb = min(IB, K-i); + kb = chameleon_min(IB, K-i); if (side == MorseLeft) { mi1 = kb; - mi2 = min(i+kb, M2); - l = min(kb, max(0, M2-i)); + mi2 = chameleon_min(i+kb, M2); + l = chameleon_min(kb, chameleon_max(0, M2-i)); ic = i; } else { ni1 = kb; - ni2 = min(i+kb, N2); - l = min(kb, max(0, N2-i)); + ni2 = chameleon_min(i+kb, N2); + l = chameleon_min(kb, chameleon_max(0, N2-i)); jc = i; } diff --git a/coreblas/compute/core_zttqrt.c b/coreblas/compute/core_zttqrt.c index f98349d08877e7800b790960070495e282c8dc33..5aa323be9aa71dc5c7002675d90c62dedab06a4b 100644 --- a/coreblas/compute/core_zttqrt.c +++ b/coreblas/compute/core_zttqrt.c @@ -134,7 +134,7 @@ int CORE_zttqrt(int M, int N, int IB, coreblas_error(3, "Illegal value of IB"); return -3; } - if ((LDA2 < max(1,M)) && (M > 0)) { + if ((LDA2 < chameleon_max(1,M)) && (M > 0)) { coreblas_error(7, "Illegal value of LDA2"); return -7; } @@ -149,10 +149,10 @@ int CORE_zttqrt(int M, int N, int IB, 0., 0., T, LDT); for (ii = 0; ii < N; ii += IB) { - sb = min(N-ii, IB); + sb = chameleon_min(N-ii, IB); for (i = 0; i < sb; i++) { j = ii + i; - mi = min( j + 1, M ); + mi = chameleon_min( j + 1, M ); ni = sb-i-1; /* @@ -206,12 +206,12 @@ int CORE_zttqrt(int M, int N, int IB, if ( i > 0 ) { - l = min(i, max(0, M-ii)); + l = chameleon_min(i, chameleon_max(0, M-ii)); alpha = -(TAU[j]); CORE_zpemv( MorseConjTrans, MorseColumnwise, - min(j, M), i, l, + chameleon_min(j, M), i, l, alpha, &A2[LDA2*ii], LDA2, &A2[LDA2*j], 1, zzero, &T[LDT*j], 1, @@ -232,9 +232,9 @@ int CORE_zttqrt(int M, int N, int IB, /* Apply Q' to the rest of the matrix to the left */ if (N > ii+sb) { - mi = min(ii+sb, M); + mi = chameleon_min(ii+sb, M); ni = N-(ii+sb); - l = min(sb, max(0, mi-ii)); + l = chameleon_min(sb, chameleon_max(0, mi-ii)); CORE_zparfb( MorseLeft, MorseConjTrans, MorseForward, MorseColumnwise, diff --git a/coreblas/compute/core_zunmlq.c b/coreblas/compute/core_zunmlq.c index 26b3e52e62145af00e3d02a1172923b0c0dbfe12..5f222e5fc9ab1eb9c8fcc7d6f13b27edd6625e68 100644 --- a/coreblas/compute/core_zunmlq.c +++ b/coreblas/compute/core_zunmlq.c @@ -168,15 +168,15 @@ int CORE_zunmlq(MORSE_enum side, MORSE_enum trans, coreblas_error(6, "Illegal value of IB"); return -6; } - if ((LDA < max(1,K)) && (K > 0)) { + if ((LDA < chameleon_max(1,K)) && (K > 0)) { coreblas_error(8, "Illegal value of LDA"); return -8; } - if ((LDC < max(1,M)) && (M > 0)) { + if ((LDC < chameleon_max(1,M)) && (M > 0)) { coreblas_error(12, "Illegal value of LDC"); return -12; } - if ((LDWORK < max(1,nw)) && (nw > 0)) { + if ((LDWORK < chameleon_max(1,nw)) && (nw > 0)) { coreblas_error(14, "Illegal value of LDWORK"); return -14; } @@ -203,7 +203,7 @@ int CORE_zunmlq(MORSE_enum side, MORSE_enum trans, } for(i = i1; (i >- 1) && (i < K); i+=i3 ) { - kb = min(IB, K-i); + kb = chameleon_min(IB, K-i); if (side == MorseLeft) { /* diff --git a/coreblas/compute/core_zunmqr.c b/coreblas/compute/core_zunmqr.c index fb8f8d5bfe24dab1be3a37eda9adb6f0eb197809..6e864baf4bc3235b521639770a00d41e73871568 100644 --- a/coreblas/compute/core_zunmqr.c +++ b/coreblas/compute/core_zunmqr.c @@ -168,15 +168,15 @@ int CORE_zunmqr(MORSE_enum side, MORSE_enum trans, coreblas_error(6, "Illegal value of IB"); return -6; } - if ((LDA < max(1,nq)) && (nq > 0)) { + if ((LDA < chameleon_max(1,nq)) && (nq > 0)) { coreblas_error(8, "Illegal value of LDA"); return -8; } - if ((LDC < max(1,M)) && (M > 0)) { + if ((LDC < chameleon_max(1,M)) && (M > 0)) { coreblas_error(12, "Illegal value of LDC"); return -12; } - if ((LDWORK < max(1,nw)) && (nw > 0)) { + if ((LDWORK < chameleon_max(1,nw)) && (nw > 0)) { coreblas_error(14, "Illegal value of LDWORK"); return -14; } @@ -196,7 +196,7 @@ int CORE_zunmqr(MORSE_enum side, MORSE_enum trans, } for(i = i1; (i >- 1) && (i < K); i+=i3 ) { - kb = min(IB, K-i); + kb = chameleon_min(IB, K-i); if (side == MorseLeft) { /* diff --git a/coreblas/include/coreblas.h b/coreblas/include/coreblas.h index 7ea284ac97a6256cb3203439a40bd73c276b1c92..f67d51a969cee3221da0ff62303d7ab2e939d595 100644 --- a/coreblas/include/coreblas.h +++ b/coreblas/include/coreblas.h @@ -76,19 +76,6 @@ extern "C" { #define CBLAS_DIAG enum CBLAS_DIAG #define CBLAS_SIDE enum CBLAS_SIDE -/******************************************************************************* - * Global utilities - **/ -#ifndef max -#define max(a, b) ((a) > (b) ? (a) : (b)) -#endif -#ifndef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#endif -#ifndef roundup -#define roundup(a, b) (b <= 0) ? (a) : (((a) + (b)-1) & ~((b)-1)) -#endif - /** **************************************************************************** * LAPACK Constants **/ diff --git a/cudablas/compute/CMakeLists.txt b/cudablas/compute/CMakeLists.txt index 3f97d72ddca9ce073357d099d0f308cd602386f8..40686c92fb785f93f8b7ec6de0f545e4759ec770 100644 --- a/cudablas/compute/CMakeLists.txt +++ b/cudablas/compute/CMakeLists.txt @@ -38,6 +38,7 @@ set(ZSRC cuda_zsymm.c cuda_zsyr2k.c cuda_zsyrk.c + cuda_ztpmqrt.c cuda_ztrmm.c cuda_ztrsm.c cuda_ztsmlq.c diff --git a/cudablas/compute/cuda_zgelqt.c b/cudablas/compute/cuda_zgelqt.c index d5b8bdb03366b5a81978089714463fd3f9ca0487..411251c954b08e3daea146474c0a9ae13ad2d00c 100644 --- a/cudablas/compute/cuda_zgelqt.c +++ b/cudablas/compute/cuda_zgelqt.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" #if defined(CHAMELEON_USE_MAGMA) int CUDA_zgelqt( @@ -50,11 +51,11 @@ int CUDA_zgelqt( return -1; } else if (n < 0) { return -2; - } else if (ldda < max(1,m)) { + } else if (ldda < chameleon_max(1,m)) { return -4; } - k = min(m,n); + k = chameleon_min(m,n); if (k == 0) { hwork[0] = *((magmaDoubleComplex*) &one); return MAGMA_SUCCESS; @@ -69,7 +70,7 @@ int CUDA_zgelqt( old_i = 0; old_ib = nb; for (i = 0; i < k-nb; i += nb) { - ib = min(k-i, nb); + ib = chameleon_min(k-i, nb); cols = n-i; magma_zgetmatrix_async( ib, cols, da_ref(i,i), ldda, @@ -101,7 +102,7 @@ int CUDA_zgelqt( /* put 0s in the lower triangular part of a panel (and 1s on the diagonal); copy the lower triangular in d */ - CORE_zgesplit(MorseRight, MorseUnit, ib, min(ib,cols), + CORE_zgesplit(MorseRight, MorseUnit, ib, chameleon_min(ib,cols), (double _Complex*) v_ref(0,i), ib, (double _Complex*) d, ib); diff --git a/cudablas/compute/cuda_zgemerge.c b/cudablas/compute/cuda_zgemerge.c index 385cf4683d5f435b60e78c83eeb429053dca228b..6f574b3e0e8b8efd5ef0fc2e706aee2432f5a61c 100644 --- a/cudablas/compute/cuda_zgemerge.c +++ b/cudablas/compute/cuda_zgemerge.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zgemerge( MORSE_enum side, MORSE_enum diag, @@ -31,7 +32,7 @@ CUDA_zgemerge( MORSE_enum side, MORSE_enum diag, cuDoubleComplex *B, int LDB, CUBLAS_STREAM_PARAM) { - int i, j; + int i; cuDoubleComplex *cola, *colb; if (M < 0) { @@ -40,10 +41,10 @@ CUDA_zgemerge( MORSE_enum side, MORSE_enum diag, if (N < 0) { return -2; } - if ( (LDA < max(1,M)) && (M > 0) ) { + if ( (LDA < chameleon_max(1,M)) && (M > 0) ) { return -5; } - if ( (LDB < max(1,M)) && (M > 0) ) { + if ( (LDB < chameleon_max(1,M)) && (M > 0) ) { return -7; } @@ -67,5 +68,6 @@ CUDA_zgemerge( MORSE_enum side, MORSE_enum diag, } } + (void)diag; return MORSE_SUCCESS; } diff --git a/cudablas/compute/cuda_zgemm.c b/cudablas/compute/cuda_zgemm.c index 4f2102a8421ad6d9010901d9a3401bdfa30f44d7..0c6a61b4f43a214eeaa83a3b2f4c356c22207adb 100644 --- a/cudablas/compute/cuda_zgemm.c +++ b/cudablas/compute/cuda_zgemm.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zgemm(MORSE_enum transa, MORSE_enum transb, int m, int n, int k, @@ -33,6 +34,7 @@ int CUDA_zgemm(MORSE_enum transa, MORSE_enum transb, cuDoubleComplex *C, int ldc, CUBLAS_STREAM_PARAM) { + #if !defined(CHAMELEON_USE_CUBLAS_V2) cublasSetKernelStream( stream ); #endif diff --git a/cudablas/compute/cuda_zgeqrt.c b/cudablas/compute/cuda_zgeqrt.c index 6ffa3e3b553c5fb6a5bfa8b3df58c742c08ee587..f7cb69ab82eb188869c2d4cab03e8b0cb0920dce 100644 --- a/cudablas/compute/cuda_zgeqrt.c +++ b/cudablas/compute/cuda_zgeqrt.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" #if defined(CHAMELEON_USE_MAGMA) int CUDA_zgeqrt( @@ -50,11 +51,11 @@ int CUDA_zgeqrt( return -1; } else if (n < 0) { return -2; - } else if (ldda < max(1,m)) { + } else if (ldda < chameleon_max(1,m)) { return -4; } - k = min(m,n); + k = chameleon_min(m,n); if (k == 0) { hwork[0] = *((magmaDoubleComplex*) &one); return MAGMA_SUCCESS; @@ -69,7 +70,7 @@ int CUDA_zgeqrt( old_i = 0; old_ib = nb; for (i = 0; i < k-nb; i += nb) { - ib = min(k-i, nb); + ib = chameleon_min(k-i, nb); rows = m -i; magma_zgetmatrix_async( rows, ib, da_ref(i,i), ldda, @@ -101,7 +102,7 @@ int CUDA_zgeqrt( /* Put 0s in the upper triangular part of a panel (and 1s on the diagonal); copy the upper triangular in d. */ - CORE_zgesplit(MorseLeft, MorseUnit, min(rows,ib), ib, + CORE_zgesplit(MorseLeft, MorseUnit, chameleon_min(rows,ib), ib, (double _Complex*) v_ref(i, 0), ldv, (double _Complex*) d, ib); diff --git a/cudablas/compute/cuda_zgessm.c b/cudablas/compute/cuda_zgessm.c index 70807372465220e825b39d28f34824d0d5a7d0e6..93b7065348fad1961b82fb0d33ea86257664d7ef 100644 --- a/cudablas/compute/cuda_zgessm.c +++ b/cudablas/compute/cuda_zgessm.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" #if defined(CHAMELEON_USE_MAGMA) #if defined(HAVE_MAGMA_GETRF_INCPIV_GPU) diff --git a/cudablas/compute/cuda_zgetrf.c b/cudablas/compute/cuda_zgetrf.c index 6f9caac92daffa5ca0765c7ff80d22447f4452bc..8da4fe5e3333332b99890f9525c16fa6c040de54 100644 --- a/cudablas/compute/cuda_zgetrf.c +++ b/cudablas/compute/cuda_zgetrf.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" #if defined(CHAMELEON_USE_MAGMA) #if defined(HAVE_MAGMA_GETRF_INCPIV_GPU) diff --git a/cudablas/compute/cuda_zhemm.c b/cudablas/compute/cuda_zhemm.c index 0e70070b980d6b45a5286be1c835e06cacc74ce5..8ed742362c32c0c6d0e2891510c333efb7cf431d 100644 --- a/cudablas/compute/cuda_zhemm.c +++ b/cudablas/compute/cuda_zhemm.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zhemm(MORSE_enum side, MORSE_enum uplo, int m, int n, diff --git a/cudablas/compute/cuda_zher2k.c b/cudablas/compute/cuda_zher2k.c index b810f83aebad6f7833f96466dd80c9bc6c08e5b8..5b340abd58953d91f2d8a60965ab935173207972 100644 --- a/cudablas/compute/cuda_zher2k.c +++ b/cudablas/compute/cuda_zher2k.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zher2k(MORSE_enum uplo, MORSE_enum trans, int n, int k, diff --git a/cudablas/compute/cuda_zherfb.c b/cudablas/compute/cuda_zherfb.c new file mode 100644 index 0000000000000000000000000000000000000000..4563d938864b54b4ec0bec0fb9d7c07f0c806790 --- /dev/null +++ b/cudablas/compute/cuda_zherfb.c @@ -0,0 +1,92 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file cuda_zherfb.c + * + * MORSE cudablas kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver, + * and INRIA Bordeaux Sud-Ouest + * + * @author Florent Pruvost + * @date 2015-09-16 + * @precisions normal z -> c d s + * + **/ +#include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" + +int +CUDA_zherfb( MORSE_enum uplo, int n, + int k, int ib, int nb, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *T, int ldt, + cuDoubleComplex *C, int ldc, + cuDoubleComplex *WORK, int ldwork, + CUBLAS_STREAM_PARAM ) +{ + /* Check input arguments */ + if ((uplo != MorseUpper) && (uplo != MorseLower)) { + cudablas_error(1, "Illegal value of uplo"); + return -1; + } + if (n < 0) { + cudablas_error(2, "Illegal value of n"); + return -2; + } + if (k < 0) { + cudablas_error(3, "Illegal value of k"); + return -3; + } + if (ib < 0) { + cudablas_error(4, "Illegal value of ib"); + return -4; + } + if (nb < 0) { + cudablas_error(5, "Illegal value of nb"); + return -5; + } + if ( (lda < chameleon_max(1,n)) && (n > 0) ) { + cudablas_error(7, "Illegal value of lda"); + return -7; + } + if ( (ldt < chameleon_max(1,ib)) && (ib > 0) ) { + cudablas_error(9, "Illegal value of ldt"); + return -9; + } + if ( (ldc < chameleon_max(1,n)) && (n > 0) ) { + cudablas_error(11, "Illegal value of ldc"); + return -11; + } + + if (uplo == MorseLower) { + /* Left */ + CUDA_zunmqrt( MorseLeft, MorseConjTrans, n, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, + CUBLAS_STREAM_VALUE ); + /* Right */ + CUDA_zunmqrt( MorseRight, MorseNoTrans, n, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, + CUBLAS_STREAM_VALUE ); + } + else { + /* Right */ + CUDA_zunmlqt( MorseRight, MorseConjTrans, n, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, + CUBLAS_STREAM_VALUE ); + /* Left */ + CUDA_zunmlqt( MorseLeft, MorseNoTrans, n, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, + CUBLAS_STREAM_VALUE ); + } + return 0; +} diff --git a/cudablas/compute/cuda_zherk.c b/cudablas/compute/cuda_zherk.c index d5af14ecfec914935441e405765de7902accf6c3..39717408795f1dbd70076d4ccf81b7867888489f 100644 --- a/cudablas/compute/cuda_zherk.c +++ b/cudablas/compute/cuda_zherk.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zherk( MORSE_enum uplo, MORSE_enum trans, int n, int k, diff --git a/cudablas/compute/cuda_zlarfb.c b/cudablas/compute/cuda_zlarfb.c index a85475858ec48bbefd2f74212b1c34ee275db658..c35b555861e140400f223dc7a5d460a75286def8 100644 --- a/cudablas/compute/cuda_zlarfb.c +++ b/cudablas/compute/cuda_zlarfb.c @@ -25,6 +25,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zlarfb(MORSE_enum side, MORSE_enum trans, @@ -46,10 +47,11 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans, double mzone = -1.0; #endif /* defined(PRECISION_z) || defined(PRECISION_c) */ - int j; MORSE_enum transT, uplo, notransV, transV; - CUBLAS_GET_STREAM; +#if !defined(CHAMELEON_USE_CUBLAS_V2) + cublasSetKernelStream( stream ); +#endif /* Check input arguments */ if ((side != MorseLeft) && (side != MorseRight)) { diff --git a/cudablas/compute/cuda_zlauum.c b/cudablas/compute/cuda_zlauum.c index 8dce340ad8849659eee4b3aa21eb2dc2b615d1ce..dda1468b6166dc10eefca1a5085086472d65d7aa 100644 --- a/cudablas/compute/cuda_zlauum.c +++ b/cudablas/compute/cuda_zlauum.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" #if defined(CHAMELEON_USE_MAGMA) int CUDA_zlauum( diff --git a/cudablas/compute/cuda_zparfb.c b/cudablas/compute/cuda_zparfb.c index 23c357a510a12b419c1cac098f31eb20ddc39215..83e0b393aedc1488a37d5a7609f75f52b204924f 100644 --- a/cudablas/compute/cuda_zparfb.c +++ b/cudablas/compute/cuda_zparfb.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zparfb(MORSE_enum side, MORSE_enum trans, @@ -264,5 +265,6 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, return MORSE_ERR_NOT_SUPPORTED; } + (void)L; return MORSE_SUCCESS; } diff --git a/cudablas/compute/cuda_zpotrf.c b/cudablas/compute/cuda_zpotrf.c index 38e29313ffa0be31a6fc8a14af446b97ef8b81eb..cdd6f4cf1ad9faa19d3fa79bedae62556a0703f2 100644 --- a/cudablas/compute/cuda_zpotrf.c +++ b/cudablas/compute/cuda_zpotrf.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" #if defined(CHAMELEON_USE_MAGMA) int CUDA_zpotrf( diff --git a/cudablas/compute/cuda_zssssm.c b/cudablas/compute/cuda_zssssm.c index 4c21b22deab92b32056ac22bcddbcd7e2ad6a2a6..3503425397998a8922a686ccdcaa5ce82ecb03eb 100644 --- a/cudablas/compute/cuda_zssssm.c +++ b/cudablas/compute/cuda_zssssm.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" #if defined(CHAMELEON_USE_MAGMA) #if defined(HAVE_MAGMA_GETRF_INCPIV_GPU) diff --git a/cudablas/compute/cuda_zsymm.c b/cudablas/compute/cuda_zsymm.c index 4d93b289ee5609171b927078440bb85f3dca10d9..93ec3f386942d33cee4250721be2a794679e581a 100644 --- a/cudablas/compute/cuda_zsymm.c +++ b/cudablas/compute/cuda_zsymm.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zsymm(MORSE_enum side, MORSE_enum uplo, int m, int n, diff --git a/cudablas/compute/cuda_zsyr2k.c b/cudablas/compute/cuda_zsyr2k.c index b9782a4e1e45532947abea775b1e53f2a1111945..839ea2d68774c3a24f746048f2d9737d53640b85 100644 --- a/cudablas/compute/cuda_zsyr2k.c +++ b/cudablas/compute/cuda_zsyr2k.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zsyr2k( MORSE_enum uplo, MORSE_enum trans, diff --git a/cudablas/compute/cuda_zsyrk.c b/cudablas/compute/cuda_zsyrk.c index 24c6c3baee68a63b0fa250e80328126ecfb6770e..e988fbc41d9f56abd0cc73f8d42190fdea81bef0 100644 --- a/cudablas/compute/cuda_zsyrk.c +++ b/cudablas/compute/cuda_zsyrk.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zsyrk(MORSE_enum uplo, MORSE_enum trans, int n, int k, diff --git a/cudablas/compute/cuda_ztpmqrt.c b/cudablas/compute/cuda_ztpmqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..b9f6afac4cabd9a66c017c29e16abad91e7a2b97 --- /dev/null +++ b/cudablas/compute/cuda_ztpmqrt.c @@ -0,0 +1,83 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file cuda_ztpmqrt.c + * + * MORSE cudablas kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver, + * and INRIA Bordeaux Sud-Ouest + * + * @author Florent Pruvost + * @date 2015-09-16 + * @precisions normal z -> c d s + * + **/ +#include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" + +int +CUDA_ztpmqrt( MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int L, int IB, + const cuDoubleComplex *V, int LDV, + const cuDoubleComplex *T, int LDT, + cuDoubleComplex *A, int LDA, + cuDoubleComplex *B, int LDB, + cuDoubleComplex *WORK, + CUBLAS_STREAM_PARAM ) +{ + int m1, n1, ldwork, ldworkc, ws; + + /* Check input arguments */ + if ((side != MorseLeft) && (side != MorseRight)) { + cudablas_error(1, "Illegal value of side"); + return -1; + } + + if ( side == MorseLeft ) { + m1 = K; + n1 = N; + ldwork = IB; + ldworkc = M; + ws = K * n1; + } + else { + m1 = M; + n1 = K; + ldwork = m1; + ldworkc = IB; + ws = m1 * K; + } + + /* TS case */ + if (L == 0) { + CUDA_ztsmqr( side, trans, m1, n1, M, N, K, IB, + A, LDA, B, LDB, V, LDV, T, LDT, + WORK, ldwork, WORK + ws, ldworkc, + CUBLAS_STREAM_VALUE ); + } + /* TT case */ + else if( L == M ) { + cudablas_error(-6, "TTMQRT not available on GPU yet\n" ); + return -6; + /* CUDA_zttmqr( side, trans, m1, n1, M, N, K, IB, */ + /* A, LDA, B, LDB, V, LDV, T, LDT, */ + /* WORK, ldwork ); */ + } + else { + cudablas_error(-6, "TPMQRT not available on GPU yet\n" ); + return -6; + //LAPACKE_ztpmqrt_work( LAPACK_COL_MAJOR, M, N, K, L, IB, V, LDV, T, LDT, A, LDA, B, LDB, WORK ); + } + + return MORSE_SUCCESS; +} diff --git a/cudablas/compute/cuda_ztrmm.c b/cudablas/compute/cuda_ztrmm.c index a7c23a453918615f79ec523e5ce00f2abaea8c11..d86fa5267fe483ab5517d393aa93913f1bf7d512 100644 --- a/cudablas/compute/cuda_ztrmm.c +++ b/cudablas/compute/cuda_ztrmm.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_ztrmm( MORSE_enum side, MORSE_enum uplo, diff --git a/cudablas/compute/cuda_ztrsm.c b/cudablas/compute/cuda_ztrsm.c index d4ef2cab0545cbc6a4e229e1fa88ddf1587cb5d9..d82766bc8e7710a3eb65aac2e2ae57e3427bf9a6 100644 --- a/cudablas/compute/cuda_ztrsm.c +++ b/cudablas/compute/cuda_ztrsm.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_ztrsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transa, MORSE_enum diag, diff --git a/cudablas/compute/cuda_ztrtri.c b/cudablas/compute/cuda_ztrtri.c index 7fea4e490f1d84e977708c6b04c9dcd433bd095d..02243a938e653b87cd81aea958c24f2f1d8d0d57 100644 --- a/cudablas/compute/cuda_ztrtri.c +++ b/cudablas/compute/cuda_ztrtri.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" #if defined(CHAMELEON_USE_MAGMA) int CUDA_ztrtri( diff --git a/cudablas/compute/cuda_ztslqt.c b/cudablas/compute/cuda_ztslqt.c index 2abdfd9a48c44ac9d1c900b89b099168369967bd..27c0228f006e732813820a2418f02b7db8945bbe 100644 --- a/cudablas/compute/cuda_ztslqt.c +++ b/cudablas/compute/cuda_ztslqt.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" #if defined(CHAMELEON_USE_MAGMA) && 0 int CUDA_ztslqt( @@ -54,11 +55,11 @@ int CUDA_ztslqt( return -1; } else if (n < 0) { return -2; - } else if (ldda2 < max(1,m)) { + } else if (ldda2 < chameleon_max(1,m)) { return -4; } - k = min(m,n); + k = chameleon_min(m,n); if (k == 0) { hwork[0] = *((magmaDoubleComplex*) &one); return MAGMA_SUCCESS; @@ -70,7 +71,7 @@ int CUDA_ztslqt( memset(t, 0, nb*n*sizeof(magmaDoubleComplex)); cudaMemset(dt, 0, nb*n*sizeof(magmaDoubleComplex)); - //k = min(m, nb); // m can be lower than IB + //k = chameleon_min(m, nb); // m can be lower than IB /* copy the first diag tile of A1 from device to host: da1 -> d */ cublasGetMatrix(nb, nb, sizeof(magmaDoubleComplex), da1_ref(0, 0), ldda1, @@ -84,7 +85,7 @@ int CUDA_ztslqt( /* This is only blocked code for now */ for (i = 0; i < m; i += nb) { - ib = min(m-i, nb); + ib = chameleon_min(m-i, nb); cols = n; /* Send the next panel (diagonal block of A1 & block column of A2) diff --git a/cudablas/compute/cuda_ztsmlq.c b/cudablas/compute/cuda_ztsmlq.c index d33e537c61ea7efa9231b083a23b205a4f7c460c..1b2980a344c716afb04a63f07c51c903738cb7a0 100644 --- a/cudablas/compute/cuda_ztsmlq.c +++ b/cudablas/compute/cuda_ztsmlq.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_ztsmlq( MORSE_enum side, MORSE_enum trans, @@ -83,19 +84,19 @@ int CUDA_ztsmlq( if (IB < 0) { return -8; } - if (LDA1 < max(1,M1)){ + if (LDA1 < chameleon_max(1,M1)){ return -10; } - if (LDA2 < max(1,M2)){ + if (LDA2 < chameleon_max(1,M2)){ return -12; } - if (LDV < max(1,K)){ + if (LDV < chameleon_max(1,K)){ return -14; } - if (LDT < max(1,IB)){ + if (LDT < chameleon_max(1,IB)){ return -16; } - if (LDWORK < max(1,NW)){ + if (LDWORK < chameleon_max(1,NW)){ return -18; } @@ -121,7 +122,7 @@ int CUDA_ztsmlq( } for(i = i1; (i > -1) && (i < K); i += i3) { - kb = min(IB, K-i); + kb = chameleon_min(IB, K-i); if (side == MorseLeft) { /* diff --git a/cudablas/compute/cuda_ztsmqr.c b/cudablas/compute/cuda_ztsmqr.c index e104a085fcccd05a3f4b6746ec98f6713f97025d..678586823674cda7e92cbde6c44977ceb07158e7 100644 --- a/cudablas/compute/cuda_ztsmqr.c +++ b/cudablas/compute/cuda_ztsmqr.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_ztsmqr( MORSE_enum side, MORSE_enum trans, @@ -85,19 +86,19 @@ int CUDA_ztsmqr( if (IB < 0) { return -8; } - if (LDA1 < max(1,M1)){ + if (LDA1 < chameleon_max(1,M1)){ return -10; } - if (LDA2 < max(1,M2)){ + if (LDA2 < chameleon_max(1,M2)){ return -12; } - if (LDV < max(1,NQ)){ + if (LDV < chameleon_max(1,NQ)){ return -14; } - if (LDT < max(1,IB)){ + if (LDT < chameleon_max(1,IB)){ return -16; } - if (LDWORK < max(1,NW)){ + if (LDWORK < chameleon_max(1,NW)){ return -18; } @@ -116,7 +117,7 @@ int CUDA_ztsmqr( } for(i = i1; (i > -1) && (i < K); i += i3) { - kb = min(IB, K-i); + kb = chameleon_min(IB, K-i); if (side == MorseLeft) { /* diff --git a/cudablas/compute/cuda_ztsqrt.c b/cudablas/compute/cuda_ztsqrt.c index 1ac350eb55b359099df263929e5ccf2d92dc598a..4f11d800fbe12b997f72c4ba85633ca301498419 100644 --- a/cudablas/compute/cuda_ztsqrt.c +++ b/cudablas/compute/cuda_ztsqrt.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" #if defined(CHAMELEON_USE_MAGMA) int CUDA_ztsqrt( @@ -56,11 +57,11 @@ int CUDA_ztsqrt( return -1; } else if (n < 0) { return -2; - } else if (ldda2 < max(1,m)) { + } else if (ldda2 < chameleon_max(1,m)) { return -4; } - k = min(m,n); + k = chameleon_min(m,n); if (k == 0) { hwork[0] = *((magmaDoubleComplex*) &one); return MAGMA_SUCCESS; @@ -91,7 +92,7 @@ int CUDA_ztsqrt( /* This is only blocked code for now */ for (i = 0; i < n; i += nb) { - ib = min(n-i, nb); + ib = chameleon_min(n-i, nb); rows = m; /* Send the next panel (diagonal block of A1 & block column of A2) diff --git a/cudablas/compute/cuda_ztstrf.c b/cudablas/compute/cuda_ztstrf.c index c7d85356a95b222fb845cc786825f2dc245afb10..5780ff639af07fb1ddcfb1ddb09f889054e2c840 100644 --- a/cudablas/compute/cuda_ztstrf.c +++ b/cudablas/compute/cuda_ztstrf.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" #if defined(CHAMELEON_USE_MAGMA) && 0 int CUDA_ztstrf( diff --git a/cudablas/compute/cuda_zunmlqt.c b/cudablas/compute/cuda_zunmlqt.c index c9a15473376f72c6d9a0dcee40b2da06e29adf5e..c7fc245a94e6b479e25211e81c90ad1b80b2c22a 100644 --- a/cudablas/compute/cuda_zunmlqt.c +++ b/cudablas/compute/cuda_zunmlqt.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zunmlqt(MORSE_enum side, MORSE_enum trans, @@ -72,13 +73,13 @@ CUDA_zunmlqt(MORSE_enum side, MORSE_enum trans, if ((IB < 0) || ( (IB == 0) && ((M > 0) && (N > 0)) )) { return -6; } - if ((LDA < max(1,K)) && (K > 0)) { + if ((LDA < chameleon_max(1,K)) && (K > 0)) { return -8; } - if ((LDC < max(1,M)) && (M > 0)) { + if ((LDC < chameleon_max(1,M)) && (M > 0)) { return -12; } - if ((LDWORK < max(1,nw)) && (nw > 0)) { + if ((LDWORK < chameleon_max(1,nw)) && (nw > 0)) { return -14; } @@ -104,7 +105,7 @@ CUDA_zunmlqt(MORSE_enum side, MORSE_enum trans, } for(i = i1; (i >- 1) && (i < K); i+=i3 ) { - kb = min(IB, K-i); + kb = chameleon_min(IB, K-i); if (side == MorseLeft) { /* diff --git a/cudablas/compute/cuda_zunmqrt.c b/cudablas/compute/cuda_zunmqrt.c index 6032cabc4bb04e58a637370078abc19509374433..f61f7714c193e5e64b43902c665e9009f1361090 100644 --- a/cudablas/compute/cuda_zunmqrt.c +++ b/cudablas/compute/cuda_zunmqrt.c @@ -23,6 +23,7 @@ * **/ #include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" int CUDA_zunmqrt(MORSE_enum side, MORSE_enum trans, @@ -72,13 +73,13 @@ CUDA_zunmqrt(MORSE_enum side, MORSE_enum trans, if ((IB < 0) || ( (IB == 0) && ((M > 0) && (N > 0)) )) { return -6; } - if ((LDA < max(1,nq)) && (nq > 0)) { + if ((LDA < chameleon_max(1,nq)) && (nq > 0)) { return -8; } - if ((LDC < max(1,M)) && (M > 0)) { + if ((LDC < chameleon_max(1,M)) && (M > 0)) { return -12; } - if ((LDWORK < max(1,nw)) && (nw > 0)) { + if ((LDWORK < chameleon_max(1,nw)) && (nw > 0)) { return -14; } @@ -97,7 +98,7 @@ CUDA_zunmqrt(MORSE_enum side, MORSE_enum trans, } for(i = i1; (i >- 1) && (i < K); i+=i3 ) { - kb = min(IB, K-i); + kb = chameleon_min(IB, K-i); if (side == MorseLeft) { /* diff --git a/cudablas/include/cudablas.h b/cudablas/include/cudablas.h index 7032bf0c6c16c80e6d3f162a3da14a5d38cf7383..8ff43b50726cbb2e6f67e9996cebe4e944934543 100644 --- a/cudablas/include/cudablas.h +++ b/cudablas/include/cudablas.h @@ -83,18 +83,10 @@ #include "cudablas/include/cudablas_c.h" #include "cudablas/include/cudablas_s.h" -/******************************************************************************* - * Global utilities +/** **************************************************************************** + * Coreblas Error **/ -#ifndef max -#define max(a, b) ((a) > (b) ? (a) : (b)) -#endif -#ifndef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#endif -#ifndef roundup -#define roundup(a, b) (b <= 0) ? (a) : (((a) + (b)-1) & ~((b)-1)) -#endif +#define cudablas_error(k, str) fprintf(stderr, "%s: Parameter %d / %s\n", __func__, k, str) /** **************************************************************************** * LAPACK Constants diff --git a/cudablas/include/cudablas_z.h b/cudablas/include/cudablas_z.h index 0ee705938a2a6cd88e52e212275983d2b254ec09..e7a0bf44a700c0e84498c29653fa8d3a5c2648c8 100644 --- a/cudablas/include/cudablas_z.h +++ b/cudablas/include/cudablas_z.h @@ -45,6 +45,7 @@ int CUDA_zparfb(MORSE_enum side, MORSE_enum trans, MORSE_enum direct, MORSE_enum int CUDA_zsymm( MORSE_enum side, MORSE_enum uplo, int m, int n, cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, cuDoubleComplex *beta, cuDoubleComplex *C, int ldc, CUBLAS_STREAM_PARAM); int CUDA_zsyr2k( MORSE_enum uplo, MORSE_enum trans, int n, int k, cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, cuDoubleComplex *beta, cuDoubleComplex *C, int ldc, CUBLAS_STREAM_PARAM); int CUDA_zsyrk( MORSE_enum uplo, MORSE_enum trans, int n, int k, cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, cuDoubleComplex *beta, cuDoubleComplex *C, int ldc, CUBLAS_STREAM_PARAM); +int CUDA_ztpmqrt( MORSE_enum side, MORSE_enum trans, int M, int N, int K, int L, int IB, const cuDoubleComplex *V, int LDV, const cuDoubleComplex *T, int LDT, cuDoubleComplex *A, int LDA, cuDoubleComplex *B, int LDB, cuDoubleComplex *WORK, CUBLAS_STREAM_PARAM ); int CUDA_ztrmm( MORSE_enum side, MORSE_enum uplo, MORSE_enum transa, MORSE_enum diag, int m, int n, cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb, CUBLAS_STREAM_PARAM); int CUDA_ztrsm( MORSE_enum side, MORSE_enum uplo, MORSE_enum transa, MORSE_enum diag, int m, int n, cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb, CUBLAS_STREAM_PARAM); int CUDA_ztsmlq( MORSE_enum side, MORSE_enum trans, int M1, int N1, int M2, int N2, int K, int IB, cuDoubleComplex *A1, int LDA1, cuDoubleComplex *A2, int LDA2, const cuDoubleComplex *V, int LDV, const cuDoubleComplex *T, int LDT, cuDoubleComplex *WORK, int LDWORK, cuDoubleComplex *WORKC, int LDWORKC, CUBLAS_STREAM_PARAM); diff --git a/include/morse_types.h b/include/morse_types.h index b16d01607d78962ea4c7c07e9b79cb9190814b22..80449afe3c8e512c37c3ef3514832b4784a1910d 100644 --- a/include/morse_types.h +++ b/include/morse_types.h @@ -120,4 +120,15 @@ extern double creal(MORSE_Complex64_t z); #endif /* MORSE_HAS_COMPLEX_H*/ +/******************************************************************************* + * Global utilities + **/ +static inline int chameleon_max( int a, int b ) { + if ( a > b ) return a; else return b; +} + +static inline int chameleon_min( int a, int b ) { + if ( a < b ) return a; else return b; +} + #endif /* __CHAMELEON_H__ */ diff --git a/runtime/parsec/codelets/codelet_zlange.c b/runtime/parsec/codelets/codelet_zlange.c index 77f1f039c0fabb1c6d224c1b2d5c052e6e11d34b..5d1bd5dd09718f3c87663175e3473fb550b2de03 100644 --- a/runtime/parsec/codelets/codelet_zlange.c +++ b/runtime/parsec/codelets/codelet_zlange.c @@ -55,7 +55,7 @@ void MORSE_TASK_zlange(const MORSE_option_t *options, { dague_dtd_handle_t* DAGUE_dtd_handle = (dague_dtd_handle_t *)(options->sequence->schedopt); - int szeW = max( M, N ); + int szeW = chameleon_max( M, N ); dague_insert_task( DAGUE_dtd_handle, CORE_zlange_parsec, "lange", diff --git a/runtime/parsec/codelets/codelet_zlanhe.c b/runtime/parsec/codelets/codelet_zlanhe.c index fffdb4bec17f6d54d9053880cd42ab6f244c7e98..fc50a99381508c6e3960d729e90543c619c672ce 100644 --- a/runtime/parsec/codelets/codelet_zlanhe.c +++ b/runtime/parsec/codelets/codelet_zlanhe.c @@ -55,7 +55,7 @@ void MORSE_TASK_zlanhe(const MORSE_option_t *options, { dague_dtd_handle_t* DAGUE_dtd_handle = (dague_dtd_handle_t *)(options->sequence->schedopt); - int szeW = max( 1, N ); + int szeW = chameleon_max( 1, N ); dague_insert_task( DAGUE_dtd_handle, CORE_zlanhe_parsec, "LANHE", diff --git a/runtime/parsec/codelets/codelet_zlansy.c b/runtime/parsec/codelets/codelet_zlansy.c index 113d06c44fbe036d58ba918469e5135979c0e76b..62cf558513eebb33d6f222a3be6c5bc9689ef3cd 100644 --- a/runtime/parsec/codelets/codelet_zlansy.c +++ b/runtime/parsec/codelets/codelet_zlansy.c @@ -55,7 +55,7 @@ void MORSE_TASK_zlansy(const MORSE_option_t *options, { dague_dtd_handle_t* DAGUE_dtd_handle = (dague_dtd_handle_t *)(options->sequence->schedopt); - int szeW = max( 1, N ); + int szeW = chameleon_max( 1, N ); dague_insert_task( DAGUE_dtd_handle, CORE_zlansy_parsec, "lansy", diff --git a/runtime/parsec/codelets/codelet_zlantr.c b/runtime/parsec/codelets/codelet_zlantr.c index ef290a2a6cda59e869c57b1adc49fc1806483121..ed0cb32169f71826d1ce0036ead75f3e8b798e2a 100644 --- a/runtime/parsec/codelets/codelet_zlantr.c +++ b/runtime/parsec/codelets/codelet_zlantr.c @@ -58,7 +58,7 @@ void MORSE_TASK_zlantr(const MORSE_option_t *options, { dague_dtd_handle_t* DAGUE_dtd_handle = (dague_dtd_handle_t *)(options->sequence->schedopt); - int szeW = max( 1, N ); + int szeW = chameleon_max( 1, N ); dague_insert_task( DAGUE_dtd_handle, CORE_zlantr_parsec, "lantr", diff --git a/runtime/quark/codelets/codelet_zlange.c b/runtime/quark/codelets/codelet_zlange.c index c8a9f6c5fb2f80f90bc756dfd159984add0b5f58..e3a6b92ec1453b0ddb2c284cfb70f15f60674bfd 100644 --- a/runtime/quark/codelets/codelet_zlange.c +++ b/runtime/quark/codelets/codelet_zlange.c @@ -36,16 +36,17 @@ void MORSE_TASK_zlange(const MORSE_option_t *options, { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LANGE; - int szeW = max( M, N ); - QUARK_Insert_Task(opt->quark, CORE_zlange_quark, (Quark_Task_Flags*)opt, - sizeof(MORSE_enum), &norm, VALUE, - sizeof(int), &M, VALUE, - sizeof(int), &N, VALUE, - sizeof(MORSE_Complex64_t)*NB*NB, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT, - sizeof(int), &LDA, VALUE, - sizeof(double)*szeW, NULL, SCRATCH, - sizeof(double), RTBLKADDR(B, double, Bm, Bn), OUTPUT, - 0); + int szeW = chameleon_max( M, N ); + QUARK_Insert_Task( + opt->quark, CORE_zlange_quark, (Quark_Task_Flags*)opt, + sizeof(MORSE_enum), &norm, VALUE, + sizeof(int), &M, VALUE, + sizeof(int), &N, VALUE, + sizeof(MORSE_Complex64_t)*NB*NB, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT, + sizeof(int), &LDA, VALUE, + sizeof(double)*szeW, NULL, SCRATCH, + sizeof(double), RTBLKADDR(B, double, Bm, Bn), OUTPUT, + 0); } void CORE_zlange_quark(Quark *quark) @@ -69,10 +70,11 @@ void MORSE_TASK_zlange_max(const MORSE_option_t *options, { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LANGE_MAX; - QUARK_Insert_Task(opt->quark, CORE_zlange_max_quark, (Quark_Task_Flags*)opt, - sizeof(double), RTBLKADDR(A, double, Am, An), INPUT, - sizeof(double), RTBLKADDR(B, double, Bm, Bn), OUTPUT, - 0); + QUARK_Insert_Task( + opt->quark, CORE_zlange_max_quark, (Quark_Task_Flags*)opt, + sizeof(double), RTBLKADDR(A, double, Am, An), INPUT, + sizeof(double), RTBLKADDR(B, double, Bm, Bn), OUTPUT, + 0); } @@ -82,7 +84,7 @@ void CORE_zlange_max_quark(Quark *quark) double *normA; quark_unpack_args_2(quark, A, normA); - if ( A[0] > *normA ) - *normA = A[0]; + if ( A[0] > *normA ) + *normA = A[0]; } diff --git a/runtime/quark/codelets/codelet_zlanhe.c b/runtime/quark/codelets/codelet_zlanhe.c index f3bd6fa8c24ee785a2c3efb139bf3d52f07772ad..70ab4cf8f7d4cf9b266fd5426c914b1ced81b211 100644 --- a/runtime/quark/codelets/codelet_zlanhe.c +++ b/runtime/quark/codelets/codelet_zlanhe.c @@ -36,16 +36,17 @@ void MORSE_TASK_zlanhe(const MORSE_option_t *options, { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LANHE; - int szeW = max( 1, N ); - QUARK_Insert_Task(opt->quark, CORE_zlanhe_quark, (Quark_Task_Flags*)opt, - sizeof(MORSE_enum), &norm, VALUE, - sizeof(MORSE_enum), &uplo, VALUE, - sizeof(int), &N, VALUE, - sizeof(MORSE_Complex64_t)*NB*NB, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT, - sizeof(int), &LDA, VALUE, - sizeof(double)*szeW, NULL, SCRATCH, - sizeof(double), RTBLKADDR(B, double, Bm, Bn), OUTPUT, - 0); + int szeW = chameleon_max( 1, N ); + QUARK_Insert_Task( + opt->quark, CORE_zlanhe_quark, (Quark_Task_Flags*)opt, + sizeof(MORSE_enum), &norm, VALUE, + sizeof(MORSE_enum), &uplo, VALUE, + sizeof(int), &N, VALUE, + sizeof(MORSE_Complex64_t)*NB*NB, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT, + sizeof(int), &LDA, VALUE, + sizeof(double)*szeW, NULL, SCRATCH, + sizeof(double), RTBLKADDR(B, double, Bm, Bn), OUTPUT, + 0); } void CORE_zlanhe_quark(Quark *quark) diff --git a/runtime/quark/codelets/codelet_zlansy.c b/runtime/quark/codelets/codelet_zlansy.c index 3aaf510c415531130ec19271bd67b423704ecf0f..43e2132e17912a2bc6823195910b0119b1ac734c 100644 --- a/runtime/quark/codelets/codelet_zlansy.c +++ b/runtime/quark/codelets/codelet_zlansy.c @@ -36,8 +36,9 @@ void MORSE_TASK_zlansy(const MORSE_option_t *options, { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LANSY; - int szeW = max( 1, N ); - QUARK_Insert_Task(opt->quark, CORE_zlansy_quark, (Quark_Task_Flags*)opt, + int szeW = chameleon_max( 1, N ); + QUARK_Insert_Task( + opt->quark, CORE_zlansy_quark, (Quark_Task_Flags*)opt, sizeof(MORSE_enum), &norm, VALUE, sizeof(MORSE_enum), &uplo, VALUE, sizeof(int), &N, VALUE, diff --git a/runtime/quark/codelets/codelet_zlantr.c b/runtime/quark/codelets/codelet_zlantr.c index b964eb3aa7443685f09ee11fbffe9c24c90cff62..a6d0009b5481a8784cd908c9ede233c7a5d32839 100644 --- a/runtime/quark/codelets/codelet_zlantr.c +++ b/runtime/quark/codelets/codelet_zlantr.c @@ -34,8 +34,9 @@ void MORSE_TASK_zlantr(const MORSE_option_t *options, { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LANTR; - int szeW = max( 1, N ); - QUARK_Insert_Task(opt->quark, CORE_zlantr_quark, (Quark_Task_Flags*)opt, + int szeW = chameleon_max( 1, N ); + QUARK_Insert_Task( + opt->quark, CORE_zlantr_quark, (Quark_Task_Flags*)opt, sizeof(MORSE_enum), &norm, VALUE, sizeof(MORSE_enum), &uplo, VALUE, sizeof(MORSE_enum), &diag, VALUE, diff --git a/runtime/starpu/codelets/codelet_zasum.c b/runtime/starpu/codelets/codelet_zasum.c index bd04e779c28f4c3e4fb7d8068618d48e46396b7c..7c17a35d1c8eac5f9144c4a31f7acd7544ef418d 100644 --- a/runtime/starpu/codelets/codelet_zasum.c +++ b/runtime/starpu/codelets/codelet_zasum.c @@ -36,7 +36,7 @@ void MORSE_TASK_dzasum(const MORSE_option_t *options, struct starpu_codelet *codelet = &cl_zasum; void (*callback)(void*) = options->profiling ? cl_zasum_callback : NULL; starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &storev, sizeof(MORSE_enum), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &M, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c index 26735cad8ff1b02348623c1fe559cff7fd79df22..557eaa1086febacb8b2e7f7befc87945542c8629 100644 --- a/runtime/starpu/codelets/codelet_zaxpy.c +++ b/runtime/starpu/codelets/codelet_zaxpy.c @@ -36,7 +36,7 @@ void MORSE_TASK_zaxpy(const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) || morse_desc_islocal( B, Bm, Bn ) ){ starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &M, sizeof(int), STARPU_VALUE, alpha, sizeof(MORSE_Complex64_t), STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c index fa1cd01cc9c4349d266ce085648caa7703ccd181..de3b1650bb2b8fcb1a0f7d27636fb90e73f0b523 100644 --- a/runtime/starpu/codelets/codelet_zbuild.c +++ b/runtime/starpu/codelets/codelet_zbuild.c @@ -51,7 +51,7 @@ void MORSE_TASK_zbuild( const MORSE_option_t *options, col_min = An*A->nb ; col_max = An == A->nt-1 ? A->n-1 : col_min+A->nb-1 ; starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &row_min, sizeof(int), STARPU_VALUE, &row_max, sizeof(int), STARPU_VALUE, &col_min, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c index bb26aa301e40deb1346e3abc048dfbd9bd1b26af..aa4d4dda3244fc7d5f507088a6c155bf97a7a0ce 100644 --- a/runtime/starpu/codelets/codelet_zcallback.c +++ b/runtime/starpu/codelets/codelet_zcallback.c @@ -28,61 +28,61 @@ #include "runtime/starpu/include/morse_starpu.h" #include "runtime/starpu/include/runtime_codelet_z.h" -CHAMELEON_CL_CB(zasum, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zaxpy, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[1]), 0, M); -CHAMELEON_CL_CB(zgeadd, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zlascal, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zgelqt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, (4./3.)*M*N*K); -CHAMELEON_CL_CB(zgemm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), starpu_matrix_get_ny(task->handles[0]), 2. *M*N*K); /* If A^t, computation is wrong */ -CHAMELEON_CL_CB(zgeqrt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, (4./3.)*M*M*N); -CHAMELEON_CL_CB(zgessm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_nx(task->handles[2]), 2. *M*N*K); -CHAMELEON_CL_CB(zgessq, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 0, 4.*M*N); -CHAMELEON_CL_CB(zgetrf, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K); -CHAMELEON_CL_CB(zgetrf_incpiv, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K); -CHAMELEON_CL_CB(zgetrf_nopiv, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K); -CHAMELEON_CL_CB(zhe2ge, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, (1./2.0)*M*N); -CHAMELEON_CL_CB(zherfb, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, 2. *M* M*M); +CHAMELEON_CL_CB(zasum, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zaxpy, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[1]), 0, M) +CHAMELEON_CL_CB(zgeadd, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zlascal, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zgelqt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, (4./3.)*M*N*K) +CHAMELEON_CL_CB(zgemm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), starpu_matrix_get_ny(task->handles[0]), 2. *M*N*K) /* If A^t, computation is wrong */ +CHAMELEON_CL_CB(zgeqrt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, (4./3.)*M*M*N) +CHAMELEON_CL_CB(zgessm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_nx(task->handles[2]), 2. *M*N*K) +CHAMELEON_CL_CB(zgessq, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 0, 4.*M*N) +CHAMELEON_CL_CB(zgetrf, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K) +CHAMELEON_CL_CB(zgetrf_incpiv, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K) +CHAMELEON_CL_CB(zgetrf_nopiv, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K) +CHAMELEON_CL_CB(zhe2ge, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, (1./2.0)*M*N) +CHAMELEON_CL_CB(zherfb, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, 2. *M* M*M) #if defined(PRECISION_z) || defined(PRECISION_c) -CHAMELEON_CL_CB(zhemm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0, 2.*M*M *N); -CHAMELEON_CL_CB(zher2k, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+2.*M*N)*M); -CHAMELEON_CL_CB(zherk, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+ M)*M*N); +CHAMELEON_CL_CB(zhemm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0, 2.*M*M *N) +CHAMELEON_CL_CB(zher2k, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+2.*M*N)*M) +CHAMELEON_CL_CB(zherk, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+ M)*M*N) #endif -CHAMELEON_CL_CB(zlacpy, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zlange, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zlaset, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zlaset2, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zlatro, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zlauum, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (1./3.)*M* M*M); +CHAMELEON_CL_CB(zlacpy, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zlange, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zlaset, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zlaset2, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zlatro, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zlauum, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (1./3.)*M* M*M) #if defined(PRECISION_z) || defined(PRECISION_c) -CHAMELEON_CL_CB(zplghe, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zsytrf_nopiv, starpu_matrix_get_nx(task->handles[0]), 0, 0, (1./3.)*M* M*M); +CHAMELEON_CL_CB(zplghe, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zsytrf_nopiv, starpu_matrix_get_nx(task->handles[0]), 0, 0, (1./3.)*M* M*M) #endif -CHAMELEON_CL_CB(zplgsy, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zplrnt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zbuild, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); -CHAMELEON_CL_CB(zplssq, 1, 1, 0, 4); -CHAMELEON_CL_CB(zplssq2, 1, 1, 0, 1); -CHAMELEON_CL_CB(zpotrf, starpu_matrix_get_nx(task->handles[0]), 0, 0, (1./3.)*M* M*M); -CHAMELEON_CL_CB(zssssm, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), M*M*(2.*M+starpu_matrix_get_nx(task->handles[2]))); -CHAMELEON_CL_CB(zsymm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0, 2.*M*M *N); -CHAMELEON_CL_CB(zsyr2k, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+2.*M*N)*M); -CHAMELEON_CL_CB(zsyrk, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+ M)*M*N); -CHAMELEON_CL_CB(ztpqrt, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]), 2.*M*N*K); -CHAMELEON_CL_CB(ztpmqrt, starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]), 4.*M*N*K); -CHAMELEON_CL_CB(ztrasm, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, 0.5*M*(M+1)); -CHAMELEON_CL_CB(ztrmm, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0, M*M*N); -CHAMELEON_CL_CB(ztrsm, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0, M*M*N); -CHAMELEON_CL_CB(ztrtri, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (1./3.)*M *M*M); -CHAMELEON_CL_CB(ztslqt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 2. *M* M*M); -CHAMELEON_CL_CB(ztsmlq, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M); -CHAMELEON_CL_CB(ztsmqr, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M); -CHAMELEON_CL_CB(ztsmlq_hetra1, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M); -CHAMELEON_CL_CB(ztsmqr_hetra1, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M); -CHAMELEON_CL_CB(ztsqrt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 2. *M* M*M); -CHAMELEON_CL_CB(ztstrf, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), M* M*M); -CHAMELEON_CL_CB(zttlqt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 1. *M* M*M); -CHAMELEON_CL_CB(zttmlq, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M); -CHAMELEON_CL_CB(zttmqr, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M); -CHAMELEON_CL_CB(zttqrt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 1. *M* M*M); -CHAMELEON_CL_CB(zunmlq, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 2. *M* M*M); -CHAMELEON_CL_CB(zunmqr, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 2. *M* M*M); +CHAMELEON_CL_CB(zplgsy, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zplrnt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zbuild, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(zplssq, 1, 1, 0, 4) +CHAMELEON_CL_CB(zplssq2, 1, 1, 0, 1) +CHAMELEON_CL_CB(zpotrf, starpu_matrix_get_nx(task->handles[0]), 0, 0, (1./3.)*M* M*M) +CHAMELEON_CL_CB(zssssm, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), M*M*(2.*M+starpu_matrix_get_nx(task->handles[2]))) +CHAMELEON_CL_CB(zsymm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0, 2.*M*M *N) +CHAMELEON_CL_CB(zsyr2k, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+2.*M*N)*M) +CHAMELEON_CL_CB(zsyrk, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+ M)*M*N) +CHAMELEON_CL_CB(ztpqrt, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]), 2.*M*N*K) +CHAMELEON_CL_CB(ztpmqrt, starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]), 4.*M*N*K) +CHAMELEON_CL_CB(ztrasm, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, 0.5*M*(M+1)) +CHAMELEON_CL_CB(ztrmm, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0, M*M*N) +CHAMELEON_CL_CB(ztrsm, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0, M*M*N) +CHAMELEON_CL_CB(ztrtri, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (1./3.)*M *M*M) +CHAMELEON_CL_CB(ztslqt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 2. *M* M*M) +CHAMELEON_CL_CB(ztsmlq, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M) +CHAMELEON_CL_CB(ztsmqr, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M) +CHAMELEON_CL_CB(ztsmlq_hetra1, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M) +CHAMELEON_CL_CB(ztsmqr_hetra1, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M) +CHAMELEON_CL_CB(ztsqrt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 2. *M* M*M) +CHAMELEON_CL_CB(ztstrf, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), M* M*M) +CHAMELEON_CL_CB(zttlqt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 1. *M* M*M) +CHAMELEON_CL_CB(zttmlq, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M) +CHAMELEON_CL_CB(zttmqr, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M) +CHAMELEON_CL_CB(zttqrt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 1. *M* M*M) +CHAMELEON_CL_CB(zunmlq, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 2. *M* M*M) +CHAMELEON_CL_CB(zunmqr, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 2. *M* M*M) diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c index f2ebfc783c0dbbb186d433aa41d1a8e66ad3ae3e..1a57e312356b644a2ba23ff55d2c67495d7d05ac 100644 --- a/runtime/starpu/codelets/codelet_zgeadd.c +++ b/runtime/starpu/codelets/codelet_zgeadd.c @@ -95,7 +95,7 @@ void MORSE_TASK_zgeadd(const MORSE_option_t *options, morse_desc_islocal( B, Bm, Bn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), @@ -112,6 +112,8 @@ void MORSE_TASK_zgeadd(const MORSE_option_t *options, #endif 0); } + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c index 023af3243433871b3e279152ef82897f2e3bd878..2b4c0daf8121b4acce12ccbf675a5723bc9cf0ed 100644 --- a/runtime/starpu/codelets/codelet_zgelqt.c +++ b/runtime/starpu/codelets/codelet_zgelqt.c @@ -108,7 +108,7 @@ void MORSE_TASK_zgelqt(const MORSE_option_t *options, morse_desc_islocal( T, Tm, Tn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), @@ -149,7 +149,7 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); - WORK = TAU + max( m, n ); + WORK = TAU + chameleon_max( m, n ); CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -182,7 +182,7 @@ static void cl_zgelqt_cuda_func(void *descr[], void *cl_arg) /* Gather pointer to scratch data on host */ h_T = h_A + ib*n; h_TAU = h_T + ib*ib; - h_W = h_TAU + max(m,n); + h_W = h_TAU + chameleon_max(m,n); h_D = h_W + ib*ib; stream = starpu_cuda_get_local_stream(); diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 68564e7968f2a555a29e26ba3977221000c77f7a..d9ba98ff68d151edc28c8dfa5ef023200fd2e508 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -77,7 +77,7 @@ void MORSE_TASK_zgemm(const MORSE_option_t *options, ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &transA, sizeof(MORSE_enum), STARPU_VALUE, &transB, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), @@ -176,4 +176,4 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC); +CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC) diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c index ca0875bbbe8c8064307ad70930479f977abd519c..d738fc0b5a741f683a5d4f5499212cf459a4f172 100644 --- a/runtime/starpu/codelets/codelet_zgeqrt.c +++ b/runtime/starpu/codelets/codelet_zgeqrt.c @@ -109,7 +109,7 @@ void MORSE_TASK_zgeqrt(const MORSE_option_t *options, morse_desc_islocal( T, Tm, Tn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), @@ -150,7 +150,7 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); - WORK = TAU + max( m, n ); + WORK = TAU + chameleon_max( m, n ); CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK); } @@ -182,7 +182,7 @@ static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg) /* Gather pointer to scratch data on host */ h_T = h_A + m*ib; h_TAU = h_T + ib*ib; - h_W = h_TAU + max(m,n); + h_W = h_TAU + chameleon_max(m,n); h_D = h_W + ib*ib; stream = starpu_cuda_get_local_stream(); diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c index 09d5a60ef334c295662ebca9747b80a59944280d..cabe11ce96babfc59ed50a7e5ea8fbb002394752 100644 --- a/runtime/starpu/codelets/codelet_zgessm.c +++ b/runtime/starpu/codelets/codelet_zgessm.c @@ -95,7 +95,7 @@ void MORSE_TASK_zgessm(const MORSE_option_t *options, morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &k, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c index 4c65d87aa1aac7c2797a2e6a8a861c33344e92ee..cd229712b59f21dc3fd901dd73faeefff0a00514 100644 --- a/runtime/starpu/codelets/codelet_zgessq.c +++ b/runtime/starpu/codelets/codelet_zgessq.c @@ -34,9 +34,12 @@ void MORSE_TASK_zgessq( const MORSE_option_t *options, { struct starpu_codelet *codelet = &cl_zgessq; void (*callback)(void*) = options->profiling ? cl_zgessq_callback : NULL; + if ( morse_desc_islocal( A, Am, An ) || - morse_desc_islocal( SCALESUMSQ, SCALESUMSQm, SCALESUMSQn ) ){ - starpu_insert_task(codelet, + morse_desc_islocal( SCALESUMSQ, SCALESUMSQm, SCALESUMSQn ) ) + { + starpu_insert_task( + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c index 9943b30ef7bf548cf6ebc70dd4b3f7eb87783aac..54544d0690f06c26e37bca0d76db946ba546da42 100644 --- a/runtime/starpu/codelets/codelet_zgetrf.c +++ b/runtime/starpu/codelets/codelet_zgetrf.c @@ -43,7 +43,7 @@ void MORSE_TASK_zgetrf(const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c index 736bcecf753ef4349dab5c1841bee7dc2770ff9b..c76be02aaf3026a9a45bea2173a8d47024e8e86d 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c @@ -104,7 +104,7 @@ void MORSE_TASK_zgetrf_incpiv(const MORSE_option_t *options, morse_desc_islocal( L, Lm, Ln ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), @@ -162,7 +162,7 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg) L += ib; for (i=0; i<n; i+=ib) { - sb = min( ib, n-i ); + sb = chameleon_min( ib, n-i ); CORE_zlacpy(MorseUpperLower, sb, sb, A+(i*lda+i), lda, L+(i*ldl), ldl ); CORE_ztrtri( MorseLower, MorseUnit, sb, L+(i*ldl), ldl, &info ); diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c index d2a6425d3f72fbe3b8f907723a4f97719d9b121a..3fa0f5c3920b6caf99c5fed29befadfa5168a92f 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c @@ -88,7 +88,7 @@ void MORSE_TASK_zgetrf_nopiv(const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c index a6a33e7838b25eb8919e14df3350b3218f50c316..f107252cc70a863a5774da37f9ea6aa2000a6327 100644 --- a/runtime/starpu/codelets/codelet_zhe2ge.c +++ b/runtime/starpu/codelets/codelet_zhe2ge.c @@ -43,7 +43,7 @@ void MORSE_TASK_zhe2ge(const MORSE_option_t *options, morse_desc_islocal( B, Bm, Bn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index af47b8e988723605c153cf8b45fe9427b9191c24..4489e84ad443df55c9a5762657fc7fc9d8ec341d 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -53,7 +53,7 @@ void MORSE_TASK_zhemm(const MORSE_option_t *options, morse_desc_islocal( C, Cm, Cn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 2287ab27fbdbf42658cee6b7c27e4e1b21a3801b..78a4d6f137c9a037a9555b1b88571da4301de11a 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -53,7 +53,7 @@ void MORSE_TASK_zher2k(const MORSE_option_t *options, morse_desc_islocal( C, Cm, Cn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &n, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c index eeaecd32c45b845e67dd9032f9a54e37c80edbc0..210fb3278ab53ffdee9805b1a7061696669e9a46 100644 --- a/runtime/starpu/codelets/codelet_zherfb.c +++ b/runtime/starpu/codelets/codelet_zherfb.c @@ -45,7 +45,7 @@ void MORSE_TASK_zherfb(const MORSE_option_t *options, morse_desc_islocal( C, Cm, Cn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &k, sizeof(int), @@ -112,6 +112,7 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg) int ldc; cuDoubleComplex *WORK; int ldwork; + CUstream stream; stream = starpu_cuda_get_local_stream(); @@ -122,7 +123,7 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &uplo, &n, &k, &ib, &nb, &lda, &ldt, &ldc, &ldwork); - CUDA_zherfb(uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork); + CUDA_zherfb( uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); #ifndef STARPU_CUDA_ASYNC cudaStreamSynchronize( stream ); @@ -134,4 +135,4 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC); +CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC) diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index ce5f60025d0dd547d218458b1b7a2d1a58afed2c..8e0d48f6f41c0751db73210ab67756c29217df1c 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -51,7 +51,7 @@ void MORSE_TASK_zherk(const MORSE_option_t *options, morse_desc_islocal( C, Cm, Cn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &n, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zhessq.c b/runtime/starpu/codelets/codelet_zhessq.c index 8faa15d0bdb077fdb9beb00d5447d30527acb510..11666dbce7ab3dcd260b47f1097b24e00cc0326d 100644 --- a/runtime/starpu/codelets/codelet_zhessq.c +++ b/runtime/starpu/codelets/codelet_zhessq.c @@ -34,9 +34,12 @@ void MORSE_TASK_zhessq( const MORSE_option_t *options, { struct starpu_codelet *codelet = &cl_zhessq; void (*callback)(void*) = options->profiling ? cl_zgessq_callback : NULL; + if ( morse_desc_islocal( A, Am, An ) || - morse_desc_islocal( SCALESUMSQ, SCALESUMSQm, SCALESUMSQn ) ){ - starpu_insert_task(codelet, + morse_desc_islocal( SCALESUMSQ, SCALESUMSQm, SCALESUMSQn ) ) + { + starpu_insert_task( + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c index c36bc9f8ea3ecf4e7cf96e99c9b500346d4d180a..ee86f5d18efef05a17b147c0f4689bed0b212fe8 100644 --- a/runtime/starpu/codelets/codelet_zlacpy.c +++ b/runtime/starpu/codelets/codelet_zlacpy.c @@ -51,7 +51,7 @@ void MORSE_TASK_zlacpyx(const MORSE_option_t *options, morse_desc_islocal( B, Bm, Bn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c index f3b0f2a8696880408e3392bded7181e08d43b6fa..7390acaae69b5e5d3e95d5ea8995e98a95a63bfc 100644 --- a/runtime/starpu/codelets/codelet_zlag2c.c +++ b/runtime/starpu/codelets/codelet_zlag2c.c @@ -48,7 +48,7 @@ void MORSE_TASK_zlag2c(const MORSE_option_t *options, morse_desc_islocal( B, Bm, Bn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), @@ -94,7 +94,7 @@ void MORSE_TASK_clag2z(const MORSE_option_t *options, morse_desc_islocal( B, Bm, Bn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, MORSE_Complex32_t, Am, An), diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c index ef031041e5f0beac8cc2d8f2f51afeb859476522..9a1b1ebcf01d0a7fcf1a500ff4d3086179f3d3b9 100644 --- a/runtime/starpu/codelets/codelet_zlange.c +++ b/runtime/starpu/codelets/codelet_zlange.c @@ -38,22 +38,25 @@ void MORSE_TASK_zlange(const MORSE_option_t *options, (void)NB; struct starpu_codelet *codelet = &cl_zlange; void (*callback)(void*) = options->profiling ? cl_zlange_callback : NULL; + if ( morse_desc_islocal( A, Am, An ) || - morse_desc_islocal( B, Bm, Bn ) ){ - starpu_insert_task(codelet, - STARPU_VALUE, &norm, sizeof(MORSE_enum), - STARPU_VALUE, &M, sizeof(int), - STARPU_VALUE, &N, sizeof(int), - STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), - STARPU_VALUE, &LDA, sizeof(int), - STARPU_SCRATCH, options->ws_worker, - STARPU_W, RTBLKADDR(B, double, Bm, Bn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + morse_desc_islocal( B, Bm, Bn ) ) + { + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &norm, sizeof(MORSE_enum), + STARPU_VALUE, &M, sizeof(int), + STARPU_VALUE, &N, sizeof(int), + STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), + STARPU_VALUE, &LDA, sizeof(int), + STARPU_SCRATCH, options->ws_worker, + STARPU_W, RTBLKADDR(B, double, Bm, Bn), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlange", + STARPU_NAME, "zlange", #endif - 0); + 0); } } @@ -87,17 +90,20 @@ void MORSE_TASK_zlange_max(const MORSE_option_t *options, { struct starpu_codelet *codelet = &cl_zlange_max; void (*callback)(void*) = options->profiling ? cl_zlange_callback : NULL; + if ( morse_desc_islocal( A, Am, An ) || - morse_desc_islocal( B, Bm, Bn ) ){ - starpu_insert_task(codelet, - STARPU_R, RTBLKADDR(A, double, Am, An), - STARPU_RW, RTBLKADDR(B, double, Bm, Bn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + morse_desc_islocal( B, Bm, Bn ) ) + { + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_R, RTBLKADDR(A, double, Am, An), + STARPU_RW, RTBLKADDR(B, double, Bm, Bn), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlange_max", + STARPU_NAME, "zlange_max", #endif - 0); + 0); } } @@ -112,6 +118,8 @@ static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg) if ( *A > *normA ) *normA = *A; + + (void)cl_arg; } #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c index c547eac267ff438c28153c582504c9996ceecbef..24a87f7525e82f2427c9b854b2661a3ef1bcbf55 100644 --- a/runtime/starpu/codelets/codelet_zlanhe.c +++ b/runtime/starpu/codelets/codelet_zlanhe.c @@ -36,9 +36,12 @@ void MORSE_TASK_zlanhe(const MORSE_option_t *options, { struct starpu_codelet *codelet = &cl_zlanhe; void (*callback)(void*) = options->profiling ? cl_zlange_callback : NULL; + if ( morse_desc_islocal( A, Am, An ) || - morse_desc_islocal( B, Bm, Bn ) ){ - starpu_insert_task(codelet, + morse_desc_islocal( B, Bm, Bn ) ) + { + starpu_insert_task( + starpu_mpi_codelet(codelet), STARPU_VALUE, &norm, sizeof(MORSE_enum), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &N, sizeof(int), @@ -53,6 +56,8 @@ void MORSE_TASK_zlanhe(const MORSE_option_t *options, #endif 0); } + + (void)NB; } #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c index 69623180d10407ce2682da1143aa6ba5553d1e80..0e3fe5933c241be87004ae3582cd6dcd22954ae1 100644 --- a/runtime/starpu/codelets/codelet_zlansy.c +++ b/runtime/starpu/codelets/codelet_zlansy.c @@ -38,9 +38,12 @@ void MORSE_TASK_zlansy(const MORSE_option_t *options, (void)NB; struct starpu_codelet *codelet = &cl_zlansy; void (*callback)(void*) = options->profiling ? cl_zlange_callback : NULL; + if ( morse_desc_islocal( A, Am, An ) || - morse_desc_islocal( B, Bm, Bn ) ){ - starpu_insert_task(codelet, + morse_desc_islocal( B, Bm, Bn ) ) + { + starpu_insert_task( + starpu_mpi_codelet(codelet), STARPU_VALUE, &norm, sizeof(MORSE_enum), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &N, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c index e1d585f8691ec7b790606d42a767a898d76f7202..c6815973f6c4c92330f76ad5a6334cd9f834fe80 100644 --- a/runtime/starpu/codelets/codelet_zlantr.c +++ b/runtime/starpu/codelets/codelet_zlantr.c @@ -35,9 +35,12 @@ void MORSE_TASK_zlantr(const MORSE_option_t *options, { struct starpu_codelet *codelet = &cl_zlantr; void (*callback)(void*) = options->profiling ? cl_zlange_callback : NULL; + if ( morse_desc_islocal( A, Am, An ) || - morse_desc_islocal( B, Bm, Bn ) ){ - starpu_insert_task(codelet, + morse_desc_islocal( B, Bm, Bn ) ) + { + starpu_insert_task( + starpu_mpi_codelet(codelet), STARPU_VALUE, &norm, sizeof(MORSE_enum), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &diag, sizeof(MORSE_enum), @@ -54,6 +57,8 @@ void MORSE_TASK_zlantr(const MORSE_option_t *options, #endif 0); } + + (void)NB; } #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c index 36af8ccb5ec6327476907326f31cd304a108a3c4..445ee1c318e4d55e804b9d7dfa8b92c0cb351898 100644 --- a/runtime/starpu/codelets/codelet_zlascal.c +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -74,7 +74,7 @@ void MORSE_TASK_zlascal(const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An )) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c index 4f2c84485f3ca501edb9059858c8d544342e326d..639acdb5658de9538d9ae71eb4784dc29b2c2bf4 100644 --- a/runtime/starpu/codelets/codelet_zlaset.c +++ b/runtime/starpu/codelets/codelet_zlaset.c @@ -80,7 +80,7 @@ void MORSE_TASK_zlaset(const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &M, sizeof(int), STARPU_VALUE, &N, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c index 453025b813dc45fcbdb39301a39fbc09d0493f9e..ac9e8fda8f83acd4d7a75bb0843a3e7dbc1efd35 100644 --- a/runtime/starpu/codelets/codelet_zlaset2.c +++ b/runtime/starpu/codelets/codelet_zlaset2.c @@ -77,7 +77,7 @@ void MORSE_TASK_zlaset2(const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &M, sizeof(int), STARPU_VALUE, &N, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c index cf3433112f9039f28d54f3b67b9432972a2a5928..29345b4424a13aa2de6b9eec195d52a7430665ac 100644 --- a/runtime/starpu/codelets/codelet_zlatro.c +++ b/runtime/starpu/codelets/codelet_zlatro.c @@ -51,7 +51,7 @@ void MORSE_TASK_zlatro(const MORSE_option_t *options, morse_desc_islocal( B, Bm, Bn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c index 5539ca63f24864743de95d6c6f20ebc6c42e6dd0..1f5876bd25a9e7fafcb46e305e2593f5229ddb4c 100644 --- a/runtime/starpu/codelets/codelet_zlauum.c +++ b/runtime/starpu/codelets/codelet_zlauum.c @@ -49,7 +49,7 @@ void MORSE_TASK_zlauum(const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &n, sizeof(int), STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c index 42d3b73204686ca29fa6df74321b556c3fc612ce..de57291a5c48f4abe986d07b6026a2f3b0ff8b19 100644 --- a/runtime/starpu/codelets/codelet_zplghe.c +++ b/runtime/starpu/codelets/codelet_zplghe.c @@ -44,7 +44,7 @@ void MORSE_TASK_zplghe( const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &bump, sizeof(double), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), @@ -87,4 +87,4 @@ static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func); +CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c index 027b804895ccd186788aab70100567101622df62..965ff11d884e566a096d6495bd3c6ebc59682d6b 100644 --- a/runtime/starpu/codelets/codelet_zplgsy.c +++ b/runtime/starpu/codelets/codelet_zplgsy.c @@ -45,7 +45,7 @@ void MORSE_TASK_zplgsy( const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &bump, sizeof(MORSE_Complex64_t), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c index 7c6d3556d6555cc2c173405b71bca5db7f7d9154..12bd1dfbdfa2c8952d5bef5d64cf630f79b106ec 100644 --- a/runtime/starpu/codelets/codelet_zplrnt.c +++ b/runtime/starpu/codelets/codelet_zplrnt.c @@ -44,7 +44,7 @@ void MORSE_TASK_zplrnt( const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_W, RTBLKADDR(A, MORSE_Complex64_t, Am, An), diff --git a/runtime/starpu/codelets/codelet_zplssq.c b/runtime/starpu/codelets/codelet_zplssq.c index 25de90a799b9d4a88a439110bee908a8bdd63839..0c85274d127025756eba7f0dbb917ec760ee53bf 100644 --- a/runtime/starpu/codelets/codelet_zplssq.c +++ b/runtime/starpu/codelets/codelet_zplssq.c @@ -65,9 +65,12 @@ void MORSE_TASK_zplssq( const MORSE_option_t *options, { struct starpu_codelet *codelet = &cl_zplssq; void (*callback)(void*) = options->profiling ? cl_zplssq_callback : NULL; + if ( morse_desc_islocal( SCALESUMSQ, SCALESUMSQm, SCALESUMSQn ) || - morse_desc_islocal( SCLSSQ, SCLSSQm, SCLSSQn ) ){ - starpu_insert_task(codelet, + morse_desc_islocal( SCLSSQ, SCLSSQm, SCLSSQn ) ) + { + starpu_insert_task( + starpu_mpi_codelet(codelet), STARPU_R, RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn), STARPU_RW, RTBLKADDR(SCLSSQ, double, SCLSSQm, SCLSSQn), STARPU_PRIORITY, options->priority, @@ -95,6 +98,8 @@ static void cl_zplssq_cpu_func(void *descr[], void *cl_arg) } else { SCLSSQ[1] = SCLSSQ[1] + (SCALESUMSQ[1] * (( SCALESUMSQ[0] / SCLSSQ[0] ) * ( SCALESUMSQ[0] / SCLSSQ[0] ))); } + + (void)cl_arg; } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -108,8 +113,10 @@ void MORSE_TASK_zplssq2( const MORSE_option_t *options, { struct starpu_codelet *codelet = &cl_zplssq2; void (*callback)(void*) = options->profiling ? cl_zplssq2_callback : NULL; - if ( morse_desc_islocal( RESULT, RESULTm, RESULTn ) ){ - starpu_insert_task(codelet, + + if ( morse_desc_islocal( RESULT, RESULTm, RESULTn ) ) { + starpu_insert_task( + starpu_mpi_codelet(codelet), STARPU_RW, RTBLKADDR(RESULT, double, RESULTm, RESULTn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, @@ -129,6 +136,8 @@ static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg) RESULT = (double *)STARPU_MATRIX_GET_PTR(descr[0]); RESULT[0] = RESULT[0] * sqrt( RESULT[1] ); + + (void)cl_arg; } #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c index af1449231caf29f5fb4be0eb28de91d15dc41e9c..2f5e6cfc984e4021f6c2506aa5b0c386baf98362 100644 --- a/runtime/starpu/codelets/codelet_zpotrf.c +++ b/runtime/starpu/codelets/codelet_zpotrf.c @@ -50,7 +50,7 @@ void MORSE_TASK_zpotrf(const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &n, sizeof(int), STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c index 79c90bb56098fd44afe6c1da8cfa4a2d39f02657..20dfcd9da62fa92a5faa445b4704d768cbd64449 100644 --- a/runtime/starpu/codelets/codelet_zssssm.c +++ b/runtime/starpu/codelets/codelet_zssssm.c @@ -121,7 +121,7 @@ void MORSE_TASK_zssssm(const MORSE_option_t *options, morse_desc_islocal( L2, L2m, L2n ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m1, sizeof(int), STARPU_VALUE, &n1, sizeof(int), STARPU_VALUE, &m2, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index d7d557ef7765991cfaa06f39d6e57a50f2ba396b..78ad94c01ba60bd5c65981017bb4185e7ecd577d 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -53,7 +53,7 @@ void MORSE_TASK_zsymm(const MORSE_option_t *options, morse_desc_islocal( C, Cm, Cn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index a9d7a0ed2acbdee7791b7f33004ac4a9d2be0dfc..d2a59d995c18b9d854ceb989f0d0e713f2879cf0 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -53,7 +53,7 @@ void MORSE_TASK_zsyr2k(const MORSE_option_t *options, morse_desc_islocal( C, Cm, Cn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &n, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 33e07811c1ce53717e4d073ad0b43c3afcf48587..c38981ef4a49b300d3bc16a6f00dd3f20d56f6ad 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -51,7 +51,7 @@ void MORSE_TASK_zsyrk(const MORSE_option_t *options, morse_desc_islocal( C, Cm, Cn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &n, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c index 29d2f2df3b7e3f51ad905e2dea16e8dad92c87cd..7335678bddfc08d70a921a71fa00218aa8233253 100644 --- a/runtime/starpu/codelets/codelet_zsyssq.c +++ b/runtime/starpu/codelets/codelet_zsyssq.c @@ -35,8 +35,10 @@ void MORSE_TASK_zsyssq( const MORSE_option_t *options, struct starpu_codelet *codelet = &cl_zsyssq; void (*callback)(void*) = options->profiling ? cl_zgessq_callback : NULL; if ( morse_desc_islocal( A, Am, An ) || - morse_desc_islocal( SCALESUMSQ, SCALESUMSQm, SCALESUMSQn ) ){ - starpu_insert_task(codelet, + morse_desc_islocal( SCALESUMSQ, SCALESUMSQm, SCALESUMSQn ) ) + { + starpu_insert_task( + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), diff --git a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c index 6a375295fbd1f45d5554dcdb871dd0284a42b872..a1592b9aa3d44d6f968debea2e54c6d960e93cb5 100644 --- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c @@ -44,7 +44,7 @@ void MORSE_TASK_zsytrf_nopiv(const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &n, sizeof(int), STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), diff --git a/runtime/starpu/codelets/codelet_ztile_zero.c b/runtime/starpu/codelets/codelet_ztile_zero.c index 567e231a6008aa3dd9a6d4ae33400f52a22c55f0..553729e36cf47cea9916bd6153d30ea29f90f559 100644 --- a/runtime/starpu/codelets/codelet_ztile_zero.c +++ b/runtime/starpu/codelets/codelet_ztile_zero.c @@ -41,7 +41,7 @@ void MORSE_TASK_ztile_zero(const const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &X1, sizeof(int), STARPU_VALUE, &X2, sizeof(int), STARPU_VALUE, &Y1, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c index 382d64d00131df4f72412c1012c4f3ae8c8dcb04..1a6a8408584bfd0e861b08db5787c7552610ea14 100644 --- a/runtime/starpu/codelets/codelet_ztpmqrt.c +++ b/runtime/starpu/codelets/codelet_ztpmqrt.c @@ -42,7 +42,7 @@ void MORSE_TASK_ztpmqrt( const MORSE_option_t *options, morse_desc_islocal( T, Tm, Tn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &M, sizeof(int), @@ -114,7 +114,6 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg) int N; int K; int L; - int k; int ib; const cuDoubleComplex *V; int ldv; @@ -137,11 +136,10 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg) &ldv, &ldt, &lda, &ldb ); stream = starpu_cuda_get_local_stream(); - cublasSetKernelStream( stream ); CUDA_ztpmqrt( side, trans, M, N, K, L, ib, - A, lda, B, ldb, V, ldv, T, ldt, + V, ldv, T, ldt, A, lda, B, ldb, W, stream ); #ifndef STARPU_CUDA_ASYNC diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c index 7c00226bbd05b5d68b97fbfcf15202240384eb58..8121d765e808fd7fefe9add88dec4956572ddaf6 100644 --- a/runtime/starpu/codelets/codelet_ztpqrt.c +++ b/runtime/starpu/codelets/codelet_ztpqrt.c @@ -39,7 +39,7 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options, morse_desc_islocal( T, Tm, Tn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &M, sizeof(int), STARPU_VALUE, &N, sizeof(int), STARPU_VALUE, &L, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c index 53cab26a45dad0d556c5b66120c8ec2bb4eaba24..9147e25affaa5222547338aec3fc03f8ae3ca6b8 100644 --- a/runtime/starpu/codelets/codelet_ztradd.c +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -99,7 +99,7 @@ void MORSE_TASK_ztradd(const MORSE_option_t *options, morse_desc_islocal( B, Bm, Bn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), @@ -117,6 +117,8 @@ void MORSE_TASK_ztradd(const MORSE_option_t *options, #endif 0); } + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c index f8aa3743e2500d905a350efff4e8be8bdd937a84..859e2688837be2d4ccb1cd8ea89675f71fdf20e2 100644 --- a/runtime/starpu/codelets/codelet_ztrasm.c +++ b/runtime/starpu/codelets/codelet_ztrasm.c @@ -35,7 +35,7 @@ void MORSE_TASK_ztrasm(const MORSE_option_t *options, struct starpu_codelet *codelet = &cl_ztrasm; void (*callback)(void*) = options->profiling ? cl_ztrasm_callback : NULL; starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &storev, sizeof(MORSE_enum), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &diag, sizeof(MORSE_enum), diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index 745d429bdffbcd1d72b92126056b50c86c48858c..0072d1f24f34fac62ccf1fa6cf03876e3fa520e2 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -51,7 +51,7 @@ void MORSE_TASK_ztrmm(const MORSE_option_t *options, morse_desc_islocal( B, Bm, Bn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &transA, sizeof(MORSE_enum), diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index 4bbea5e1a6b5aa276493c8b62b07bc5f06709edd..dbed609debc3c2688ac937a5f6cb5b3f49f9f027 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -70,7 +70,7 @@ void MORSE_TASK_ztrsm(const MORSE_option_t *options, ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &transA, sizeof(MORSE_enum), diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c index 92ef401a197c6132356374321a0f63b60b575de3..ce875156976c82406e31e86814e9b36fd6caa736 100644 --- a/runtime/starpu/codelets/codelet_ztrssq.c +++ b/runtime/starpu/codelets/codelet_ztrssq.c @@ -36,8 +36,10 @@ void MORSE_TASK_ztrssq( const MORSE_option_t *options, struct starpu_codelet *codelet = &cl_ztrssq; void (*callback)(void*) = options->profiling ? cl_ztrasm_callback : NULL; if ( morse_desc_islocal( A, Am, An ) || - morse_desc_islocal( SCALESUMSQ, SCALESUMSQm, SCALESUMSQn ) ){ - starpu_insert_task(codelet, + morse_desc_islocal( SCALESUMSQ, SCALESUMSQm, SCALESUMSQn ) ) + { + starpu_insert_task( + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &diag, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c index 962ef0b7663957d3f6e56ce01832a0efbe44a51d..4d4b8a23c03f9aa73551bb4407a98c838409bdbd 100644 --- a/runtime/starpu/codelets/codelet_ztrtri.c +++ b/runtime/starpu/codelets/codelet_ztrtri.c @@ -51,7 +51,7 @@ void MORSE_TASK_ztrtri(const MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(MORSE_enum), STARPU_VALUE, &diag, sizeof(MORSE_enum), STARPU_VALUE, &n, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_ztslqt.c b/runtime/starpu/codelets/codelet_ztslqt.c index 653dffff04fa2b127c197b26889b80b20330bcf1..04c75182b0010da72689f834c8c783d20a1db5b9 100644 --- a/runtime/starpu/codelets/codelet_ztslqt.c +++ b/runtime/starpu/codelets/codelet_ztslqt.c @@ -124,7 +124,7 @@ void MORSE_TASK_ztslqt(const MORSE_option_t *options, morse_desc_islocal( T, Tm, Tn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), @@ -170,7 +170,7 @@ static void cl_ztslqt_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work); - WORK = TAU + max( m, n ); + WORK = TAU + chameleon_max( m, n ); CORE_ztslqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); } @@ -200,7 +200,7 @@ static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg) h_A2 = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work); h_T = h_A2 + ib*n; h_TAU = h_T + ib*n; - h_W = h_TAU + max(m,n); + h_W = h_TAU + chameleon_max(m,n); h_D = h_W + ib*m; stream = starpu_cuda_get_local_stream(); diff --git a/runtime/starpu/codelets/codelet_ztsmlq.c b/runtime/starpu/codelets/codelet_ztsmlq.c index 4215279ed58edd836ddd9f0555f974c6201cdfa5..62fa28f927f7f03ac47ebbeba1a6fd1d8c925b35 100644 --- a/runtime/starpu/codelets/codelet_ztsmlq.c +++ b/runtime/starpu/codelets/codelet_ztsmlq.c @@ -151,7 +151,7 @@ void MORSE_TASK_ztsmlq(const MORSE_option_t *options, morse_desc_islocal( T, Tm, Tn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m1, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c index da3a7516e47f2eaa77b35e72340e2664c6b47830..0c1d3625584653584bd772962b5ca86ca1ac24b6 100644 --- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c @@ -52,7 +52,7 @@ void MORSE_TASK_ztsmlq_hetra1(const MORSE_option_t *options, morse_desc_islocal( V, Vm, Vn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m1, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_ztsmqr.c b/runtime/starpu/codelets/codelet_ztsmqr.c index 661b597e69800819b25f6c18a091734e503317da..7183c7f6f42c97a0ff19fdc165f4a6352c5ccf53 100644 --- a/runtime/starpu/codelets/codelet_ztsmqr.c +++ b/runtime/starpu/codelets/codelet_ztsmqr.c @@ -177,7 +177,7 @@ void MORSE_TASK_ztsmqr(const MORSE_option_t *options, rank_changed ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m1, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c index 2af83b5d23cd6239f9217ad18235bb774071d240..16edc18b40e71dfa1468fd05569d3ea94ab01c71 100644 --- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c @@ -52,7 +52,7 @@ void MORSE_TASK_ztsmqr_hetra1(const MORSE_option_t *options, morse_desc_islocal( V, Vm, Vn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m1, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_ztsqrt.c b/runtime/starpu/codelets/codelet_ztsqrt.c index 63660c0a9a6bbf2acf632d581935cb5b08e95ebe..7081a754a0aa53d583b6ecf97ce9f03840b46125 100644 --- a/runtime/starpu/codelets/codelet_ztsqrt.c +++ b/runtime/starpu/codelets/codelet_ztsqrt.c @@ -113,7 +113,7 @@ void MORSE_TASK_ztsqrt(const MORSE_option_t *options, morse_desc_islocal( T, Tm, Tn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), @@ -160,7 +160,7 @@ static void cl_ztsqrt_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work); - WORK = TAU + max( m, n ); + WORK = TAU + chameleon_max( m, n ); CORE_ztsqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); } @@ -190,7 +190,7 @@ static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg) h_A2 = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work); h_T = h_A2 + m*ib; h_TAU = h_T + ib*ib; - h_W = h_TAU + max(m,n); + h_W = h_TAU + chameleon_max(m,n); h_D = h_W + ib*n; stream = starpu_cuda_get_local_stream(); diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c index f1ae6bb1722bda85e2738e8743074d0e7913ec55..0628740d9bc0f0cc6fb797b80eb45addcc3e7599 100644 --- a/runtime/starpu/codelets/codelet_ztstrf.c +++ b/runtime/starpu/codelets/codelet_ztstrf.c @@ -118,7 +118,7 @@ void MORSE_TASK_ztstrf(const MORSE_option_t *options, morse_desc_islocal( L, Lm, Ln ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), @@ -187,7 +187,7 @@ static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg) { int i, sb; for (i=0; i<n; i+=ib) { - sb = min( ib, n-i ); + sb = chameleon_min( ib, n-i ); CORE_zlacpy(MorseUpperLower, sb, sb, L+(i*ldl), ldl, L+(i*ldl)+ib, ldl ); CORE_ztrtri( MorseLower, MorseUnit, sb, L+(i*ldl)+ib, ldl, &info ); diff --git a/runtime/starpu/codelets/codelet_zttlqt.c b/runtime/starpu/codelets/codelet_zttlqt.c index 518d965e8a2794ea57d72f69ee97cee973fbabee..269cec7a973661dff42dbec34f6d0dc3c13ed39a 100644 --- a/runtime/starpu/codelets/codelet_zttlqt.c +++ b/runtime/starpu/codelets/codelet_zttlqt.c @@ -122,7 +122,7 @@ void MORSE_TASK_zttlqt(const MORSE_option_t *options, morse_desc_islocal( T, Tm, Tn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), @@ -166,7 +166,7 @@ static void cl_zttlqt_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt); - WORK = TAU + max( m, n ); + WORK = TAU + chameleon_max( m, n ); CORE_zttlqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); } diff --git a/runtime/starpu/codelets/codelet_zttmlq.c b/runtime/starpu/codelets/codelet_zttmlq.c index cd6fbaff4bec64f248a983af6df78dbb0b7398d0..ca03af3c7e836b6a241288974a683b19e93d0573 100644 --- a/runtime/starpu/codelets/codelet_zttmlq.c +++ b/runtime/starpu/codelets/codelet_zttmlq.c @@ -144,7 +144,7 @@ void MORSE_TASK_zttmlq(const MORSE_option_t *options, morse_desc_islocal( T, Tm, Tn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m1, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zttmqr.c b/runtime/starpu/codelets/codelet_zttmqr.c index 453ed0af0606b827ad3cce9da3575c183415721d..e31b558607eec2c9588e636221fc2bb890e70fac 100644 --- a/runtime/starpu/codelets/codelet_zttmqr.c +++ b/runtime/starpu/codelets/codelet_zttmqr.c @@ -170,7 +170,7 @@ void MORSE_TASK_zttmqr(const MORSE_option_t *options, rank_changed ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m1, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zttqrt.c b/runtime/starpu/codelets/codelet_zttqrt.c index a5bf954ce62c6fdf520c72f9e7f78352c33c2f8b..b368a3202a70036be6afb7db92e29f8ae9a91f25 100644 --- a/runtime/starpu/codelets/codelet_zttqrt.c +++ b/runtime/starpu/codelets/codelet_zttqrt.c @@ -122,7 +122,7 @@ void MORSE_TASK_zttqrt(const MORSE_option_t *options, morse_desc_islocal( T, Tm, Tn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), @@ -166,7 +166,7 @@ static void cl_zttqrt_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt); - WORK = TAU + max( m, n ); + WORK = TAU + chameleon_max( m, n ); CORE_zttqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); } diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c index a615c4784ccf2375a9a16d8bf54445f8122b9765..956d92643e51e0d09601ce45be9ec514c10beaf9 100644 --- a/runtime/starpu/codelets/codelet_zunmlq.c +++ b/runtime/starpu/codelets/codelet_zunmlq.c @@ -132,7 +132,7 @@ void MORSE_TASK_zunmlq(const MORSE_option_t *options, morse_desc_islocal( C, Cm, Cn ) ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), @@ -191,7 +191,6 @@ static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg) #if defined(CHAMELEON_USE_CUDA) static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) { - MORSE_starpu_ws_t *d_work; MORSE_enum side; MORSE_enum trans; int m; @@ -201,7 +200,6 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) const cuDoubleComplex *A, *T; cuDoubleComplex *C, *WORK; int lda, ldt, ldc, ldwork; - int info = 0; CUstream stream; starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, @@ -229,4 +227,4 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC); +CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c index acb7279eda15aeb8f3c2d8cb60ef6e7be517796a..fcbf06a7ec3ae8dda9addac2d6cc6ad916c140cb 100644 --- a/runtime/starpu/codelets/codelet_zunmqr.c +++ b/runtime/starpu/codelets/codelet_zunmqr.c @@ -154,7 +154,7 @@ void MORSE_TASK_zunmqr(const MORSE_option_t *options, rank_changed ) { starpu_insert_task( - codelet, + starpu_mpi_codelet(codelet), STARPU_VALUE, &side, sizeof(MORSE_enum), STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), @@ -216,7 +216,6 @@ static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg) #if defined(CHAMELEON_USE_CUDA) static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) { - MORSE_starpu_ws_t *d_work; MORSE_enum side; MORSE_enum trans; int m; @@ -226,7 +225,6 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) const cuDoubleComplex *A, *T; cuDoubleComplex *C, *WORK; int lda, ldt, ldc, ldwork; - int info = 0; CUstream stream; starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c index fe0312012332a2b2570da403482da5df5623d9ff..8e14c725f078fea0ad3201793d2da89aebfba708 100644 --- a/runtime/starpu/control/runtime_descriptor.c +++ b/runtime/starpu/control/runtime_descriptor.c @@ -134,7 +134,7 @@ void RUNTIME_desc_create( MORSE_desc_t *desc ) morse_error("RUNTIME_desc_create", "MPI_TAG_UB not known by MPI"); } - while ( ((uintptr_t)(1UL<<tag_width - 1) > (uint)(*tag_ub) ) + while ( ((uintptr_t)((1UL<<tag_width) - 1) > (uintptr_t)(*tag_ub) ) && (tag_width >= TAG_WIDTH_MIN) ) { tag_width--; tag_sep--; @@ -149,17 +149,17 @@ void RUNTIME_desc_create( MORSE_desc_t *desc ) } /* Check that we won't create overflow in tags used */ - if ( (lnt*lmt) > ((uintptr_t)(1UL<<tag_sep)) ) { + if ( ((uintptr_t)(lnt*lmt)) > ((uintptr_t)(1UL<<tag_sep)) ) { morse_fatal_error("RUNTIME_desc_create", "Too many tiles in the descriptor for MPI tags"); return; } assert(lmt*lmt<=(1<<tag_sep)); - if (desc->id >= 1UL<<(tag_width-tag_sep)) { + if ( ((uintptr_t)desc->id) >= (uintptr_t)(1UL<<(tag_width-tag_sep)) ) { morse_fatal_error("RUNTIME_desc_create", "Number of descriptor available in MPI mode out of stock"); return; } - assert( desc->id < (1UL<<(tag_width-tag_sep)) ); + assert( ((uintptr_t)desc->id) < (uintptr_t)(1UL<<(tag_width-tag_sep)) ); } #endif } @@ -193,9 +193,6 @@ void RUNTIME_desc_destroy( MORSE_desc_t *desc ) #if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION) if (desc->use_mat == 1 && desc->register_mat == 1){ - int64_t eltsze = MORSE_Element_Size(desc->dtyp); - size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) * eltsze; - /* Unmap the pinned memory associated to the matrix */ if (cudaHostUnregister(desc->mat) != cudaSuccess) { diff --git a/runtime/starpu/include/morse_starpu.h b/runtime/starpu/include/morse_starpu.h index efbc57cf1c1ec1e176f14fded91b827589ad411d..64edb2b180eb9ad07c2d2aec807683c68b30cc21 100644 --- a/runtime/starpu/include/morse_starpu.h +++ b/runtime/starpu/include/morse_starpu.h @@ -60,7 +60,14 @@ typedef struct starpu_conf starpu_conf_t; */ #if defined(CHAMELEON_USE_MPI) #undef STARPU_REDUX -#define starpu_insert_task(...) starpu_mpi_insert_task(MPI_COMM_WORLD, __VA_ARGS__) +//#define starpu_insert_task(...) starpu_mpi_insert_task(MPI_COMM_WORLD, __VA_ARGS__) +#define starpu_insert_task starpu_mpi_insert_task +#define starpu_mpi_codelet(_codelet_) MPI_COMM_WORLD, _codelet_ + +#else + +#define starpu_mpi_codelet(_codelet_) _codelet_ + #endif /* diff --git a/timing/time_zgemm.c b/timing/time_zgemm.c index c00f590c77b6c55413525a8582fb6f7fe9c31237..57e50ba0fc8ea0225acb7513f23b017e681ebe3d 100644 --- a/timing/time_zgemm.c +++ b/timing/time_zgemm.c @@ -26,13 +26,13 @@ #include "timing_zauxiliary.h" static int -RunTest(int *iparam, double *dparam, morse_time_t *t_) +RunTest(int *iparam, double *dparam, morse_time_t *t_) { MORSE_Complex64_t alpha, beta; PASTE_CODE_IPARAM_LOCALS( iparam ); - LDB = max(K, iparam[IPARAM_LDB]); - LDC = max(M, iparam[IPARAM_LDC]); + LDB = chameleon_max(K, iparam[IPARAM_LDB]); + LDC = chameleon_max(M, iparam[IPARAM_LDC]); /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX( A, 1, MORSE_Complex64_t, LDA, K ); diff --git a/timing/time_zgemm_tile.c b/timing/time_zgemm_tile.c index 95d0d0c3d17a574ab508a1e5cc8308e0621745f1..7bd1ba7614b99ef773643b75ad9bc23581137992 100644 --- a/timing/time_zgemm_tile.c +++ b/timing/time_zgemm_tile.c @@ -26,14 +26,14 @@ #include "timing_zauxiliary.h" static int -RunTest(int *iparam, double *dparam, morse_time_t *t_) +RunTest(int *iparam, double *dparam, morse_time_t *t_) { MORSE_Complex64_t alpha, beta; PASTE_CODE_IPARAM_LOCALS( iparam ); - LDB = max(K, iparam[IPARAM_LDB]); - LDC = max(M, iparam[IPARAM_LDC]); + LDB = chameleon_max(K, iparam[IPARAM_LDB]); + LDC = chameleon_max(M, iparam[IPARAM_LDC]); /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, M, K ); diff --git a/timing/time_zgetrf_incpiv.c b/timing/time_zgetrf_incpiv.c index de056859261560c10f5a2e57d54adbc4f6f0a5db..28a5814dd670cabedc596621ec2a01c8bcdbc37c 100644 --- a/timing/time_zgetrf_incpiv.c +++ b/timing/time_zgetrf_incpiv.c @@ -26,7 +26,7 @@ #include "timing_zauxiliary.h" static int -RunTest(int *iparam, double *dparam, morse_time_t *t_) +RunTest(int *iparam, double *dparam, morse_time_t *t_) { MORSE_desc_t *L; int *piv; @@ -44,7 +44,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) MORSE_zplrnt(M, N, A, LDA, 3456); /* Allocate Workspace */ - MORSE_Alloc_Workspace_zgesv_incpiv( min(M,N), &L, &piv, P, Q); + MORSE_Alloc_Workspace_zgesv_incpiv( chameleon_min(M,N), &L, &piv, P, Q); /* Save AT in lapack layout for check */ PASTE_CODE_ALLOCATE_COPY( Acpy, check, MORSE_Complex64_t, A, LDA, N ); @@ -63,8 +63,8 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) MORSE_zgetrs_incpiv( MorseNoTrans, N, NRHS, A, LDA, L, piv, X, LDB ); dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, Acpy, LDA, B, X, LDB, - &(dparam[IPARAM_ANORM]), - &(dparam[IPARAM_BNORM]), + &(dparam[IPARAM_ANORM]), + &(dparam[IPARAM_BNORM]), &(dparam[IPARAM_XNORM])); free( Acpy ); free( B ); free( X ); diff --git a/timing/time_zgetrf_incpiv_tile.c b/timing/time_zgetrf_incpiv_tile.c index 0aea8a953114a2cac15b35fd4c684051c429ff42..907f53ac3423f1fbce55a4966274d5da2e3e733d 100644 --- a/timing/time_zgetrf_incpiv_tile.c +++ b/timing/time_zgetrf_incpiv_tile.c @@ -25,7 +25,7 @@ #include "./timing.c" static int -RunTest(int *iparam, double *dparam, morse_time_t *t_) +RunTest(int *iparam, double *dparam, morse_time_t *t_) { MORSE_desc_t *descL; int *piv; @@ -52,7 +52,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) MORSE_zplrnt_Tile(descA, 3456); /* Allocate Workspace */ - MORSE_Alloc_Workspace_zgesv_incpiv_Tile(min(M,N), &descL, &piv, P, Q); + MORSE_Alloc_Workspace_zgesv_incpiv_Tile(chameleon_min(M,N), &descL, &piv, P, Q); /* Save A for check */ if (check == 1){ diff --git a/timing/time_zgetrs_incpiv_tile.c b/timing/time_zgetrs_incpiv_tile.c index 491da4f43ff45f71ef728f908fa744ded2c7a1ac..3ba081e9cdbd64632b5a7d2c39e1cf61a9529e54 100644 --- a/timing/time_zgetrs_incpiv_tile.c +++ b/timing/time_zgetrs_incpiv_tile.c @@ -25,7 +25,7 @@ #include "./timing.c" static int -RunTest(int *iparam, double *dparam, morse_time_t *t_) +RunTest(int *iparam, double *dparam, morse_time_t *t_) { MORSE_desc_t *descL; int *piv; @@ -53,7 +53,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) MORSE_zplrnt_Tile(descA, 3456); /* Allocate Workspace */ - MORSE_Alloc_Workspace_zgesv_incpiv_Tile(min(M,N), &descL, &piv, P, Q); + MORSE_Alloc_Workspace_zgesv_incpiv_Tile(chameleon_min(M,N), &descL, &piv, P, Q); /* Save A for check */ if (check == 1){ diff --git a/timing/time_zheevd_tile.c b/timing/time_zheevd_tile.c index d659b82138de39592c6925f0f2ce709560e15f77..56b08fa76c9a77d4f3e8198f55e4b4c39607074b 100644 --- a/timing/time_zheevd_tile.c +++ b/timing/time_zheevd_tile.c @@ -34,7 +34,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) int vec = MorseVec; int INFO; - LDA = max(LDA, N); + LDA = chameleon_max(LDA, N); /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX( A, 1, MORSE_Complex64_t, LDA, N); diff --git a/timing/time_zlange.c b/timing/time_zlange.c index 502ceee3c5b30e75fe394f2c2688ef0d367c0be3..25ffb956943f3dcf0d4cd08ed10fdadbea501d26 100644 --- a/timing/time_zlange.c +++ b/timing/time_zlange.c @@ -45,7 +45,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) /* Check the solution */ if ( check ) { - double *work = (double*) malloc(max(M,N)*sizeof(double)); + double *work = (double*) malloc(chameleon_max(M,N)*sizeof(double)); normlapack = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(norm), M, N, A, LDA, work); result = fabs(normmorse - normlapack); switch(norm) { diff --git a/timing/time_zlange_tile.c b/timing/time_zlange_tile.c index 36af154fefdf261cd958308d9784e197050bbc2c..a12174a9418578a7929adb1ca23379aedd93a95c 100644 --- a/timing/time_zlange_tile.c +++ b/timing/time_zlange_tile.c @@ -47,7 +47,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) { /* Allocate Data */ PASTE_TILE_TO_LAPACK( descA, A, check, MORSE_Complex64_t, M, N ); - double *work = (double*) malloc(max(M,N)*sizeof(double)); + double *work = (double*) malloc(chameleon_max(M,N)*sizeof(double)); normlapack = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(norm), M, N, A, LDA, work); result = fabs(normmorse - normlapack); switch(norm) { diff --git a/timing/time_zposv.c b/timing/time_zposv.c index 979a2f5303dc298a80b2b46de68961a91871881e..67af772cff309d5aedb4075e2c72af046ad139e1 100644 --- a/timing/time_zposv.c +++ b/timing/time_zposv.c @@ -26,17 +26,17 @@ #include "timing_zauxiliary.h" static int -RunTest(int *iparam, double *dparam, morse_time_t *t_) +RunTest(int *iparam, double *dparam, morse_time_t *t_) { PASTE_CODE_IPARAM_LOCALS( iparam ); MORSE_enum uplo = MorseUpper; - - LDA = max(LDA, N); - + + LDA = chameleon_max(LDA, N); + /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX( A, 1, MORSE_Complex64_t, LDA, N ); PASTE_CODE_ALLOCATE_MATRIX( X, 1, MORSE_Complex64_t, LDB, NRHS ); - + /* Initialiaze Data */ MORSE_zplghe((double)N, MorseUpperLower, N, A, LDA, 51 ); MORSE_zplrnt( N, NRHS, X, LDB, 5673 ); @@ -54,13 +54,13 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) if (check) { dparam[IPARAM_RES] = z_check_solution(N, N, NRHS, Acpy, LDA, B, X, LDB, - &(dparam[IPARAM_ANORM]), - &(dparam[IPARAM_BNORM]), + &(dparam[IPARAM_ANORM]), + &(dparam[IPARAM_BNORM]), &(dparam[IPARAM_XNORM])); free(Acpy); free(B); } - free(A); free(X); + free(A); free(X); return 0; } diff --git a/timing/time_zposv_tile.c b/timing/time_zposv_tile.c index 205e06ae88a496ac74b9a79b562168937d0f904d..319f70e64e9194c51e95c4349b4a5b79cfdcf9d8 100644 --- a/timing/time_zposv_tile.c +++ b/timing/time_zposv_tile.c @@ -30,7 +30,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) PASTE_CODE_IPARAM_LOCALS( iparam ); MORSE_enum uplo = MorseUpper; - LDA = max(LDA, N); + LDA = chameleon_max(LDA, N); /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, N, N ); diff --git a/timing/time_zpotrf.c b/timing/time_zpotrf.c index 0f4d46619dc57bf59e4df13e6cbd44b84ac691d2..6a242c229d7073ddefe94ca5ef48a7ff492a5cfd 100644 --- a/timing/time_zpotrf.c +++ b/timing/time_zpotrf.c @@ -31,7 +31,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) PASTE_CODE_IPARAM_LOCALS( iparam ); int uplo = MorseLower; - LDA = max(LDA, N); + LDA = chameleon_max(LDA, N); /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX( A, 1, MORSE_Complex64_t, LDA, N ); diff --git a/timing/time_zpotrf_tile.c b/timing/time_zpotrf_tile.c index 14fdc32fc19a98602e03266494a4193eefde0291..475aea6948124cfe987fe174a1bbd5ec25d3e90b 100644 --- a/timing/time_zpotrf_tile.c +++ b/timing/time_zpotrf_tile.c @@ -30,7 +30,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) PASTE_CODE_IPARAM_LOCALS( iparam ); int uplo = MorseUpper; - LDA = max(LDA, N); + LDA = chameleon_max(LDA, N); /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, N, N ); diff --git a/timing/time_zpotri_tile.c b/timing/time_zpotri_tile.c index a8e8bcab46eff4e5c0ea61ea4f558c319c508cb7..07ef6007f8a71bf8fd275301ce0c3af344ca26b2 100644 --- a/timing/time_zpotri_tile.c +++ b/timing/time_zpotri_tile.c @@ -27,19 +27,19 @@ #include "./timing.c" static int -RunTest(int *iparam, double *dparam, morse_time_t *t_) +RunTest(int *iparam, double *dparam, morse_time_t *t_) { PASTE_CODE_IPARAM_LOCALS( iparam ); MORSE_enum uplo = MorseLower; - LDA = max(LDA, N); + LDA = chameleon_max(LDA, N); /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, N, N ); - /* - * Initialize Data - * It's done in static to avoid having the same sequence than one + /* + * Initialize Data + * It's done in static to avoid having the same sequence than one * the function we want to trace */ MORSE_zplghe_Tile( (double)N, MorseUpperLower, descA, 51 ); diff --git a/timing/time_zpotrs_tile.c b/timing/time_zpotrs_tile.c index c50e86b05ca860de6318b3e001a5357151996ae1..131393d41d10380c336cefd4210da8e75bcb8cf4 100644 --- a/timing/time_zpotrs_tile.c +++ b/timing/time_zpotrs_tile.c @@ -30,7 +30,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) PASTE_CODE_IPARAM_LOCALS( iparam ); int uplo = MorseUpper; - LDA = max(LDA, N); + LDA = chameleon_max(LDA, N); check = 1; /* Allocate Data */ diff --git a/timing/time_zsytrf_tile.c b/timing/time_zsytrf_tile.c index 6c1d880edc120245f527a447db4c55562bf1ea80..b3ea2ab4b705ef69ce3985b1f9c58f19938a84c8 100644 --- a/timing/time_zsytrf_tile.c +++ b/timing/time_zsytrf_tile.c @@ -30,7 +30,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) PASTE_CODE_IPARAM_LOCALS( iparam ); int uplo = MorseUpper; - LDA = max(LDA, N); + LDA = chameleon_max(LDA, N); /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, N, N ); diff --git a/timing/time_ztrsm.c b/timing/time_ztrsm.c index 1216bb19792c7e71d8986f01855dc83f6f1fd882..96413f96fe8e07dcc1a353cd8337c6117d502b31 100644 --- a/timing/time_ztrsm.c +++ b/timing/time_ztrsm.c @@ -30,8 +30,8 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) { MORSE_Complex64_t alpha; PASTE_CODE_IPARAM_LOCALS( iparam ); - - LDA = max( LDA, N ); + + LDA = chameleon_max( LDA, N ); /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX( A, 1, MORSE_Complex64_t, LDA, N ); @@ -57,10 +57,10 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) /* Check the solution */ if (check) { - dparam[IPARAM_RES] = z_check_trsm( MorseLeft, MorseUpper, MorseNoTrans, MorseUnit, + dparam[IPARAM_RES] = z_check_trsm( MorseLeft, MorseUpper, MorseNoTrans, MorseUnit, N, NRHS, alpha, A, LDA, B, B2, LDB, - &(dparam[IPARAM_ANORM]), + &(dparam[IPARAM_ANORM]), &(dparam[IPARAM_BNORM]), &(dparam[IPARAM_XNORM])); free(B2); diff --git a/timing/timing.c b/timing/timing.c index 28bb1c23a6e2b514d101fe3fe44177819492a460..533fc41c1a582a0f41a1de5ad79c1f304f82a2dd 100644 --- a/timing/timing.c +++ b/timing/timing.c @@ -700,9 +700,9 @@ main(int argc, char *argv[]) { { if ( nx > 0 ) { iparam[IPARAM_M] = i; - iparam[IPARAM_N] = max(1, i/nx); + iparam[IPARAM_N] = chameleon_max(1, i/nx); } else if ( mx > 0 ) { - iparam[IPARAM_M] = max(1, i/mx); + iparam[IPARAM_M] = chameleon_max(1, i/mx); iparam[IPARAM_N] = i; } else { if ( m == -1 ) diff --git a/timing/timing.h b/timing/timing.h index 948866a778d05dd648a393b2a35b357851d35b2f..fd2ac6ac3549b75900eab0d622addc5d083f080a 100644 --- a/timing/timing.h +++ b/timing/timing.h @@ -86,9 +86,9 @@ enum dparam_timing { int64_t N = iparam[IPARAM_N]; \ int64_t K = iparam[IPARAM_K]; \ int64_t NRHS = K; \ - int64_t LDA = max(M, iparam[IPARAM_LDA]); \ - int64_t LDB = max(N, iparam[IPARAM_LDB]); \ - int64_t LDC = max(K, iparam[IPARAM_LDC]); \ + int64_t LDA = chameleon_max(M, iparam[IPARAM_LDA]); \ + int64_t LDB = chameleon_max(N, iparam[IPARAM_LDB]); \ + int64_t LDC = chameleon_max(K, iparam[IPARAM_LDC]); \ int64_t IB = iparam[IPARAM_IB]; \ int64_t MB = iparam[IPARAM_MB]; \ int64_t NB = iparam[IPARAM_NB]; \ diff --git a/timing/timing_zauxiliary.c b/timing/timing_zauxiliary.c index 45c844251b025d144f7a911a6e3c0f69a741edf0..2d9dba18dcddb9371939207a22ff7c9be2aad7d4 100644 --- a/timing/timing_zauxiliary.c +++ b/timing/timing_zauxiliary.c @@ -33,7 +33,7 @@ int z_check_orthogonality(int M, int N, int LDQ, MORSE_Complex64_t *Q) double normQ; int info_ortho; int i; - int minMN = min(M, N); + int minMN = chameleon_min(M, N); double eps; double *work = (double *)malloc(minMN*sizeof(double)); @@ -89,7 +89,7 @@ int z_check_QRfactorization(int M, int N, MORSE_Complex64_t *A1, MORSE_Complex64 MORSE_Complex64_t *Ql = (MORSE_Complex64_t *)malloc(M*N*sizeof(MORSE_Complex64_t)); MORSE_Complex64_t *Residual = (MORSE_Complex64_t *)malloc(M*N*sizeof(MORSE_Complex64_t)); - double *work = (double *)malloc(max(M,N)*sizeof(double)); + double *work = (double *)malloc(chameleon_max(M,N)*sizeof(double)); alpha=1.0; beta=0.0; @@ -226,7 +226,7 @@ double z_check_gemm(MORSE_enum transA, MORSE_enum transB, int M, int N, int K, { MORSE_Complex64_t beta_const = -1.0; double Rnorm; - double *work = (double *)malloc(max(K,max(M, N))* sizeof(double)); + double *work = (double *)malloc(chameleon_max(K,chameleon_max(M, N))* sizeof(double)); *Cinitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, work); *Cmorsenorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, N, Cmorse, LDC, work); @@ -256,7 +256,7 @@ double z_check_trsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum trans, MORSE_en { MORSE_Complex64_t beta_const = -1.0; double Rnorm; - double *work = (double *)malloc(max(M, NRHS)* sizeof(double)); + double *work = (double *)malloc(chameleon_max(M, NRHS)* sizeof(double)); /*double eps = LAPACKE_dlamch_work('e');*/ *Binitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', M, NRHS, Bref, LDB, work); @@ -272,7 +272,7 @@ double z_check_trsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum trans, MORSE_en Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', M, NRHS, Bref, LDB, work); Rnorm = Rnorm / *Blapacknorm; - /* max(M,NRHS) * eps);*/ + /* chameleon_max(M,NRHS) * eps);*/ free(work); @@ -291,7 +291,7 @@ double z_check_solution(int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, double Rnorm = -1.00; MORSE_Complex64_t zone = 1.0; MORSE_Complex64_t mzone = -1.0; - double *work = (double *)malloc(max(M, N)* sizeof(double)); + double *work = (double *)malloc(chameleon_max(M, N)* sizeof(double)); *anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, N, A, LDA, work); *xnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, NRHS, X, LDB, work);