diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 68a77fe561d452f473a2eef963774b4c3675bcc0..4c447adeeb43bac8f9d59709d3afee92efb4fbc0 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -66,8 +66,10 @@ set(ZSRC pztrsm.c pztrsmpl.c pztradd.c + pzlascal.c ### zgeadd.c + zlascal.c zgemm.c zhemm.c zher2k.c diff --git a/compute/pzlascal.c b/compute/pzlascal.c new file mode 100644 index 0000000000000000000000000000000000000000..524c113022d68ea916d744087e8d3444ff25d64a --- /dev/null +++ b/compute/pzlascal.c @@ -0,0 +1,103 @@ +/** + * + * @file pzlascal.c + * + * MORSE auxiliary routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.8.0 + * @author Dalal Sukkari + * @date 2010-11-15 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +#define A(m, n) A, m, n +/** + * Parallel scale of a matrix A: submits one MORSE_TASK_zlascal() per tile of the part of A selected by uplo (diagonal tiles are submitted with uplo, the other selected tiles are scaled in full); returns without submitting anything when the sequence is already in error. + **/ +void morse_pzlascal(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_option_t options; + + int tempmm, tempnn, tempmn, tempnm; + int m, n; + int ldam, ldan; + int minmnt = min(A->mt, A->nt); + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + + RUNTIME_options_init(&options, morse, sequence, request); + + switch(uplo) { + case MorseLower: + for (n = 0; n < minmnt; n++) { + tempnm = n == A->mt-1 ? A->m-n*A->mb : A->mb; + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + ldan = BLKLDD(A, n); + + MORSE_TASK_zlascal( + &options, + MorseLower, tempnm, tempnn, A->mb, + alpha, A(n, n), ldan); + + for (m = n+1; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-A->mb*m : A->mb; /* row count of tile row m: mb, not nb */ + ldam = BLKLDD(A, m); + + MORSE_TASK_zlascal( + &options, + MorseUpperLower, tempmm, tempnn, A->mb, + alpha, A(m, n), ldam); + } + } + break; + + case MorseUpper: + for (m = 0; m < minmnt; m++) { + tempmm = m == A->mt-1 ? 
A->m-A->mb*m : A->mb; /* row count of tile row m: mb, not nb */ + tempmn = m == A->nt-1 ? A->n-m*A->nb : A->nb; + ldam = BLKLDD(A, m); + + MORSE_TASK_zlascal( + &options, + MorseUpper, tempmm, tempmn, A->mb, + alpha, A(m, m), ldam); + + for (n = m+1; n < A->nt; n++) { + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + + MORSE_TASK_zlascal( + &options, + MorseUpperLower, tempmm, tempnn, A->mb, + alpha, A(m, n), ldam); + } + } + break; + + case MorseUpperLower: + default: + for (m = 0; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-A->mb*m : A->mb; /* row count of tile row m: mb, not nb */ + ldam = BLKLDD(A, m); + + for (n = 0; n < A->nt; n++) { + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + + MORSE_TASK_zlascal( + &options, + MorseUpperLower, tempmm, tempnn, A->mb, + alpha, A(m, n), ldam); + } + } + } + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, morse); + MORSE_TASK_dataflush_all(); +} diff --git a/compute/zlascal.c b/compute/zlascal.c new file mode 100644 index 0000000000000000000000000000000000000000..8c7d2f36224eee1e22e46ebad2e2c99606c94080 --- /dev/null +++ b/compute/zlascal.c @@ -0,0 +1,281 @@ +/** + * + * @file zlascal.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.8.0 + * @author Dalal Sukkari + * @date 2010-11-15 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zlascal - Scales a matrix by the scalar alpha as in + * ScaLAPACK pzlascal(). + * + * \f[ A = \alpha A \f], + * + * alpha is a scalar, and A a general, upper or lower trapezoidal matrix. + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies the shape of A: + * = MorseUpperLower: A is a general matrix. + * = MorseUpper: A is an upper trapezoidal matrix. 
+ * = MorseLower: A is a lower trapezoidal matrix. + * + * @param[in] M + * M specifies the number of rows of the matrix A. M >= 0. + * + * @param[in] N + * N specifies the number of columns of the matrix A. N >= 0. + * + * @param[in] alpha + * alpha specifies the scalar factor applied to A. + * + * @param[in,out] A + * A is a LDA-by-N array; its leading M-by-N part is the matrix to scale. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit (-i is returned when the i-th argument is illegal; other failures return a MORSE error status) + * + ******************************************************************************* + * + * @sa MORSE_zlascal_Tile + * @sa MORSE_clascal + * @sa MORSE_dlascal + * @sa MORSE_slascal + * + ******************************************************************************/ +int MORSE_zlascal(MORSE_enum uplo, int M, int N, + MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA) +{ + int NB; + int status; + MORSE_desc_t descA; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zlascal", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (uplo != MorseUpper && uplo != MorseLower && uplo != MorseUpperLower) { + morse_error("MORSE_zlascal", "illegal value of uplo"); + return -1; + } + if (M < 0) { + morse_error("MORSE_zlascal", "illegal value of M"); + return -2; + } + if (N < 0) { + morse_error("MORSE_zlascal", "illegal value of N"); + return -3; + } + if (LDA < max(1, M)) { + morse_error("MORSE_zlascal", "illegal value of LDA"); + return -6; + } + + /* Quick return: empty matrix, or alpha == 1 leaves A unchanged */ + if (M == 0 || N == 0 || + (alpha == (MORSE_Complex64_t)1.0)) + return MORSE_SUCCESS; + + /* Tune NB depending on M and N, reusing the ZGEMM tuning entry */ + status = morse_tune(MORSE_FUNC_ZGEMM, M, N, 0); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zlascal", 
"morse_tune() failed"); + return status; + } + + /* Tile size passed to the layout conversions below */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N , 0, 0, M, N,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_zlascal_Tile_Async( + uplo, alpha, &descA, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap( descA, A, NB, NB, LDA, N, sequence, &request); + RUNTIME_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_dynamic_sync();*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zlascal_Tile - Scales a matrix by the scalar alpha as in + * ScaLAPACK pzlascal(). + * + * \f[ A = \alpha A \f], + * + * alpha is a scalar, and A a general, upper or lower trapezoidal matrix. + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies the shape of A: + * = MorseUpperLower: A is a general matrix. + * = MorseUpper: A is an upper trapezoidal matrix. + * = MorseLower: A is a lower trapezoidal matrix. + * + * @param[in] alpha + * alpha specifies the scalar factor applied to A. + * + * @param[in,out] A + * Descriptor of the matrix A to scale (there is no LDA at this level). 
+ * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit; otherwise MORSE_ERR_NOT_INITIALIZED or the status recorded in the internal sequence + * + ******************************************************************************* + * + * @sa MORSE_zlascal + * @sa MORSE_zlascal_Tile_Async + * @sa MORSE_clascal_Tile + * @sa MORSE_dlascal_Tile + * @sa MORSE_slascal_Tile + * + ******************************************************************************/ +int MORSE_zlascal_Tile(MORSE_enum uplo, + MORSE_Complex64_t alpha, MORSE_desc_t *A) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zlascal_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); /* private sequence, created and destroyed within this call */ + MORSE_zlascal_Tile_Async(uplo, alpha, A, sequence, &request); + RUNTIME_sequence_wait(morse, sequence); /* wait on the sequence before reading its status */ + status = sequence->status; /* saved first: the sequence is destroyed on the next line */ + morse_sequence_destroy(morse, sequence); + return status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_zlascal_Tile_Async - Scales a matrix by the scalar alpha as in + * ScaLAPACK pzlascal(). + * Non-blocking equivalent of MORSE_zlascal_Tile(); uplo, alpha and A have the same meaning as there. + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). 
+ * + ******************************************************************************* + * + * @sa MORSE_zlascal + * @sa MORSE_zlascal_Tile + * @sa MORSE_clascal_Tile_Async + * @sa MORSE_dlascal_Tile_Async + * @sa MORSE_slascal_Tile_Async + * + ******************************************************************************/ +int MORSE_zlascal_Tile_Async(MORSE_enum uplo, + MORSE_Complex64_t alpha, MORSE_desc_t *A, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_desc_t descA; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zlascal_Tile_Async", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zlascal_Tile_Async", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zlascal_Tile_Async", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status: a sequence already in error flushes this request */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zlascal_Tile_Async", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } else { + descA = *A; /* local copy, read only by the checks below */ + } + /* Check input arguments */ + if (uplo != MorseUpper && uplo != MorseLower && uplo != MorseUpperLower) { + morse_error("MORSE_zlascal_Tile_Async", "illegal value of uplo"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + + if ( (descA.i%descA.mb != 0) || (descA.j%descA.nb != 0) ) { + morse_error("MORSE_zlascal_Tile_Async", "start indexes have to be multiple of tile size"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + + /* Quick return: empty matrix, or alpha == 1 leaves A unchanged */ + if ( (descA.m == 0) || (descA.n == 0) || + (alpha == (MORSE_Complex64_t)1.0) ) + return MORSE_SUCCESS; + + 
morse_pzlascal( uplo, alpha, A, sequence, request); + + return MORSE_SUCCESS; +} diff --git a/control/compute_z.h b/control/compute_z.h index cf34dcd1afd11c792691da2ce837c1d97daea5e7..78656825adbe77b371ca03fcd30fcf93b3ca3da4 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -118,6 +118,7 @@ void morse_pzlanhe(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *re #endif void morse_pzlansy(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzlascal(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlaset( MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_Complex64_t beta, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlaset2(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlaswp(MORSE_desc_t *B, int *IPIV, int inc, MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt index 6dc104b74a527d790c76b5f0e9b3bc584cb1d8f3..a0253041cf5b526e957538c3f3c2c4a8a3bb1bb3 100644 --- a/coreblas/compute/CMakeLists.txt +++ b/coreblas/compute/CMakeLists.txt @@ -33,6 +33,7 @@ set(ZSRC core_dzasum.c core_zaxpy.c core_zgeadd.c + core_zlascal.c core_zgelqt.c core_zgemm.c core_zgeqrt.c diff --git a/coreblas/compute/core_zlascal.c b/coreblas/compute/core_zlascal.c new file mode 100644 index 0000000000000000000000000000000000000000..bc17934d3b06cec9c58fe7dc95456f7d6cd5460c --- /dev/null +++ b/coreblas/compute/core_zlascal.c @@ -0,0 +1,104 @@ +/** + * @file core_zlascal.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. 
of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.8.0 + * @author Dalal Sukkari + * @date 2015-11-05 + * @precisions normal z -> c d s + * + **/ +#include "coreblas/include/coreblas.h" +#include "coreblas/include/cblas.h" +#include <math.h> + +/** + ******************************************************************************* + * + * @ingroup CORE_MORSE_Complex64_t + * + * CORE_zlascal scales a two-dimensional matrix A. As opposed to + * CORE_zlascl(), no check is performed to prevent under/overflow. This should + * have been done at a higher level. + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies the shape of A: + * = MorseUpperLower: A is a general matrix. + * = MorseUpper: A is an upper trapezoidal matrix. + * = MorseLower: A is a lower trapezoidal matrix. + * + * @param[in] m is the number of rows of the matrix A. m >= 0 + * + * @param[in] n is the number of columns of the matrix A. n >= 0 + * + * @param[in] alpha + * The scalar factor. + * + * @param[in,out] A is the matrix to be multiplied by alpha; only the part selected by uplo is modified + * + * @param[in] lda is the leading dimension of the array A. lda >= max(1,m). 
+ * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************/ +int +CORE_zlascal( MORSE_enum uplo, int m, int n, + MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int lda ) +{ + int i; + + if ( (uplo != MorseUpperLower) && + (uplo != MorseUpper) && + (uplo != MorseLower)) + { + coreblas_error(1, "illegal value of uplo"); + return -1; + } + + if (m < 0) { + coreblas_error(2, "Illegal value of m"); + return -2; + } + if (n < 0) { + coreblas_error(3, "Illegal value of n"); + return -3; + } + if ( (lda < max(1,m)) && (m > 0) ) { + coreblas_error(6, "Illegal value of lda"); + return -6; + } + + switch ( uplo ) { + case MorseUpper: + for(i=0; i<n; i++) { + cblas_zscal( min( i+1, m ), CBLAS_SADDR(alpha), A+i*lda, 1 ); /* column i: rows 0..min(i,m-1) */ + } + break; + + case MorseLower: + for(i=0; i<min(m,n); i++) { /* columns at or past m hold no lower-trapezoid entry */ + cblas_zscal( m-i, CBLAS_SADDR(alpha), A+i*lda+i, 1 ); /* column i: rows i..m-1, starting on the diagonal */ + } + break; + default: + if (m == lda) { + cblas_zscal( m*n, CBLAS_SADDR(alpha), A, 1 ); /* columns are contiguous: one call covers the matrix */ + } + else { + for(i=0; i<n; i++) { + cblas_zscal( m, CBLAS_SADDR(alpha), A+i*lda, 1 ); + } + } + } + + return MORSE_SUCCESS; +} diff --git a/coreblas/compute/core_ztsmlq.c b/coreblas/compute/core_ztsmlq.c index aadcbbe5005334a4f987e40bf09ba2cdd3bebb49..47a4f09c6348a51a108253cd0d289a58a0fad0fc 100644 --- a/coreblas/compute/core_ztsmlq.c +++ b/coreblas/compute/core_ztsmlq.c @@ -259,10 +259,10 @@ int CORE_ztsmlq(MORSE_enum side, MORSE_enum trans, CORE_zparfb( side, trans, MorseForward, MorseRowwise, mi, ni, M2, N2, kb, 0, - &A1[LDA1*jc+ic], LDA1, + A1 + LDA1 * jc + ic, LDA1, A2, LDA2, - &V[i], LDV, - &T[LDT*i], LDT, + V + i, LDV, + T + i * LDT, LDT, WORK, LDWORK); } return MORSE_SUCCESS; diff --git a/coreblas/include/coreblas_z.h b/coreblas/include/coreblas_z.h index 
fd21173de2f4b639dd058e353e01271925de3bad..4c6ee39cae6b975da25162e7edb852db83cb484a 100644 --- a/coreblas/include/coreblas_z.h +++ b/coreblas/include/coreblas_z.h @@ -63,6 +63,8 @@ int CORE_zgeadd(MORSE_enum trans, int M, int N, const MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB); +int CORE_zlascal( MORSE_enum uplo, int m, int n, + MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int lda ); int CORE_zgelqt(int M, int N, int IB, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *T, int LDT, diff --git a/cudablas/compute/cuda_zgelqt.c b/cudablas/compute/cuda_zgelqt.c index ff18cc171eba1573d417cedcd925400e719d737a..87307f51076498abd5547c37ad23871adb6c921b 100644 --- a/cudablas/compute/cuda_zgelqt.c +++ b/cudablas/compute/cuda_zgelqt.c @@ -26,17 +26,17 @@ #if defined(CHAMELEON_USE_MAGMA) int CUDA_zgelqt( - magma_int_t m, magma_int_t n, magma_int_t nb, - magmaDoubleComplex *da, magma_int_t ldda, - magmaDoubleComplex *v, magma_int_t ldv, - magmaDoubleComplex *dt, magma_int_t lddt, - magmaDoubleComplex *t, magma_int_t ldt, - magmaDoubleComplex *dd, - magmaDoubleComplex *d, magma_int_t ldd, - magmaDoubleComplex *tau, - magmaDoubleComplex *hwork, - magmaDoubleComplex *dwork, - CUstream stream) + magma_int_t m, magma_int_t n, magma_int_t nb, + magmaDoubleComplex *da, magma_int_t ldda, + magmaDoubleComplex *v, magma_int_t ldv, + magmaDoubleComplex *dt, magma_int_t lddt, + magmaDoubleComplex *t, magma_int_t ldt, + magmaDoubleComplex *dd, + magmaDoubleComplex *d, magma_int_t ldd, + magmaDoubleComplex *tau, + magmaDoubleComplex *hwork, + magmaDoubleComplex *dwork, + CUstream stream) { #define da_ref(a_1,a_2) ( da+(a_2)*(ldda) + (a_1)) #define v_ref(a_1,a_2) ( v+(a_2)*(ldv) + (a_1)) @@ -47,17 +47,17 @@ int CUDA_zgelqt( double _Complex one=1.; if (m < 0) { - return -1; + return -1; } else if (n < 0) { - return -2; + return -2; } else if (ldda < max(1,m)) { - return -4; + return -4; } k = min(m,n); if (k == 0) { - hwork[0] = 
*((magmaDoubleComplex*) &one); - return MAGMA_SUCCESS; + hwork[0] = *((magmaDoubleComplex*) &one); + return MAGMA_SUCCESS; } /* lower parts of little T must be zero: memset to 0 for simplicity */ @@ -92,7 +92,7 @@ int CUDA_zgelqt( magma_queue_sync( stream ); /* Form the triangular factor of the block reflector on the host - H = H'(i+ib-1) . . . H(i+1) H(i) */ + H = H'(i+ib-1) . . . H(i+1) H(i) */ CORE_zgelqt(ib, cols, ib, (double _Complex*) v_ref(0,i), ib, (double _Complex*) t_ref(0,0), ib, @@ -100,7 +100,7 @@ int CUDA_zgelqt( (double _Complex*) hwork); /* put 0s in the lower triangular part of a panel (and 1s on the - diagonal); copy the lower triangular in d */ + diagonal); copy the lower triangular in d */ CORE_zgesplit(MorseRight, MorseUnit, ib, min(ib,cols), (double _Complex*) v_ref(0,i), ib, (double _Complex*) d, ib); diff --git a/cudablas/compute/cuda_zgeqrt.c b/cudablas/compute/cuda_zgeqrt.c index 1ba8caddc26adb0be4db9bd7b15365d83f96a876..6215333e63be8308ca7c80d34a2bd71896e08f7e 100644 --- a/cudablas/compute/cuda_zgeqrt.c +++ b/cudablas/compute/cuda_zgeqrt.c @@ -26,17 +26,17 @@ #if defined(CHAMELEON_USE_MAGMA) int CUDA_zgeqrt( - magma_int_t m, magma_int_t n, magma_int_t nb, - magmaDoubleComplex *da, magma_int_t ldda, - magmaDoubleComplex *v, magma_int_t ldv, - magmaDoubleComplex *dt, magma_int_t lddt, - magmaDoubleComplex *t, magma_int_t ldt, - magmaDoubleComplex *dd, - magmaDoubleComplex *d, magma_int_t ldd, - magmaDoubleComplex *tau, - magmaDoubleComplex *hwork, - magmaDoubleComplex *dwork, - CUstream stream) + magma_int_t m, magma_int_t n, magma_int_t nb, + magmaDoubleComplex *da, magma_int_t ldda, + magmaDoubleComplex *v, magma_int_t ldv, + magmaDoubleComplex *dt, magma_int_t lddt, + magmaDoubleComplex *t, magma_int_t ldt, + magmaDoubleComplex *dd, + magmaDoubleComplex *d, magma_int_t ldd, + magmaDoubleComplex *tau, + magmaDoubleComplex *hwork, + magmaDoubleComplex *dwork, + CUstream stream) { #define da_ref(a_1,a_2) ( da+(a_2)*(ldda) + (a_1)) #define 
v_ref(a_1,a_2) ( v+(a_2)*(ldv) + (a_1)) @@ -45,7 +45,6 @@ int CUDA_zgeqrt( int i, k, ib, old_i, old_ib, rows, cols; double _Complex one=1.; - int i1, i2; if (m < 0) { return -1; @@ -62,7 +61,7 @@ int CUDA_zgeqrt( } /* lower parts of little T must be zero: memset to 0 for simplicity */ - memset(t_ref(0,0), 0, nb*nb*sizeof(magmaDoubleComplex)); + memset(t_ref(0,0), 0, nb*n*sizeof(magmaDoubleComplex)); cudaMemsetAsync(dt_ref(0,0), 0, nb*n*sizeof(magmaDoubleComplex), stream); if ( (nb > 1) && (nb < k) ) { @@ -101,7 +100,7 @@ int CUDA_zgeqrt( (double _Complex*) hwork); /* Put 0s in the upper triangular part of a panel (and 1s on the - diagonal); copy the upper triangular in d. */ + diagonal); copy the upper triangular in d. */ CORE_zgesplit(MorseLeft, MorseUnit, min(rows,ib), ib, (double _Complex*) v_ref(i, 0), ldv, (double _Complex*) d, ib); diff --git a/cudablas/compute/cuda_ztsmlq.c b/cudablas/compute/cuda_ztsmlq.c index 0a44d580020444d72d8740f8989aa73f0245ded7..6c525138af35fc407cb20b26da094d53b804a5b5 100644 --- a/cudablas/compute/cuda_ztsmlq.c +++ b/cudablas/compute/cuda_ztsmlq.c @@ -55,7 +55,7 @@ int CUDA_ztsmlq( NW = IB; } else { - NW = N1; + NW = M1; } if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) { diff --git a/include/morse_kernels.h b/include/morse_kernels.h index 0680893add97d4c6ca45f27cdf902ea3e3cf7cc5..2d29233992f8ea1e80d7f262caf5b89437f1f5a9 100644 --- a/include/morse_kernels.h +++ b/include/morse_kernels.h @@ -69,6 +69,7 @@ typedef enum morse_kernel_e { MORSE_UNMQR, MORSE_GEADD, + MORSE_LASCAL, MORSE_LACPY, MORSE_LAG2C, MORSE_LAG2Z, diff --git a/include/morse_z.h b/include/morse_z.h index 8335b892513343c1adef902a5c7142122f44b9b4..3d3a2dbf578468d5086cd0895ecd4f91d621c82b 100644 --- a/include/morse_z.h +++ b/include/morse_z.h @@ -81,6 +81,7 @@ double MORSE_zlanhe(MORSE_enum norm, MORSE_enum uplo, int N, MORSE_Complex64_t * #endif double MORSE_zlansy(MORSE_enum norm, MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA); double 
MORSE_zlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, int M, int N, MORSE_Complex64_t *A, int LDA); +int MORSE_zlascal(MORSE_enum uplo, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA); int MORSE_zlaset(MORSE_enum uplo, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t beta, MORSE_Complex64_t *A, int LDA); //int MORSE_zlaswp(int N, MORSE_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX); //int MORSE_zlaswpc(int N, MORSE_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX); @@ -157,6 +158,7 @@ double MORSE_zlanhe_Tile(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A); #endif double MORSE_zlansy_Tile(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A); double MORSE_zlantr_Tile(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, MORSE_desc_t *A); +int MORSE_zlascal_Tile(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A); int MORSE_zlaset_Tile(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_Complex64_t beta, MORSE_desc_t *A); //int MORSE_zlaswp_Tile(MORSE_desc_t *A, int K1, int K2, int *IPIV, int INCX); //int MORSE_zlaswpc_Tile(MORSE_desc_t *A, int K1, int K2, int *IPIV, int INCX); @@ -230,6 +232,7 @@ int MORSE_zlanhe_Tile_Async(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, d #endif int MORSE_zlansy_Tile_Async(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *value, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zlantr_Tile_Async(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, MORSE_desc_t *A, double *value, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zlascal_Tile_Async(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zlaset_Tile_Async(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_Complex64_t beta, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); //int MORSE_zlaswp_Tile_Async(MORSE_desc_t *A, int K1, int K2, int *IPIV, int INCX, MORSE_sequence_t 
*sequence, MORSE_request_t *request); //int MORSE_zlaswpc_Tile_Async(MORSE_desc_t *A, int K1, int K2, int *IPIV, int INCX, MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/include/runtime_z.h b/include/runtime_z.h index 1d5c16016b888e7726c3f681349a00ce8c1b7100..3ed09528ba9d80d1a42c2b5bc4248da91c7991e7 100644 --- a/include/runtime_z.h +++ b/include/runtime_z.h @@ -50,6 +50,11 @@ void MORSE_TASK_zgeadd(const MORSE_option_t *options, MORSE_enum trans, int m, int n, int nb, MORSE_Complex64_t alpha, const MORSE_desc_t *A, int Am, int An, int lda, MORSE_Complex64_t beta, const MORSE_desc_t *B, int Bm, int Bn, int ldb); +void MORSE_TASK_zlascal(const MORSE_option_t *options, + MORSE_enum uplo, + int m, int n, int nb, + MORSE_Complex64_t alpha, + const MORSE_desc_t *A, int Am, int An, int lda); void MORSE_TASK_zbrdalg(const MORSE_option_t *options, MORSE_enum uplo, int N, int NB, diff --git a/runtime/quark/CMakeLists.txt b/runtime/quark/CMakeLists.txt index 1a9bef5d8d610a98653af83b4c040d6cdb46b5b3..e67975d6b8b507587fe9978088d4c0d9244ddc05 100644 --- a/runtime/quark/CMakeLists.txt +++ b/runtime/quark/CMakeLists.txt @@ -10,8 +10,8 @@ # # @file CMakeLists.txt # -# @project MORSE -# MORSE is a software package provided by: +# @project CHAMELEON +# CHAMELEON is a software package provided by: # Inria Bordeaux - Sud-Ouest, # Univ. 
of Tennessee, # King Abdullah Univesity of Science and Technology @@ -22,6 +22,7 @@ # @author Cedric Castagnede # @author Emmanuel Agullo # @author Mathieu Faverge +# @author Florent Pruvost # @date 13-07-2012 # ### @@ -107,6 +108,7 @@ set(ZSRC # LAPACK ################## codelets/codelet_zgeadd.c + codelets/codelet_zlascal.c codelets/codelet_zgelqt.c codelets/codelet_zgeqrt.c codelets/codelet_zgessm.c diff --git a/runtime/quark/codelets/codelet_zaxpy.c b/runtime/quark/codelets/codelet_zaxpy.c index 1d950a22e1905703fd06efc7ce5aefc2afcfbf7a..875471e893bde9d5784a203c6816e947ee89a426 100644 --- a/runtime/quark/codelets/codelet_zaxpy.c +++ b/runtime/quark/codelets/codelet_zaxpy.c @@ -31,8 +31,8 @@ void MORSE_TASK_zaxpy(const MORSE_option_t *options, const MORSE_desc_t *A, int Am, int An, int incA, const MORSE_desc_t *B, int Bm, int Bn, int incB) { - quark_option_t *opt = (quark_option_t*)(options->schedopt); - DAG_CORE_AXPY; + quark_option_t *opt = (quark_option_t*)(options->schedopt); + DAG_CORE_AXPY; QUARK_Insert_Task(opt->quark, CORE_zaxpy_quark, (Quark_Task_Flags*)opt, sizeof(int), &M, VALUE, sizeof(MORSE_Complex64_t), alpha, VALUE, diff --git a/runtime/quark/codelets/codelet_zgelqt.c b/runtime/quark/codelets/codelet_zgelqt.c index 3fb14a416ad1efb7df68fcb26204013ceed4ab81..a0cc3b1f88ceb70d2c13dea1bd613d1384a742c0 100644 --- a/runtime/quark/codelets/codelet_zgelqt.c +++ b/runtime/quark/codelets/codelet_zgelqt.c @@ -31,7 +31,7 @@ #include "runtime/quark/include/morse_quark.h" -/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * diff --git a/runtime/quark/codelets/codelet_zgeqrt.c b/runtime/quark/codelets/codelet_zgeqrt.c index 0d817631ffe3acebf408cce9064f06e260a1b059..d1906b28da022d7e311541ae8d25b8caf9b3ca8f 100644 --- a/runtime/quark/codelets/codelet_zgeqrt.c +++ b/runtime/quark/codelets/codelet_zgeqrt.c @@ -31,7 +31,7 @@ #include "runtime/quark/include/morse_quark.h" 
-/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * diff --git a/runtime/quark/codelets/codelet_zlacpy.c b/runtime/quark/codelets/codelet_zlacpy.c index 799b7ac4bc1a01efc5cdf01c791cee68aa44fe7f..2b2fd9da49a5ace36c50cb0c38f6d8b5d7cbefb9 100644 --- a/runtime/quark/codelets/codelet_zlacpy.c +++ b/runtime/quark/codelets/codelet_zlacpy.c @@ -36,6 +36,19 @@ * @ingroup CORE_MORSE_Complex64_t * **/ +static inline void CORE_zlacpy_quark(Quark *quark) +{ + MORSE_enum uplo; + int M; + int N; + MORSE_Complex64_t *A; + int LDA; + MORSE_Complex64_t *B; + int LDB; + + quark_unpack_args_7(quark, uplo, M, N, A, LDA, B, LDB); + CORE_zlacpy(uplo, M, N, A, LDA, B, LDB); +} void MORSE_TASK_zlacpy(const MORSE_option_t *options, MORSE_enum uplo, int m, int n, int nb, @@ -55,18 +68,3 @@ void MORSE_TASK_zlacpy(const MORSE_option_t *options, 0); } - -void CORE_zlacpy_quark(Quark *quark) -{ - MORSE_enum uplo; - int M; - int N; - MORSE_Complex64_t *A; - int LDA; - MORSE_Complex64_t *B; - int LDB; - - quark_unpack_args_7(quark, uplo, M, N, A, LDA, B, LDB); - CORE_zlacpy(uplo, M, N, A, LDA, B, LDB); -} - diff --git a/runtime/quark/codelets/codelet_zlascal.c b/runtime/quark/codelets/codelet_zlascal.c new file mode 100644 index 0000000000000000000000000000000000000000..ab96da473c760287503e0a995452e0ba75203d52 --- /dev/null +++ b/runtime/quark/codelets/codelet_zlascal.c @@ -0,0 +1,70 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014, 2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file codelet_zlascal.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. 
of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Julien Langou + * @author Henricus Bouwmeester + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + **/ + +#include "runtime/quark/include/morse_quark.h" + +/***************************************************************************//** + * + * @ingroup CORE_MORSE_Complex64_t + * QUARK task body: unpacks the six arguments inserted by MORSE_TASK_zlascal() and forwards them to CORE_zlascal(). + **/ +static inline void CORE_zlascal_quark(Quark *quark) +{ + MORSE_enum uplo; + int M; + int N; + MORSE_Complex64_t alpha; + MORSE_Complex64_t *A; + int LDA; + + quark_unpack_args_6(quark, uplo, M, N, alpha, A, LDA); /* same order as the QUARK_Insert_Task() call below */ + CORE_zlascal(uplo, M, N, alpha, A, LDA); +} + +void MORSE_TASK_zlascal(const MORSE_option_t *options, + MORSE_enum uplo, + int m, int n, int nb, + MORSE_Complex64_t alpha, + const MORSE_desc_t *A, int Am, int An, int lda) +{ + quark_option_t *opt = (quark_option_t*)(options->schedopt); + DAG_CORE_LASCAL; + QUARK_Insert_Task(opt->quark, CORE_zlascal_quark, (Quark_Task_Flags*)opt, + sizeof(MORSE_enum), &uplo, VALUE, + sizeof(int), &m, VALUE, + sizeof(int), &n, VALUE, + sizeof(MORSE_Complex64_t), &alpha, VALUE, /* pass the address, like every other VALUE argument */ + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INOUT, + sizeof(int), &lda, VALUE, + 0); +} + + diff --git a/runtime/quark/codelets/codelet_ztslqt.c b/runtime/quark/codelets/codelet_ztslqt.c index e59162cb369a1bb1a55732f8b5baf51618964e2e..1f2e181b7e360ac03ecd406e934d9522937e6452 100644 --- a/runtime/quark/codelets/codelet_ztslqt.c +++ b/runtime/quark/codelets/codelet_ztslqt.c @@ -30,10 +30,8 @@ **/ #include "runtime/quark/include/morse_quark.h" -#undef REAL -#define COMPLEX -/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * @@ -119,11 +117,11 @@ void MORSE_TASK_ztslqt(const MORSE_option_t *options, sizeof(int), &m, VALUE, 
sizeof(int), &n, VALUE, sizeof(int), &ib, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_D | QUARK_REGION_L, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_D | QUARK_REGION_L | LOCALITY, sizeof(int), &lda1, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT | LOCALITY, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT, sizeof(int), &lda2, VALUE, - sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), OUTPUT, + sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), OUTPUT, sizeof(int), &ldt, VALUE, sizeof(MORSE_Complex64_t)*nb, NULL, SCRATCH, sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH, diff --git a/runtime/quark/codelets/codelet_ztsmlq.c b/runtime/quark/codelets/codelet_ztsmlq.c index 645630434f22698efa7e3037f70203bb64ed18fd..81dc27110abc0f7c761542fba7e4adafd2c395cf 100644 --- a/runtime/quark/codelets/codelet_ztsmlq.c +++ b/runtime/quark/codelets/codelet_ztsmlq.c @@ -32,7 +32,7 @@ **/ #include "runtime/quark/include/morse_quark.h" -/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * diff --git a/runtime/quark/codelets/codelet_ztsmqr.c b/runtime/quark/codelets/codelet_ztsmqr.c index ade4d922ea8dfe3898312e2c1daeb9552396edb4..aaff66e2830d3a352d9858a652025ce14e88d4d5 100644 --- a/runtime/quark/codelets/codelet_ztsmqr.c +++ b/runtime/quark/codelets/codelet_ztsmqr.c @@ -32,7 +32,7 @@ **/ #include "runtime/quark/include/morse_quark.h" -/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * diff --git a/runtime/quark/codelets/codelet_ztsqrt.c b/runtime/quark/codelets/codelet_ztsqrt.c index aeaf06fd4beb98199a9f0312dc4fa3d954abfe15..76546d31e37c9a07f8e77ab1a2f61ffddbf52d13 100644 --- 
a/runtime/quark/codelets/codelet_ztsqrt.c +++ b/runtime/quark/codelets/codelet_ztsqrt.c @@ -30,10 +30,8 @@ **/ #include "runtime/quark/include/morse_quark.h" -#undef REAL -#define COMPLEX -/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * @@ -108,11 +106,11 @@ void MORSE_TASK_ztsqrt(const MORSE_option_t *options, sizeof(int), &m, VALUE, sizeof(int), &n, VALUE, sizeof(int), &ib, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_D | QUARK_REGION_U | LOCALITY, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_D | QUARK_REGION_U | LOCALITY, sizeof(int), &lda1, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT, sizeof(int), &lda2, VALUE, - sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), OUTPUT, + sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), OUTPUT, sizeof(int), &ldt, VALUE, sizeof(MORSE_Complex64_t)*nb, NULL, SCRATCH, sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH, diff --git a/runtime/quark/codelets/codelet_zttlqt.c b/runtime/quark/codelets/codelet_zttlqt.c index 423708140a7f35fbe9f0aa8f9ec9bff90aa9382e..98b42550badcdc8053a720b467e32b04562ca32f 100644 --- a/runtime/quark/codelets/codelet_zttlqt.c +++ b/runtime/quark/codelets/codelet_zttlqt.c @@ -30,10 +30,8 @@ **/ #include "runtime/quark/include/morse_quark.h" -#undef REAL -#define COMPLEX -/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * @@ -120,9 +118,9 @@ void MORSE_TASK_zttlqt(const MORSE_option_t *options, sizeof(int), &m, VALUE, sizeof(int), &n, VALUE, sizeof(int), &ib, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT/**/, + 
sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT, sizeof(int), &lda1, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT/**/|LOCALITY, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT | LOCALITY, sizeof(int), &lda2, VALUE, sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), OUTPUT, sizeof(int), &ldt, VALUE, diff --git a/runtime/quark/codelets/codelet_zttmlq.c b/runtime/quark/codelets/codelet_zttmlq.c index 8d9c9a3d27b472ee37620a79999d146fff6a75b5..cab48f44c9701c937ceeb596b87498a3bd4a7bdf 100644 --- a/runtime/quark/codelets/codelet_zttmlq.c +++ b/runtime/quark/codelets/codelet_zttmlq.c @@ -30,7 +30,7 @@ **/ #include "runtime/quark/include/morse_quark.h" -/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * @@ -102,7 +102,7 @@ * @param[in] LDV * The leading dimension of the array V. LDV >= max(1,K). * - * @param[out] T + * @param[in] T * The IB-by-N1 triangular factor T of the block reflector. * T is upper triangular by block (economic storage); * The rest of the array is not referenced. 
diff --git a/runtime/quark/codelets/codelet_zttmqr.c b/runtime/quark/codelets/codelet_zttmqr.c index 15b93b4b4478c4ec0142a3d9071aaa7f460d7169..849a5454d18cd3961205a47d95cf08cd63552314 100644 --- a/runtime/quark/codelets/codelet_zttmqr.c +++ b/runtime/quark/codelets/codelet_zttmqr.c @@ -30,7 +30,7 @@ **/ #include "runtime/quark/include/morse_quark.h" -/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * diff --git a/runtime/quark/codelets/codelet_zttqrt.c b/runtime/quark/codelets/codelet_zttqrt.c index e6ed35f0b17d92959a998febb3b1dd00465c9220..06106fc5a84299a1874116eb04670662470ecd69 100644 --- a/runtime/quark/codelets/codelet_zttqrt.c +++ b/runtime/quark/codelets/codelet_zttqrt.c @@ -30,10 +30,8 @@ **/ #include "runtime/quark/include/morse_quark.h" -#undef REAL -#define COMPLEX -/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * @@ -120,9 +118,9 @@ void MORSE_TASK_zttqrt(const MORSE_option_t *options, sizeof(int), &m, VALUE, sizeof(int), &n, VALUE, sizeof(int), &ib, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT/**/, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT, sizeof(int), &lda1, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT/**/|LOCALITY, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT | LOCALITY, sizeof(int), &lda2, VALUE, sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), OUTPUT, sizeof(int), &ldt, VALUE, diff --git a/runtime/quark/codelets/codelet_zunmlq.c b/runtime/quark/codelets/codelet_zunmlq.c index 939933714fb24567eadd5b67cfc81219dfed2469..1ae6b0899667469aad7ca8ba0376430622e50af2 100644 --- a/runtime/quark/codelets/codelet_zunmlq.c +++ b/runtime/quark/codelets/codelet_zunmlq.c @@ -32,7 +32,7 @@ #include 
"runtime/quark/include/morse_quark.h" -/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * @@ -126,20 +126,20 @@ void MORSE_TASK_zunmlq(const MORSE_option_t *options, quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_UNMLQ; QUARK_Insert_Task(opt->quark, CORE_zunmlq_quark, (Quark_Task_Flags*)opt, - sizeof(MORSE_enum), &side, VALUE, - sizeof(MORSE_enum), &trans, VALUE, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &ib, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT | QUARK_REGION_U, - sizeof(int), &lda, VALUE, - sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), INPUT, - sizeof(int), &ldt, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn), INOUT, - sizeof(int), &ldc, VALUE, - sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH, - sizeof(int), &nb, VALUE, + sizeof(MORSE_enum), &side, VALUE, + sizeof(MORSE_enum), &trans, VALUE, + sizeof(int), &m, VALUE, + sizeof(int), &n, VALUE, + sizeof(int), &k, VALUE, + sizeof(int), &ib, VALUE, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT | QUARK_REGION_U, + sizeof(int), &lda, VALUE, + sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), INPUT, + sizeof(int), &ldt, VALUE, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn), INOUT, + sizeof(int), &ldc, VALUE, + sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH, + sizeof(int), &nb, VALUE, 0); } diff --git a/runtime/quark/codelets/codelet_zunmqr.c b/runtime/quark/codelets/codelet_zunmqr.c index fb9f0f10ddd5493046e570eeb82731e796d28d65..01e9f150b909659302bd941f3cca8d80edc79154 100644 --- a/runtime/quark/codelets/codelet_zunmqr.c +++ b/runtime/quark/codelets/codelet_zunmqr.c @@ -8,7 +8,7 @@ * **/ - /** +/** * * @file codelet_zunmqr.c * @@ -31,7 +31,7 @@ #include 
"runtime/quark/include/morse_quark.h" -/***************************************************************************//** +/** * * @ingroup CORE_MORSE_Complex64_t * @@ -126,20 +126,20 @@ void MORSE_TASK_zunmqr(const MORSE_option_t *options, quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_UNMQR; QUARK_Insert_Task(opt->quark, CORE_zunmqr_quark, (Quark_Task_Flags*)opt, - sizeof(MORSE_enum), &side, VALUE, - sizeof(MORSE_enum), &trans, VALUE, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &ib, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT | QUARK_REGION_L, - sizeof(int), &lda, VALUE, - sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), INPUT, - sizeof(int), &ldt, VALUE, - sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn), INOUT, - sizeof(int), &ldc, VALUE, - sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH, - sizeof(int), &nb, VALUE, + sizeof(MORSE_enum), &side, VALUE, + sizeof(MORSE_enum), &trans, VALUE, + sizeof(int), &m, VALUE, + sizeof(int), &n, VALUE, + sizeof(int), &k, VALUE, + sizeof(int), &ib, VALUE, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT | QUARK_REGION_L, + sizeof(int), &lda, VALUE, + sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), INPUT, + sizeof(int), &ldt, VALUE, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn), INOUT, + sizeof(int), &ldc, VALUE, + sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH, + sizeof(int), &nb, VALUE, 0); } @@ -166,4 +166,3 @@ void CORE_zunmqr_quark(Quark *quark) CORE_zunmqr(side, trans, m, n, k, ib, A, lda, T, ldt, C, ldc, WORK, ldwork); } - diff --git a/runtime/quark/control/runtime_descriptor.c b/runtime/quark/control/runtime_descriptor.c index 5ce2b288bef23e67d1aba6eea5d0417a618ab5eb..6e4dfa9d20ede9b54e254972890e5d297df4b53e 100644 --- a/runtime/quark/control/runtime_descriptor.c +++ 
b/runtime/quark/control/runtime_descriptor.c @@ -16,7 +16,7 @@ * MORSE is a software package provided by Univ. of Tennessee, * Univ. of California Berkeley and Univ. of Colorado Denver * - * @version + * @version * @author Vijay Joshi * @author Cedric Castagnede * @date 2012-09-15 diff --git a/runtime/quark/include/core_blas_dag.h b/runtime/quark/include/core_blas_dag.h index 0ab24fcce0a1db092447abb2fabb0f6005f1056d..68358d10f2061f06e2fa83f02b56d28aafcb2248 100644 --- a/runtime/quark/include/core_blas_dag.h +++ b/runtime/quark/include/core_blas_dag.h @@ -37,6 +37,7 @@ #define DAG_CORE_AXPY DAG_SET_PROPERTIES( "AXPY" , "white" ) #define DAG_CORE_BUILD DAG_SET_PROPERTIES( "BUILD" , "white" ) #define DAG_CORE_GEADD DAG_SET_PROPERTIES( "GEADD" , "white" ) +#define DAG_CORE_LASCAL DAG_SET_PROPERTIES( "LASCAL" , "white" ) #define DAG_CORE_GELQT DAG_SET_PROPERTIES( "GELQT" , "green" ) #define DAG_CORE_GEMM DAG_SET_PROPERTIES( "GEMM" , "yellow" ) #define DAG_CORE_GEQRT DAG_SET_PROPERTIES( "GEQRT" , "green" ) diff --git a/runtime/quark/include/quark_zblas.h b/runtime/quark/include/quark_zblas.h index 36d4aaa2c7eadd07b97892f55b00b8f2c61501d8..e5fe96bac53a1ab8c8eea44c212fc75d5309788f 100644 --- a/runtime/quark/include/quark_zblas.h +++ b/runtime/quark/include/quark_zblas.h @@ -69,7 +69,6 @@ void CORE_zher2k_quark(Quark *quark); void CORE_zhegst_quark(Quark *quark); void CORE_zherfb_quark(Quark *quark); void CORE_zhessq_quark(Quark *quark); -void CORE_zlacpy_quark(Quark *quark); void CORE_zlatro_quark(Quark *quark); void CORE_zlange_quark(Quark *quark); void CORE_zlange_max_quark(Quark *quark); diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt index cfe20135d19b109390ea8d32c1a905d13ac7e49a..764e55567b562b90594252c8911d6150e22dfe2d 100644 --- a/runtime/starpu/CMakeLists.txt +++ b/runtime/starpu/CMakeLists.txt @@ -27,6 +27,7 @@ # ### +cmake_minimum_required(VERSION 2.8) # check if magma_dgetrf_incpiv_gpu is accessible in libmagma and activate it in 
chameleon if ( CBLAS_FOUND AND LAPACKE_FOUND AND LAPACK_FOUND AND CUDA_FOUND AND CUDA_CUBLAS_LIBRARIES AND MAGMA_FOUND ) @@ -83,10 +84,10 @@ precisions_rules_py(RUNTIME_COMMON_GENERATED "${ZSRC}" TARGETDIR "control") set(RUNTIME_COMMON - control/runtime_control.c + control/runtime_async.c control/runtime_context.c + control/runtime_control.c control/runtime_descriptor.c - control/runtime_async.c control/runtime_options.c control/runtime_profiling.c control/runtime_workspace.c @@ -127,6 +128,7 @@ set(ZSRC # LAPACK ################## codelets/codelet_zgeadd.c + codelets/codelet_zlascal.c codelets/codelet_zgelqt.c codelets/codelet_zgeqrt.c codelets/codelet_zgessm.c @@ -171,7 +173,6 @@ set(ZSRC ################## codelets/codelet_zbuild.c ) -list(REMOVE_DUPLICATES ZSRC) precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}" PRECISIONS "${CHAMELEON_PRECISION}" diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c index 010334af9e332f9e0072709143c9e2bb5715f886..fa66cff4d9f3be011bfbe7e7a51f6c29ebdde7e4 100644 --- a/runtime/starpu/codelets/codelet_zcallback.c +++ b/runtime/starpu/codelets/codelet_zcallback.c @@ -31,6 +31,7 @@ CHAMELEON_CL_CB(zasum, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); CHAMELEON_CL_CB(zaxpy, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[1]), 0, M); CHAMELEON_CL_CB(zgeadd, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); +CHAMELEON_CL_CB(zlascal, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N); CHAMELEON_CL_CB(zgelqt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, (4./3.)*M*N*K); CHAMELEON_CL_CB(zgemm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), starpu_matrix_get_ny(task->handles[0]), 2. 
*M*N*K); /* If A^t, computation is wrong */ CHAMELEON_CL_CB(zgeqrt, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, (4./3.)*M*M*N); diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c new file mode 100644 index 0000000000000000000000000000000000000000..9a2fc682195bd31af7de870004d650c8071680f6 --- /dev/null +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -0,0 +1,109 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file codelet_zlascal.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Dalal Sukkari + * @date 2010-11-15 + * @precisions normal z -> c d s + * + **/ +#include "runtime/starpu/include/morse_starpu.h" +#include "runtime/starpu/include/runtime_codelet_z.h" + +/** + * + * @ingroup CORE_MORSE_Complex64_t + * + * CORE_zlascal scales a matrix by a scalar. + * + * A <- alpha * A + * + ******************************************************************************* + * @param[in] uplo Shape of A: MorseUpperLower (general), MorseUpper or MorseLower. + * + * @param[in] M + * Number of rows of the matrix A. + * + * @param[in] N + * Number of columns of the matrix A. + * + * @param[in] alpha + * Scalar factor of A. + * + * @param[in,out] A + * Matrix of size LDA-by-N. On exit, A is overwritten by alpha * A. + * + * @param[in] LDA + * Leading dimension of the array A.
LDA >= max(1,M) + * + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************/ + +void MORSE_TASK_zlascal(const MORSE_option_t *options, + MORSE_enum uplo, + int m, int n, int nb, + MORSE_Complex64_t alpha, + const MORSE_desc_t *A, int Am, int An, int lda) +{ + (void)nb; /* nb is not used by this implementation */ + struct starpu_codelet *codelet = &cl_zlascal; + void (*callback)(void*) = options->profiling ? cl_zlascal_callback : NULL; /* callback is attached only when options->profiling is set */ + + if ( morse_desc_islocal( A, Am, An )) /* the task is inserted only when this check succeeds for tile (Am, An) */ + { + starpu_insert_task( + codelet, + STARPU_VALUE, &uplo, sizeof(MORSE_enum), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &alpha, sizeof(MORSE_Complex64_t), + STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, + 0); + } +} + + +static void cl_zlascal_cpu_func(void *descr[], void *cl_arg) +{ + MORSE_enum uplo; + int M; + int N; + MORSE_Complex64_t alpha; + MORSE_Complex64_t *A; + int LDA; + + A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* descr[0] corresponds to the only data handle (STARPU_RW) inserted by MORSE_TASK_zlascal */ + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); /* same order as the STARPU_VALUE arguments packed in MORSE_TASK_zlascal */ + CORE_zlascal(uplo, M, N, alpha, A, LDA); + return; +} + +/* + * Codelet definition + */ +CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func) diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h index 33433cf0224292a78b110563b3222400846693e6..7af0ac2e49860997f9a7c1d190b4c2765ace7bd0 100644 --- a/runtime/starpu/include/runtime_codelet_z.h +++ b/runtime/starpu/include/runtime_codelet_z.h @@ -88,6 +88,7 @@ ZCODELETS_HEADER(unmqr) * Auxiliary functions */ ZCODELETS_HEADER(geadd) +ZCODELETS_HEADER(lascal) ZCODELETS_HEADER(tradd) ZCODELETS_HEADER(lacpy) ZCODELETS_HEADER(lange) diff --git a/testing/CTestLists.cmake
b/testing/CTestLists.cmake index 6c50399235d7087513f857f397a91cdf10b9357f..ccb9f5b675e919c9059a6fcc3ef343d671d045fd 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -26,7 +26,7 @@ foreach(cat ${TEST_CATEGORIES}) add_test(test_${cat}_${prec}syrk ./${prec}${TEST_CMD_${cat}} SYRK 1.0 -2.0 600 500 650 625) add_test(test_${cat}_${prec}syr2k ./${prec}${TEST_CMD_${cat}} SYR2K 1.0 -2.0 600 500 650 625 700) - if ( prec STREQUAL "c" OR prec STREQUAL "z" ) + if ( ${prec} STREQUAL "c" OR ${prec} STREQUAL "z" ) add_test(test_${cat}_${prec}hemm ./${prec}${TEST_CMD_${cat}} HEMM 1.0 -2.0 600 500 650 625 600) add_test(test_${cat}_${prec}herk ./${prec}${TEST_CMD_${cat}} HERK 1.0 -2.0 600 500 650 625) add_test(test_${cat}_${prec}her2k ./${prec}${TEST_CMD_${cat}} HER2K 1.0 -2.0 600 500 650 625 700)