diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index c88600e60ed70487ad602cb25459a062cdaa4d0f..35c94da450e2055f327429449a90bbd1455a9769 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -65,7 +65,9 @@ set(ZSRC pztrmm.c pztrsm.c pztrsmpl.c + pztradd.c ### + zgeadd.c zgemm.c zhemm.c zher2k.c @@ -73,13 +75,13 @@ set(ZSRC zsymm.c zsyr2k.c zsyrk.c + ztradd.c ztrmm.c ztrsm.c ztrsmpl.c ################## # LAPACK ################## - pzgeadd.c pzgelqf.c pzgelqfrh.c pzgeqrf.c diff --git a/compute/pzgeadd.c b/compute/pzgeadd.c deleted file mode 100644 index 458643db06e8524844eb1bf2237455b8c341ec67..0000000000000000000000000000000000000000 --- a/compute/pzgeadd.c +++ /dev/null @@ -1,71 +0,0 @@ -/** - * - * @copyright (c) 2009-2014 The University of Tennessee and The University - * of Tennessee Research Foundation. - * All rights reserved. - * @copyright (c) 2012-2014 Inria. All rights reserved. - * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. - * - **/ - -/** - * - * @file pzgeadd.c - * - * MORSE auxiliary routines - * MORSE is a software package provided by Univ. of Tennessee, - * Univ. of California Berkeley and Univ. of Colorado Denver - * - * @version 2.5.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for MORSE 1.0.0 - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> s d c - * - **/ -#include "control/common.h" - -#define A(m,n) A, m, n -#define B(m,n) B, m, n -/***************************************************************************//** - * - **/ - -/***************************************************************************//** - * - **/ -void morse_pzgeadd(MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, - MORSE_sequence_t *sequence, MORSE_request_t *request) -{ - MORSE_context_t *morse; - MORSE_option_t options; - - int X, Y; - int m, n; - int ldam, ldbm; - - morse = morse_context_self(); - if (sequence->status != MORSE_SUCCESS) - return; - RUNTIME_options_init(&options, morse, sequence, request); - - for (m = 0; m < A->mt; m++) { - X = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - ldbm = BLKLDD(B, m); - - for (n = 0; n < A->nt; n++) { - Y = n == A->nt-1 ? A->n-n*A->nb : A->nb; - MORSE_TASK_zgeadd( - &options, - X, Y, - alpha, A(m, n), ldam, - B(m, n), ldbm); - } - } - RUNTIME_options_finalize(&options, morse); - MORSE_TASK_dataflush_all(); -} diff --git a/compute/pzlange.c b/compute/pzlange.c index 60207614a02945da408d00007e9bcfaad0f11957..f56bb98de5aab67bdfccdf2becaddb0f14645ace 100644 --- a/compute/pzlange.c +++ b/compute/pzlange.c @@ -114,9 +114,9 @@ void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, for(m = 0; m < A->mt; m++) { MORSE_TASK_dgeadd( &options, - 1, tempkn, 1.0, - VECNORMS_STEP1(m, n), 1, - VECNORMS_STEP2(0, n), 1); + MorseNoTrans, 1, tempkn, A->mb, + 1.0, VECNORMS_STEP1(m, n), 1, + 1.0, VECNORMS_STEP2(0, n), 1); } /* @@ -215,18 +215,18 @@ void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, for(n = A->myrank % A->q + A->q; n < A->nt; n+=A->q) { MORSE_TASK_dgeadd( &options, - tempkm, 1, 1.0, - VECNORMS_STEP1(m, n), tempkm, - VECNORMS_STEP1(m, A->myrank % A->q), tempkm); + MorseNoTrans, tempkm, 1, A->mb, + 1.0, VECNORMS_STEP1(m, n), tempkm, + 1.0, VECNORMS_STEP1(m, A->myrank % A->q), tempkm); } /* compute vector sums between tiles in rows between ranks */ for(n = 0; n < A->q; n++) { MORSE_TASK_dgeadd( &options, - tempkm, 1, 1.0, - VECNORMS_STEP1(m, n), tempkm, - VECNORMS_STEP2(m, 0), tempkm); + MorseNoTrans, tempkm, 1, A->mb, + 1.0, VECNORMS_STEP1(m, n), tempkm, + 1.0, VECNORMS_STEP2(m, 0), tempkm); } /* diff --git a/compute/pzlanhe.c b/compute/pzlanhe.c index 5227aca892cd7490d25f7bff515c63c926b8d4e5..773ccc6d79a43a30289e6e3f5300f3ee69952310 100644 --- a/compute/pzlanhe.c +++ b/compute/pzlanhe.c @@ -160,9 +160,9 @@ void morse_pzlanhe(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *re for(n = 0; n < A->nt; n++) { MORSE_TASK_dgeadd( &options, - tempkm, 1, 1.0, - VECNORMS_STEP1(m, n), tempkm, - VECNORMS_STEP2(m, 0), tempkm); + MorseNoTrans, tempkm, 1, A->mb, + 1.0, VECNORMS_STEP1(m, n), tempkm, + 1.0, VECNORMS_STEP2(m, 0), tempkm); } /* * Compute max norm of each segment of the final vector in the diff --git a/compute/pzlansy.c b/compute/pzlansy.c index 3af87351109127bc49535faf7fbe1a15edbbd618..3044c44022dfd49b3258c71b65f4290fd463d2f6 100644 --- a/compute/pzlansy.c +++ b/compute/pzlansy.c @@ -160,9 +160,9 @@ void morse_pzlansy(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *re for(n = 0; n < A->nt; n++) { MORSE_TASK_dgeadd( &options, - tempkm, 1, 1.0, - VECNORMS_STEP1(m, n), tempkm, - VECNORMS_STEP2(m, 0), tempkm); + MorseNoTrans, tempkm, 1, A->mb, + 1.0, VECNORMS_STEP1(m, n), tempkm, + 1.0, VECNORMS_STEP2(m, 0), tempkm); } /* * Compute max norm of each segment of the final vector in the diff --git a/compute/pzlantr.c b/compute/pzlantr.c index f330c896c2c27ae43169a007cde580b7219893ea..89a72def273415fb77e9bd224cdc014eb6c259fa 100644 --- a/compute/pzlantr.c +++ b/compute/pzlantr.c @@ -128,9 +128,9 @@ void morse_pzlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; MORSE_TASK_dgeadd( &options, - 1, tempkn, 1.0, - VECNORMS_STEP1(m, n), 1, - VECNORMS_STEP2(0, n), 1); + MorseNoTrans, 1, tempkn, A->mb, + 1.0, VECNORMS_STEP1(m, n), 1, + 1.0, VECNORMS_STEP2(0, n), 1); } } } @@ -178,9 +178,9 @@ void morse_pzlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, for(m = n; m < A->mt; m++) { MORSE_TASK_dgeadd( &options, - 1, tempkn, 1.0, - VECNORMS_STEP1(m, n), 1, - VECNORMS_STEP2(0, n), 1); + MorseNoTrans, 1, tempkn, A->mb, + 1.0, VECNORMS_STEP1(m, n), 1, + 1.0, VECNORMS_STEP2(0, n), 1); } } } @@ -292,9 +292,9 @@ void morse_pzlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, for(n = m; n < A->nt; n++) { MORSE_TASK_dgeadd( &options, - tempkm, 1, 1.0, - VECNORMS_STEP1(m, n), tempkm, - VECNORMS_STEP2(m, 0), tempkm); + MorseNoTrans, tempkm, 1, A->mb, + 1.0, VECNORMS_STEP1(m, n), tempkm, + 1.0, VECNORMS_STEP2(m, 0), tempkm); } } @@ -347,9 +347,9 @@ void morse_pzlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; MORSE_TASK_dgeadd( &options, - tempkm, 1, 1.0, - VECNORMS_STEP1(m, n), tempkm, - VECNORMS_STEP2(m, 0), tempkm); + MorseNoTrans, tempkm, 1, A->mb, + 1.0, VECNORMS_STEP1(m, n), tempkm, + 1.0, VECNORMS_STEP2(m, 0), tempkm); } } } diff --git a/compute/pztradd.c b/compute/pztradd.c new file mode 100644 index 0000000000000000000000000000000000000000..6e80ab3160e9bf7e1b82dd701844e66389208f7f --- /dev/null +++ b/compute/pztradd.c @@ -0,0 +1,196 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file pztradd.c + * + * MORSE auxiliary routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.7.1 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Emmanuel Agullo + * @author Mathieu Faverge + * @date 2011-11-03 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +#define A(m, n) A, m, n +#define B(m, n) B, m, n + +/***************************************************************************//** + * Parallel tile matrix-matrix multiplication - dynamic scheduling + **/ +void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, + MORSE_Complex64_t alpha, MORSE_desc_t *A, + MORSE_Complex64_t beta, MORSE_desc_t *B, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_option_t options; + + int tempmm, tempnn, tempmn, tempnm; + int m, n; + int ldam, ldan, ldbm, ldbn; + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + RUNTIME_options_init(&options, morse, sequence, request); + + switch(uplo){ + case MorseLower: + if (trans == MorseNoTrans) { + for (n = 0; n < min(B->mt,B->nt); n++) { + tempnm = n == B->mt-1 ? B->m-n*B->mb : B->mb; + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + ldan = BLKLDD(A, n); + ldbn = BLKLDD(B, n); + + MORSE_TASK_ztradd( + &options, + uplo, trans, tempnm, tempnn, B->mb, + alpha, A(n, n), ldan, + beta, B(n, n), ldbn); + + for (m = n+1; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; + ldam = BLKLDD(A, m); + ldbm = BLKLDD(B, m); + + MORSE_TASK_zgeadd( + &options, + trans, tempmm, tempnn, B->mb, + alpha, A(m, n), ldam, + beta, B(m, n), ldbm); + } + } + } + else { + for (n = 0; n < min(B->mt,B->nt); n++) { + tempnm = n == B->mt-1 ? B->m-n*B->mb : B->mb; + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + ldan = BLKLDD(A, n); + ldbn = BLKLDD(B, n); + + MORSE_TASK_ztradd( + &options, + uplo, trans, tempnm, tempnn, B->mb, + alpha, A(n, n), ldan, + beta, B(n, n), ldbn); + + for (m = n+1; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; + ldbm = BLKLDD(B, m); + + MORSE_TASK_zgeadd( + &options, + trans, tempmm, tempnn, B->mb, + alpha, A(n, m), ldan, + beta, B(m, n), ldbm); + } + } + } + break; + case MorseUpper: + if (trans == MorseNoTrans) { + for (m = 0; m < min(B->mt,B->nt); m++) { + tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; + tempmn = m == B->nt-1 ? B->n-m*B->nb : B->nb; + ldam = BLKLDD(A, m); + ldbm = BLKLDD(B, m); + + MORSE_TASK_ztradd( + &options, + uplo, trans, tempmm, tempmn, B->mb, + alpha, A(m, m), ldam, + beta, B(m, m), ldbm); + + for (n = m+1; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + MORSE_TASK_zgeadd( + &options, + trans, tempmm, tempnn, B->mb, + alpha, A(m, n), ldam, + beta, B(m, n), ldbm); + } + } + } + else { + for (m = 0; m < min(B->mt,B->nt); m++) { + tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; + tempmn = m == B->nt-1 ? B->n-m*B->nb : B->nb; + ldam = BLKLDD(A, m); + ldbm = BLKLDD(B, m); + + MORSE_TASK_ztradd( + &options, + uplo, trans, tempmm, tempmn, B->mb, + alpha, A(m, m), ldam, + beta, B(m, m), ldbm); + + for (n = m+1; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + ldan = BLKLDD(A, n); + + MORSE_TASK_zgeadd( + &options, + trans, tempmm, tempnn, B->mb, + alpha, A(n, m), ldan, + beta, B(m, n), ldbm); + } + } + } + break; + case MorseUpperLower: + default: + if (trans == MorseNoTrans) { + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; + ldam = BLKLDD(A, m); + ldbm = BLKLDD(B, m); + + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + MORSE_TASK_zgeadd( + &options, + trans, tempmm, tempnn, B->mb, + alpha, A(m, n), ldam, + beta, B(m, n), ldbm); + } + } + } + else { + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; + ldam = BLKLDD(A, m); + ldbm = BLKLDD(B, m); + + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + ldan = BLKLDD(A, n); + + MORSE_TASK_zgeadd( + &options, + trans, tempmm, tempnn, B->mb, + alpha, A(n, m), ldan, + beta, B(m, n), ldbm); + } + } + } + } +} diff --git a/compute/zgeadd.c b/compute/zgeadd.c new file mode 100644 index 0000000000000000000000000000000000000000..ef87bb7728f2041db5ec12599603a591a83ac1ef --- /dev/null +++ b/compute/zgeadd.c @@ -0,0 +1,370 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zgeadd.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Mathieu Faverge + * @date 2011-11-03 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zgeadd - Performs a matrix addition similarly to the pzgeadd() + * function from the PBLAS library: + * + * \f[ C = \alpha op( A ) + \beta B \f], + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ) + * + * alpha and beta are scalars, and A, and B are matrices, with op( A ) and B + * two m by n matrices. + * + ******************************************************************************* + * + * @param[in] trans + * Specifies whether the matrix A is transposed, not transposed or + * conjugate transposed: + * = MorseNoTrans: A is not transposed; + * = MorseTrans: A is transposed; + * = MorseConjTrans: A is conjugate transposed. + * + * @param[in] M + * M specifies the number of rows of the matrix op( A ) and of the matrix B. M >= 0. + * + * @param[in] N + * N specifies the number of columns of the matrix op( A ) and of the matrix B. N >= 0. + * + * @param[in] alpha + * alpha specifies the scalar alpha + * + * @param[in] A + * A is a LDA-by-ka matrix, where ka is N when trans = MorseNoTrans, + * and is M otherwise. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,K), where K is M + * when trans = MorseNoTrans, and is N when otherwise. + * + * @param[in] beta + * beta specifies the scalar beta + * + * @param[in,out] B + * B is a LDB-by-N matrix. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= max(1,M). + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zgeadd_Tile + * @sa MORSE_cgeadd + * @sa MORSE_dgeadd + * @sa MORSE_sgeadd + * + ******************************************************************************/ +int MORSE_zgeadd(MORSE_enum trans, int M, int N, + MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB) +{ + int NB; + int Am, An; + int status; + MORSE_desc_t descA, descB; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgeadd", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if ((trans != MorseNoTrans) && (trans != MorseTrans) && (trans != MorseConjTrans)) { + morse_error("MORSE_zgeadd", "illegal value of trans"); + return -1; + } + if ( trans == MorseNoTrans ) { + Am = M; An = N; + } else { + Am = N; An = M; + } + if (M < 0) { + morse_error("MORSE_zgeadd", "illegal value of M"); + return -2; + } + if (N < 0) { + morse_error("MORSE_zgeadd", "illegal value of N"); + return -3; + } + if (LDA < max(1, Am)) { + morse_error("MORSE_zgeadd", "illegal value of LDA"); + return -6; + } + if (LDB < max(1, M)) { + morse_error("MORSE_zgeadd", "illegal value of LDB"); + return -9; + } + + /* Quick return */ + if (M == 0 || N == 0 || + ((alpha == (MORSE_Complex64_t)0.0) && beta == (MORSE_Complex64_t)1.0)) + return MORSE_SUCCESS; + + /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */ + status = morse_tune(MORSE_FUNC_ZGEMM, M, N, 0); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zgeadd", "morse_tune() failed"); + return status; + } + + /* Set MT & NT & KT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + + /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) { */ + morse_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descB))); + /* } else { */ + /* morse_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, */ + /* sequence, &request); */ + /* morse_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, */ + /* sequence, &request); */ + /* } */ + + /* Call the tile interface */ + MORSE_zgeadd_Tile_Async( + trans, alpha, &descA, beta, &descB, sequence, &request); + + /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) { */ + morse_zooptile2lap( descB, B, NB, NB, LDB, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descB); + /* } else { */ + /* morse_ziptile2lap( descA, A, NB, NB, LDA, An, sequence, &request); */ + /* morse_ziptile2lap( descB, B, NB, NB, LDB, N, sequence, &request); */ + /* morse_dynamic_sync(); */ + /* } */ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zgeadd_Tile - Performs a matrix addition similarly to the pzgeadd() + * function from the PBLAS library. + * Tile equivalent of MORSE_zgeadd(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] trans + * Specifies whether the matrix A is transposed, not transposed or + * conjugate transposed: + * = MorseNoTrans: A is not transposed; + * = MorseTrans: A is transposed; + * = MorseConjTrans: A is conjugate transposed. + * + * @param[in] alpha + * alpha specifies the scalar alpha + * + * @param[in] A + * A is a LDA-by-ka matrix, where ka is N when trans = MorseNoTrans, + * and is M otherwise. + * + * @param[in] beta + * beta specifies the scalar beta + * + * @param[in,out] B + * B is a LDB-by-N matrix. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zgeadd + * @sa MORSE_zgeadd_Tile_Async + * @sa MORSE_cgeadd_Tile + * @sa MORSE_dgeadd_Tile + * @sa MORSE_sgeadd_Tile + * + ******************************************************************************/ +int MORSE_zgeadd_Tile(MORSE_enum trans, + MORSE_Complex64_t alpha, MORSE_desc_t *A, + MORSE_Complex64_t beta, MORSE_desc_t *B) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgeadd_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zgeadd_Tile_Async(trans, alpha, A, beta, B, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(B); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_zgeadd_Tile_Async - Performs a matrix addition similarly to the + * pzgeadd() function from the PBLAS library. + * Non-blocking equivalent of MORSE_zgeadd_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zgeadd + * @sa MORSE_zgeadd_Tile + * @sa MORSE_cgeadd_Tile_Async + * @sa MORSE_dgeadd_Tile_Async + * @sa MORSE_sgeadd_Tile_Async + * + ******************************************************************************/ +int MORSE_zgeadd_Tile_Async(MORSE_enum trans, + MORSE_Complex64_t alpha, MORSE_desc_t *A, + MORSE_Complex64_t beta, MORSE_desc_t *B, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + int M, N; + int Am, An, Ai, Aj, Amb, Anb; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgeadd_Tile_Async", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zgeadd_Tile_Async", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zgeadd_Tile_Async", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zgeadd_Tile_Async", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(B) != MORSE_SUCCESS) { + morse_error("MORSE_zgeadd_Tile_Async", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if ((trans != MorseNoTrans) && (trans != MorseTrans) && (trans != MorseConjTrans)) { + morse_error("MORSE_zgeadd_Tile_Async", "illegal value of trans"); + return morse_request_fail(sequence, request, -1); + } + + if ( trans == MorseNoTrans ) { + Am = A->m; + An = A->n; + Amb = A->mb; + Anb = A->nb; + Ai = A->i; + Aj = A->j; + } else { + Am = A->n; + An = A->m; + Amb = A->nb; + Anb = A->mb; + Ai = A->j; + Aj = A->i; + } + + if ( (Amb != B->mb) || (Anb != B->nb) ) { + morse_error("MORSE_zgeadd_Tile_Async", "tile sizes have to match"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if ( (Am != B->m) || (An != B->n) ) { + morse_error("MORSE_zgeadd_Tile_Async", "sizes of matrices have to match"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if ( (Ai != B->i) || (Aj != B->j) ) { + morse_error("MORSE_zgeadd_Tile_Async", "start indexes have to match"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + + M = B->m; + N = B->n; + + /* Quick return */ + if (M == 0 || N == 0 || + ((alpha == (MORSE_Complex64_t)0.0) && beta == (MORSE_Complex64_t)1.0)) + return MORSE_SUCCESS; + + morse_pztradd(MorseUpperLower, trans, alpha, A, beta, B, sequence, request); + + return MORSE_SUCCESS; +} diff --git a/compute/zgemm.c b/compute/zgemm.c index 8fc3e48377ff796c540c6c9cc7e06309e34e039e..bc0be5dcc6540a947fad4f95a0f5a4c3c535b150 100644 --- a/compute/zgemm.c +++ b/compute/zgemm.c @@ -157,12 +157,12 @@ int MORSE_zgemm(MORSE_enum transA, MORSE_enum transB, int M, int N, int K, morse_error("MORSE_zgemm", "illegal value of transB"); return -2; } - if ( transA == MorseNoTrans ) { + if ( transA == MorseNoTrans ) { Am = M; An = K; } else { Am = K; An = M; } - if ( transB == MorseNoTrans ) { + if ( transB == MorseNoTrans ) { Bm = K; Bn = N; } else { Bm = N; Bn = K; diff --git a/compute/ztradd.c b/compute/ztradd.c new file mode 100644 index 0000000000000000000000000000000000000000..5e8855189034c8c72685e1aaccc8701d361cdef0 --- /dev/null +++ b/compute/ztradd.c @@ -0,0 +1,386 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file ztradd.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Mathieu Faverge + * @date 2011-11-03 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t + * + * MORSE_ztradd - Performs a matrix addition similarly to the pztradd() + * function from the PBLAS library: + * + * \f[ C = \alpha op( A ) + \beta B \f], + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ) + * + * alpha and beta are scalars, and A, and B are two trapezoidal matrices, with + * op( A ) and B two m by n matrices. + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies the shape of A and B matrices: + * = MorseUpperLower: A and B are general matrices. + * = MorseUpper: op(A) and B are upper trapezoidal matrices. + * = MorseLower: op(A) and B are lower trapezoidal matrices. + * + * @param[in] trans + * Specifies whether the matrix A is transposed, not transposed or + * conjugate transposed: + * = MorseNoTrans: A is not transposed; + * = MorseTrans: A is transposed; + * = MorseConjTrans: A is conjugate transposed. + * + * @param[in] M + * M specifies the number of rows of the matrix op( A ) and of the matrix B. M >= 0. + * + * @param[in] N + * N specifies the number of columns of the matrix op( A ) and of the matrix B. N >= 0. + * + * @param[in] alpha + * alpha specifies the scalar alpha + * + * @param[in] A + * A is a LDA-by-ka matrix, where ka is N when trans = MorseNoTrans, + * and is M otherwise. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,K), where K is M + * when trans = MorseNoTrans, and is N when otherwise. + * + * @param[in] beta + * beta specifies the scalar beta + * + * @param[in,out] B + * B is a LDB-by-N matrix. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= max(1,M). + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_ztradd_Tile + * @sa MORSE_ctradd + * @sa MORSE_dtradd + * @sa MORSE_stradd + * + ******************************************************************************/ +int MORSE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, + MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB) +{ + int NB; + int Am, An; + int status; + MORSE_desc_t descA, descB; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_ztradd", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if ((uplo != MorseUpperLower) && (uplo != MorseUpper) && (uplo != MorseLower)) { + morse_error("MORSE_ztradd", "illegal value of uplo"); + return -1; + } + if ((trans != MorseNoTrans) && (trans != MorseTrans) && (trans != MorseConjTrans)) { + morse_error("MORSE_ztradd", "illegal value of trans"); + return -2; + } + if ( trans == MorseNoTrans ) { + Am = M; An = N; + } else { + Am = N; An = M; + } + if (M < 0) { + morse_error("MORSE_ztradd", "illegal value of M"); + return -3; + } + if (N < 0) { + morse_error("MORSE_ztradd", "illegal value of N"); + return -4; + } + if (LDA < max(1, Am)) { + morse_error("MORSE_ztradd", "illegal value of LDA"); + return -7; + } + if (LDB < max(1, M)) { + morse_error("MORSE_ztradd", "illegal value of LDB"); + return -10; + } + + /* Quick return */ + if (M == 0 || N == 0 || + ((alpha == (MORSE_Complex64_t)0.0) && beta == (MORSE_Complex64_t)1.0)) + return MORSE_SUCCESS; + + /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */ + status = morse_tune(MORSE_FUNC_ZGEMM, M, N, 0); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_ztradd", "morse_tune() failed"); + return status; + } + + /* Set MT & NT & KT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + + /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) { */ + morse_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descB))); + /* } else { */ + /* morse_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, */ + /* sequence, &request); */ + /* morse_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, */ + /* sequence, &request); */ + /* } */ + + /* Call the tile interface */ + MORSE_ztradd_Tile_Async( + uplo, trans, alpha, &descA, beta, &descB, sequence, &request); + + /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) { */ + morse_zooptile2lap( descB, B, NB, NB, LDB, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descB); + /* } else { */ + /* morse_ziptile2lap( descA, A, NB, NB, LDA, An, sequence, &request); */ + /* morse_ziptile2lap( descB, B, NB, NB, LDB, N, sequence, &request); */ + /* morse_dynamic_sync(); */ + /* } */ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_ztradd_Tile - Performs a matrix addition similarly to the pztradd() + * function from the PBLAS library. + * Tile equivalent of MORSE_ztradd(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies the shape of A and B matrices: + * = MorseUpperLower: A and B are general matrices. + * = MorseUpper: op(A) and B are upper trapezoidal matrices. + * = MorseLower: op(A) and B are lower trapezoidal matrices. + * + * @param[in] trans + * Specifies whether the matrix A is transposed, not transposed or + * conjugate transposed: + * = MorseNoTrans: A is not transposed; + * = MorseTrans: A is transposed; + * = MorseConjTrans: A is conjugate transposed. + * + * @param[in] alpha + * alpha specifies the scalar alpha + * + * @param[in] A + * A is a LDA-by-ka matrix, where ka is N when trans = MorseNoTrans, + * and is M otherwise. + * + * @param[in] beta + * beta specifies the scalar beta + * + * @param[in,out] B + * B is a LDB-by-N matrix. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_ztradd + * @sa MORSE_ztradd_Tile_Async + * @sa MORSE_ctradd_Tile + * @sa MORSE_dtradd_Tile + * @sa MORSE_stradd_Tile + * + ******************************************************************************/ +int MORSE_ztradd_Tile(MORSE_enum uplo, MORSE_enum trans, + MORSE_Complex64_t alpha, MORSE_desc_t *A, + MORSE_Complex64_t beta, MORSE_desc_t *B) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_ztradd_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_ztradd_Tile_Async(uplo, trans, alpha, A, beta, B, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(B); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_ztradd_Tile_Async - Performs a matrix addition similarly to the + * pztradd() function from the PBLAS library. + * Non-blocking equivalent of MORSE_ztradd_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_ztradd + * @sa MORSE_ztradd_Tile + * @sa MORSE_ctradd_Tile_Async + * @sa MORSE_dtradd_Tile_Async + * @sa MORSE_stradd_Tile_Async + * + ******************************************************************************/ +int MORSE_ztradd_Tile_Async(MORSE_enum uplo, MORSE_enum trans, + MORSE_Complex64_t alpha, MORSE_desc_t *A, + MORSE_Complex64_t beta, MORSE_desc_t *B, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + int M, N; + int Am, An, Ai, Aj, Amb, Anb; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_ztradd_Tile_Async", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_ztradd_Tile_Async", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_ztradd_Tile_Async", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_ztradd_Tile_Async", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(B) != MORSE_SUCCESS) { + morse_error("MORSE_ztradd_Tile_Async", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if ((trans != MorseNoTrans) && (trans != MorseTrans) && (trans != MorseConjTrans)) { + morse_error("MORSE_ztradd_Tile_Async", "illegal value of trans"); + return morse_request_fail(sequence, request, -1); + } + + if ( trans == MorseNoTrans ) { + Am = A->m; + An = A->n; + Amb = A->mb; + Anb = A->nb; + Ai = A->i; + Aj = A->j; + } else { + Am = A->n; + An = A->m; + Amb = A->nb; + Anb = A->mb; + Ai = A->j; + Aj = A->i; + } + + if ( (Amb != B->mb) || (Anb != B->nb) ) { + morse_error("MORSE_ztradd_Tile_Async", "tile sizes have to match"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if ( (Am != B->m) || (An != B->n) ) { + morse_error("MORSE_ztradd_Tile_Async", "sizes of matrices have to match"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if ( (Ai != B->i) || (Aj != B->j) ) { + morse_error("MORSE_ztradd_Tile_Async", "start indexes have to match"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + + M = B->m; + N = B->n; + + /* Quick return */ + if (M == 0 || N == 0 || + ((alpha == (MORSE_Complex64_t)0.0) && beta == (MORSE_Complex64_t)1.0)) + return MORSE_SUCCESS; + + morse_pztradd(uplo, trans, alpha, A, beta, B, sequence, request); + + return MORSE_SUCCESS; +} diff --git a/control/compute_z.h b/control/compute_z.h index 1128ea3c9c6a83544bbba181268d9fbd42e789e8..3d5cf4ae049879e69809060216fade5170232a41 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -84,7 +84,6 @@ int morse_zshift(MORSE_context_t *morse, int m, int n, MORSE_Complex64_t *A, /***************************************************************************//** * Declarations of parallel functions (dynamic scheduling) - alphabetical order **/ -void morse_pzgeadd(MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzbarrier_tl2pnl(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzbarrier_pnl2tl(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzbarrier_tl2row(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); @@ -134,6 +133,7 @@ void morse_pzsymm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MOR void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrsmpl(MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *L, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt index dc0f6f519ff23ee14ad0dd83858c74d389bf4982..96f88014170f292f3456f0a724752776ad4c0465 100644 --- a/coreblas/compute/CMakeLists.txt +++ b/coreblas/compute/CMakeLists.txt @@ -46,7 +46,7 @@ set(ZSRC core_zhemm.c core_zher2k.c core_zherk.c - core_zhessq.c + core_zhessq.c core_zlacpy.c core_zlag2c.c core_zlange.c @@ -69,6 +69,7 @@ set(ZSRC core_zsyrk.c core_zsyssq.c core_zsytf2_nopiv.c + core_ztradd.c core_ztrasm.c core_ztrmm.c core_ztrsm.c diff --git a/coreblas/compute/core_zgeadd.c b/coreblas/compute/core_zgeadd.c index 138ffe59dac61eb3922f1a4ea420badb5ec1f0c7..f7714d50ca9006028575daad5abb4f9d2df7fc84 100644 --- a/coreblas/compute/core_zgeadd.c +++ b/coreblas/compute/core_zgeadd.c @@ -21,40 +21,55 @@ * from Plasma 2.5.0 for MORSE 1.0.0 * @author Mathieu Faverge * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 + * @date 2015-11-03 * @precisions normal z -> c d s * **/ #include "coreblas/include/coreblas.h" -/***************************************************************************//** +/** + ****************************************************************************** * * @ingroup CORE_MORSE_Complex64_t * - * CORE_zgeadd adds to matrices together. + * CORE_zgeadd adds to matrices together as in PBLAS pzgeadd. + * + * B <- alpha * op(A) + beta * B, * - * B <- alpha * A + B + * where op(X) = X, X', or conj(X') * ******************************************************************************* * + * @param[in] trans + * Specifies whether the matrix A is non-transposed, transposed, or + * conjugate transposed + * = MorseNoTrans: op(A) = A + * = MorseTrans: op(A) = A' + * = MorseConjTrans: op(A) = conj(A') + * * @param[in] M - * Number of rows of the matrices A and B. + * Number of rows of the matrices op(A) and B. * * @param[in] N - * Number of columns of the matrices A and B. + * Number of columns of the matrices op(A) and B. * * @param[in] alpha * Scalar factor of A. * * @param[in] A - * Matrix of size LDA-by-N. + * Matrix of size LDA-by-N, if trans = MorseNoTrans, LDA-by-M + * otherwise. * * @param[in] LDA - * Leading dimension of the array A. LDA >= max(1,M) + * Leading dimension of the array A. LDA >= max(1,k), with k=M, if + * trans = MorseNoTrans, and k=N otherwise. + * + * @param[in] beta + * Scalar factor of B. * * @param[in,out] B * Matrix of size LDB-by-N. + * On exit, B = alpha * op(A) + beta * B * * @param[in] LDB * Leading dimension of the array B. LDB >= max(1,M) @@ -66,39 +81,75 @@ * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************/ - -int CORE_zgeadd(int M, int N, MORSE_Complex64_t alpha, +#if defined(MORSE_HAVE_WEAK) +#pragma weak CORE_zgeadd = PCORE_zgeadd +#define CORE_zgeadd PCORE_zgeadd +#endif +int CORE_zgeadd(MORSE_enum trans, int M, int N, + MORSE_Complex64_t alpha, const MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB) { - int j; + int i, j; - if (M < 0) { - coreblas_error(1, "Illegal value of M"); + if ((trans != MorseNoTrans) && + (trans != MorseTrans) && + (trans != MorseConjTrans)) + { + coreblas_error(1, "illegal value of trans"); return -1; } - if (N < 0) { - coreblas_error(2, "Illegal value of N"); + + if (M < 0) { + coreblas_error(2, "Illegal value of M"); return -2; } - if ( (LDA < max(1,M)) && (M > 0) ) { - coreblas_error(5, "Illegal value of LDA"); - return -5; + if (N < 0) { + coreblas_error(3, "Illegal value of N"); + return -3; + } + if ( ((trans == MorseNoTrans) && (LDA < max(1,M)) && (M > 0)) || + ((trans != MorseNoTrans) && (LDA < max(1,N)) && (N > 0)) ) + { + coreblas_error(6, "Illegal value of LDA"); + return -6; } if ( (LDB < max(1,M)) && (M > 0) ) { - coreblas_error(7, "Illegal value of LDB"); - return -7; + coreblas_error(8, "Illegal value of LDB"); + return -8; } - if (M == LDA && M == LDB) - cblas_zaxpy(M*N, CBLAS_SADDR(alpha), A, 1, B, 1); - else { - for (j = 0; j < N; j++) - cblas_zaxpy(M, CBLAS_SADDR(alpha), A + j*LDA, 1, B + j*LDB, 1); - } + switch( trans ) { +#if defined(PRECISION_z) || defined(PRECISION_c) + case MorseConjTrans: + for (j=0; j<N; j++, A++) { + for(i=0; i<M; i++, B++) { + *B = beta * (*B) + alpha * conj(A[LDA*i]); + } + B += LDB-M; + } + break; +#endif /* defined(PRECISION_z) || defined(PRECISION_c) */ + + case MorseTrans: + for (j=0; j<N; j++, A++) { + for(i=0; i<M; i++, B++) { + *B = beta * (*B) + alpha * A[LDA*i]; + } + B += LDB-M; + } + break; + case MorseNoTrans: + default: + for (j=0; j<N; j++) { + for(i=0; i<M; i++, B++, A++) { + *B = beta * (*B) + alpha * (*A); + } + A += LDA-M; + B += LDB-M; + } + } return MORSE_SUCCESS; } - - - diff --git a/coreblas/compute/core_ztradd.c b/coreblas/compute/core_ztradd.c new file mode 100644 index 0000000000000000000000000000000000000000..6334a874231fabfbec63af57f9107510335e8290 --- /dev/null +++ b/coreblas/compute/core_ztradd.c @@ -0,0 +1,224 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file core_ztradd.c + * + * PLASMA core_blas kernel + * PLASMA is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Mathieu Faverge + * @date 2015-11-03 + * @precisions normal z -> c d s + * + **/ +#include "coreblas/include/coreblas.h" + +/** + ****************************************************************************** + * + * @ingroup CORE_MORSE_Complex64_t + * + * CORE_ztradd adds to matrices together as in PBLAS pztradd. + * + * B <- alpha * op(A) + beta * B, + * + * where op(X) = X, X', or conj(X') + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies the shape of A and B matrices: + * = MorseUpperLower: A and B are general matrices. + * = MorseUpper: op(A) and B are upper trapezoidal matrices. + * = MorseLower: op(A) and B are lower trapezoidal matrices. + * + * @param[in] trans + * Specifies whether the matrix A is non-transposed, transposed, or + * conjugate transposed + * = MorseNoTrans: op(A) = A + * = MorseTrans: op(A) = A' + * = MorseConjTrans: op(A) = conj(A') + * + * @param[in] M + * Number of rows of the matrices A and B. + * + * @param[in] N + * Number of columns of the matrices A and B. + * + * @param[in] alpha + * Scalar factor of A. + * + * @param[in] A + * Matrix of size LDA-by-N. + * + * @param[in] LDA + * Leading dimension of the array A. LDA >= max(1,M) + * + * @param[in] beta + * Scalar factor of B. + * + * @param[in,out] B + * Matrix of size LDB-by-N. + * On exit, B = alpha * op(A) + beta * B + * + * @param[in] LDB + * Leading dimension of the array B. LDB >= max(1,M) + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************/ +#if defined(MORSE_HAVE_WEAK) +#pragma weak CORE_ztradd = PCORE_ztradd +#define CORE_ztradd PCORE_ztradd +#define CORE_zgeadd PCORE_zgeadd +int +CORE_zgeadd(MORSE_enum trans, int M, int N, + MORSE_Complex64_t alpha, + const MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t beta, + MORSE_Complex64_t *B, int LDB); +#endif +int CORE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, + MORSE_Complex64_t alpha, + const MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t beta, + MORSE_Complex64_t *B, int LDB) +{ + int i, j; + + if (uplo == MorseUpperLower){ + int rc = CORE_zgeadd( trans, M, N, alpha, A, LDA, beta, B, LDB ); + if (rc != MORSE_SUCCESS) + return rc-1; + else + return rc; + } + + if ((uplo != MorseUpper) && + (uplo != MorseLower)) + { + coreblas_error(1, "illegal value of uplo"); + return -1; + } + + if ((trans != MorseNoTrans) && + (trans != MorseTrans) && + (trans != MorseConjTrans)) + { + coreblas_error(2, "illegal value of trans"); + return -2; + } + + if (M < 0) { + coreblas_error(3, "Illegal value of M"); + return -3; + } + if (N < 0) { + coreblas_error(4, "Illegal value of N"); + return -4; + } + if ( ((trans == MorseNoTrans) && (LDA < max(1,M)) && (M > 0)) || + ((trans != MorseNoTrans) && (LDA < max(1,N)) && (N > 0)) ) + { + coreblas_error(7, "Illegal value of LDA"); + return -7; + } + if ( (LDB < max(1,M)) && (M > 0) ) { + coreblas_error(9, "Illegal value of LDB"); + return -9; + } + + /** + * MorseLower + */ + if (uplo == MorseLower) { + switch( trans ) { +#if defined(PRECISION_z) || defined(PRECISION_c) + case MorseConjTrans: + for (j=0; j<N; j++, A++) { + for(i=j; i<M; i++, B++) { + *B = beta * (*B) + alpha * conj(A[LDA*i]); + } + B += LDB-M+j+1; + } + break; +#endif /* defined(PRECISION_z) || defined(PRECISION_c) */ + + case MorseTrans: + for (j=0; j<N; j++, A++) { + for(i=j; i<M; i++, B++) { + *B = beta * (*B) + alpha * A[LDA*i]; + } + B += LDB-M+j+1; + } + break; + + case MorseNoTrans: + default: + for (j=0; j<N; j++) { + for(i=j; i<M; i++, B++, A++) { + *B = beta * (*B) + alpha * (*A); + } + B += LDB-M+j+1; + A += LDA-M+j+1; + } + } + } + /** + * MorseUpper + */ + else { + switch( trans ) { +#if defined(PRECISION_z) || defined(PRECISION_c) + case MorseConjTrans: + for (j=0; j<N; j++, A++) { + int mm = min( j+1, M ); + for(i=0; i<mm; i++, B++) { + *B = beta * (*B) + alpha * conj(A[LDA*i]); + } + B += LDB-mm; + } + break; +#endif /* defined(PRECISION_z) || defined(PRECISION_c) */ + + case MorseTrans: + for (j=0; j<N; j++, A++) { + int mm = min( j+1, M ); + for(i=0; i<mm; i++, B++) { + *B = beta * (*B) + alpha * (A[LDA*i]); + } + B += LDB-mm; + } + break; + + case MorseNoTrans: + default: + for (j=0; j<N; j++) { + int mm = min( j+1, M ); + for(i=0; i<mm; i++, B++, A++) { + *B = beta * (*B) + alpha * (*A); + } + B += LDB-mm; + A += LDA-mm; + } + } + } + return MORSE_SUCCESS; +} diff --git a/coreblas/include/coreblas_z.h b/coreblas/include/coreblas_z.h index 5193a39970f70061c62e33c370f9a7797f8cc843..fd21173de2f4b639dd058e353e01271925de3bad 100644 --- a/coreblas/include/coreblas_z.h +++ b/coreblas/include/coreblas_z.h @@ -58,8 +58,10 @@ int CORE_zgblrx(MORSE_enum uplo, int N, int CORE_zaxpy(int M, MORSE_Complex64_t alpha, const MORSE_Complex64_t *A, int incA, MORSE_Complex64_t *B, int incB); -int CORE_zgeadd(int M, int N, MORSE_Complex64_t alpha, +int CORE_zgeadd(MORSE_enum trans, int M, int N, + MORSE_Complex64_t alpha, const MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB); int CORE_zgelqt(int M, int N, int IB, MORSE_Complex64_t *A, int LDA, @@ -273,6 +275,11 @@ void CORE_zswpab(int i, int n1, int n2, MORSE_Complex64_t *A, MORSE_Complex64_t *work); int CORE_zswptr_ontile(MORSE_desc_t descA, int i1, int i2, const int *ipiv, int inc, const MORSE_Complex64_t *Akk, int ldak); +int CORE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, + MORSE_Complex64_t alpha, + const MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t beta, + MORSE_Complex64_t *B, int LDB); void CORE_ztrasm(MORSE_enum storev, MORSE_enum uplo, MORSE_enum diag, int M, int N, const MORSE_Complex64_t *A, int lda, double *work); diff --git a/include/morse_z.h b/include/morse_z.h index 51cb8cb0de51ef66dcec88645daab3afa64e8403..5587fa6fd2011e17f5cac4f68c8050b3775f560a 100644 --- a/include/morse_z.h +++ b/include/morse_z.h @@ -42,6 +42,7 @@ extern "C" { /** **************************************************************************** * Declarations of math functions (LAPACK layout) - alphabetical order **/ +int MORSE_zgeadd(MORSE_enum trans, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB); //int MORSE_zgebrd(int M, int N, MORSE_Complex64_t *A, int LDA, double *D, double *E, MORSE_desc_t *descT); //int MORSE_zgecon(MORSE_enum norm, int N, MORSE_Complex64_t *A, int LDA, double anorm, double *rcond); //int MORSE_zpocon(MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA, double anorm, double *rcond); @@ -100,6 +101,7 @@ int MORSE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64 int MORSE_zsyr2k(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta, MORSE_Complex64_t *C, int LDC); int MORSE_zsysv(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); int MORSE_zsytrs(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); +int MORSE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB); int MORSE_ztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int N, int NRHS, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); int MORSE_ztrsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int N, int NRHS, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); int MORSE_ztrsmpl(int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descL, int *IPIV, MORSE_Complex64_t *B, int LDB); @@ -116,6 +118,7 @@ int MORSE_zunmqr(MORSE_enum side, MORSE_enum trans, int M, int N, int K, MORSE_C /** **************************************************************************** * Declarations of math functions (tile layout) - alphabetical order **/ +int MORSE_zgeadd_Tile(MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B); //int MORSE_zgebrd_Tile(MORSE_desc_t *A, double *D, double *E, MORSE_desc_t *T); //int MORSE_zgecon_Tile(MORSE_enum norm, MORSE_desc_t *A, double anorm, double *rcond); //int MORSE_zpocon_Tile(MORSE_enum uplo, MORSE_desc_t *A, double anorm, double *rcond); @@ -174,6 +177,7 @@ int MORSE_zsyrk_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, int MORSE_zsyr2k_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C); int MORSE_zsysv_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B); int MORSE_zsytrs_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B); +int MORSE_ztradd_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B); int MORSE_ztrmm_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B); int MORSE_ztrsm_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B); int MORSE_ztrsmpl_Tile(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_desc_t *B); @@ -187,6 +191,7 @@ int MORSE_zunmqr_Tile(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_ /** **************************************************************************** * Declarations of math functions (tile layout, asynchronous execution) - alphabetical order **/ +int MORSE_zgeadd_Tile_Async(MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); //int MORSE_zgebrd_Tile_Async(MORSE_desc_t *A, double *D, double *E, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); //int MORSE_zgecon_Tile_Async(MORSE_enum norm, MORSE_desc_t *A, double anorm, double *rcond, MORSE_sequence_t *sequence, MORSE_request_t *request); //int MORSE_zpocon_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, double anorm, double *rcond, MORSE_sequence_t *sequence, MORSE_request_t *request); @@ -245,6 +250,7 @@ int MORSE_zsytrs_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, M int MORSE_zsymm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zsyrk_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zsyr2k_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_ztradd_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_ztrmm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_ztrsm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_ztrsmpl_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/include/runtime_z.h b/include/runtime_z.h index cb7d4c963ac80139aec7b95d0a026e85185bb5fc..2495355c59d1226b57f996c83e280adb70e5ebfa 100644 --- a/include/runtime_z.h +++ b/include/runtime_z.h @@ -47,9 +47,9 @@ void MORSE_TASK_dzasum(MORSE_option_t *options, MORSE_desc_t *A, int Am, int An, int lda, MORSE_desc_t *B, int Bm, int Bn); void MORSE_TASK_zgeadd(MORSE_option_t *options, - int m, int n, MORSE_Complex64_t alpha, - MORSE_desc_t *A, int Am, int An, int lda, - MORSE_desc_t *B, int Bm, int Bn, int ldb); + MORSE_enum trans, int m, int n, int nb, + MORSE_Complex64_t alpha, MORSE_desc_t *A, int Am, int An, int lda, + MORSE_Complex64_t beta, MORSE_desc_t *B, int Bm, int Bn, int ldb); void MORSE_TASK_zbrdalg(MORSE_option_t *options, MORSE_enum uplo, int N, int NB, @@ -310,6 +310,10 @@ void MORSE_TASK_ztrdalg(MORSE_option_t *options, MORSE_desc_t *S, int Sm, int Sn, int i, int j, int m, int grsiz, int BAND, int *PCOL, int *ACOL, int *MCOL); +void MORSE_TASK_ztradd(MORSE_option_t *options, + MORSE_enum uplo, MORSE_enum trans, int m, int n, int nb, + MORSE_Complex64_t alpha, MORSE_desc_t *A, int Am, int An, int lda, + MORSE_Complex64_t beta, MORSE_desc_t *B, int Bm, int Bn, int ldb); void MORSE_TASK_ztrasm(MORSE_option_t *options, MORSE_enum storev, MORSE_enum uplo, MORSE_enum diag, int M, int N, MORSE_desc_t *A, int Am, int An, int lda, diff --git a/runtime/quark/CMakeLists.txt b/runtime/quark/CMakeLists.txt index 25fe19568d4c1f9c3d1dfc4b326c001f76bf57ea..0e94ef5b8ca4c7e7fcf474a632677ea0dfe630c9 100644 --- a/runtime/quark/CMakeLists.txt +++ b/runtime/quark/CMakeLists.txt @@ -131,6 +131,7 @@ set(ZSRC codelets/codelet_zssssm.c codelets/codelet_zsyssq.c codelets/codelet_zsytrf_nopiv.c + codelets/codelet_ztradd.c codelets/codelet_ztrasm.c codelets/codelet_ztrssq.c codelets/codelet_ztrtri.c diff --git a/runtime/quark/codelets/codelet_zgeadd.c b/runtime/quark/codelets/codelet_zgeadd.c index 53f2cd8779eff44509ebad7fadbe2f8cb00a07db..3fecba2480264c3d726f3b73a6456adf3e479b7c 100644 --- a/runtime/quark/codelets/codelet_zgeadd.c +++ b/runtime/quark/codelets/codelet_zgeadd.c @@ -28,33 +28,49 @@ **/ #include "runtime/quark/include/morse_quark.h" -/***************************************************************************//** +/** + ****************************************************************************** * * @ingroup CORE_MORSE_Complex64_t * - * CORE_zgeadd adds to matrices together. + * MORSE_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. + * + * B <- alpha * op(A) + beta * B, * - * B <- alpha * A + B + * where op(X) = X, X', or conj(X') * ******************************************************************************* * + * @param[in] trans + * Specifies whether the matrix A is non-transposed, transposed, or + * conjugate transposed + * = MorseNoTrans: op(A) = A + * = MorseTrans: op(A) = A' + * = MorseConjTrans: op(A) = conj(A') + * * @param[in] M - * Number of rows of the matrices A and B. + * Number of rows of the matrices op(A) and B. * * @param[in] N - * Number of columns of the matrices A and B. + * Number of columns of the matrices op(A) and B. * * @param[in] alpha * Scalar factor of A. * * @param[in] A - * Matrix of size LDA-by-N. + * Matrix of size LDA-by-N, if trans = MorseNoTrans, LDA-by-M + * otherwise. * * @param[in] LDA - * Leading dimension of the array A. LDA >= max(1,M) + * Leading dimension of the array A. LDA >= max(1,k), with k=M, if + * trans = MorseNoTrans, and k=N otherwise. + * + * @param[in] beta + * Scalar factor of B. * * @param[in,out] B * Matrix of size LDB-by-N. + * On exit, B = alpha * op(A) + beta * B * * @param[in] LDB * Leading dimension of the array B. LDB >= max(1,M) @@ -66,20 +82,21 @@ * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************/ - void MORSE_TASK_zgeadd(MORSE_option_t *options, - int m, int n, MORSE_Complex64_t alpha, - MORSE_desc_t *A, int Am, int An, int lda, - MORSE_desc_t *B, int Bm, int Bn, int ldb) + MORSE_enum trans, int m, int n, int nb, + MORSE_Complex64_t alpha, MORSE_desc_t *A, int Am, int An, int lda, + MORSE_Complex64_t beta, MORSE_desc_t *B, int Bm, int Bn, int ldb) { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_GEADD; QUARK_Insert_Task(opt->quark, CORE_zgeadd_quark, (Quark_Task_Flags*)opt, + sizeof(MORSE_enum), &trans, VALUE, sizeof(int), &m, VALUE, sizeof(int), &n, VALUE, sizeof(MORSE_Complex64_t), &alpha, VALUE, sizeof(MORSE_Complex64_t)*lda*n, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT, sizeof(int), &lda, VALUE, + sizeof(MORSE_Complex64_t), &beta, VALUE, sizeof(MORSE_Complex64_t)*ldb*n, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), INOUT, sizeof(int), &ldb, VALUE, 0); @@ -88,16 +105,18 @@ void MORSE_TASK_zgeadd(MORSE_option_t *options, void CORE_zgeadd_quark(Quark *quark) { + MORSE_enum trans; int M; int N; MORSE_Complex64_t alpha; MORSE_Complex64_t *A; int LDA; + MORSE_Complex64_t beta; MORSE_Complex64_t *B; int LDB; - quark_unpack_args_7(quark, M, N, alpha, A, LDA, B, LDB); - CORE_zgeadd(M, N, alpha, A, LDA, B, LDB); + quark_unpack_args_9(quark, trans, M, N, alpha, A, LDA, beta, B, LDB); + CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB); return; } diff --git a/runtime/quark/codelets/codelet_ztradd.c b/runtime/quark/codelets/codelet_ztradd.c new file mode 100644 index 0000000000000000000000000000000000000000..8694811ae47c22634a670085a6f202d696bd11d4 --- /dev/null +++ b/runtime/quark/codelets/codelet_ztradd.c @@ -0,0 +1,128 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file codelet_ztradd.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Mathieu Faverge + * @date 2011-11-03 + * @precisions normal z -> c d s + * + **/ +#include "runtime/quark/include/morse_quark.h" + +/** + ****************************************************************************** + * + * @ingroup CORE_MORSE_Complex64_t + * + * MORSE_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd. + * + * B <- alpha * op(A) + beta * B, + * + * where op(X) = X, X', or conj(X') + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies the shape of A and B matrices: + * = MorseUpperLower: A and B are general matrices. + * = MorseUpper: op(A) and B are upper trapezoidal matrices. + * = MorseLower: op(A) and B are lower trapezoidal matrices. + * + * @param[in] trans + * Specifies whether the matrix A is non-transposed, transposed, or + * conjugate transposed + * = MorseNoTrans: op(A) = A + * = MorseTrans: op(A) = A' + * = MorseConjTrans: op(A) = conj(A') + * + * @param[in] M + * Number of rows of the matrices op(A) and B. + * + * @param[in] N + * Number of columns of the matrices op(A) and B. + * + * @param[in] alpha + * Scalar factor of A. + * + * @param[in] A + * Matrix of size LDA-by-N, if trans = MorseNoTrans, LDA-by-M + * otherwise. + * + * @param[in] LDA + * Leading dimension of the array A. LDA >= max(1,k), with k=M, if + * trans = MorseNoTrans, and k=N otherwise. + * + * @param[in] beta + * Scalar factor of B. + * + * @param[in,out] B + * Matrix of size LDB-by-N. + * On exit, B = alpha * op(A) + beta * B + * + * @param[in] LDB + * Leading dimension of the array B. LDB >= max(1,M) + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************/ +void MORSE_TASK_ztradd(MORSE_option_t *options, + MORSE_enum uplo, MORSE_enum trans, int m, int n, int nb, + MORSE_Complex64_t alpha, MORSE_desc_t *A, int Am, int An, int lda, + MORSE_Complex64_t beta, MORSE_desc_t *B, int Bm, int Bn, int ldb) +{ + quark_option_t *opt = (quark_option_t*)(options->schedopt); + DAG_CORE_GEADD; + QUARK_Insert_Task(opt->quark, CORE_ztradd_quark, (Quark_Task_Flags*)opt, + sizeof(MORSE_enum), &uplo, VALUE, + sizeof(MORSE_enum), &trans, VALUE, + sizeof(int), &m, VALUE, + sizeof(int), &n, VALUE, + sizeof(MORSE_Complex64_t), &alpha, VALUE, + sizeof(MORSE_Complex64_t)*lda*n, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT, + sizeof(int), &lda, VALUE, + sizeof(MORSE_Complex64_t), &beta, VALUE, + sizeof(MORSE_Complex64_t)*ldb*n, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), INOUT, + sizeof(int), &ldb, VALUE, + 0); +} + + +void CORE_ztradd_quark(Quark *quark) +{ + MORSE_enum uplo; + MORSE_enum trans; + int M; + int N; + MORSE_Complex64_t alpha; + MORSE_Complex64_t *A; + int LDA; + MORSE_Complex64_t beta; + MORSE_Complex64_t *B; + int LDB; + + quark_unpack_args_9(quark, uplo, trans, M, N, alpha, A, LDA, beta, B, LDB); + CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB); + return; +} + diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt index fbb9b504d0fe956befcd0b94cbce3990d7b01301..c64907bea11011ad98153f686da2df67b60abd8c 100644 --- a/runtime/starpu/CMakeLists.txt +++ b/runtime/starpu/CMakeLists.txt @@ -151,6 +151,7 @@ set(ZSRC codelets/codelet_zssssm.c codelets/codelet_zsyssq.c codelets/codelet_zsytrf_nopiv.c + codelets/codelet_ztradd.c codelets/codelet_ztrasm.c codelets/codelet_ztrssq.c codelets/codelet_ztrtri.c diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c index a6c8c30c66a559072df661ed8a0d2bfa7e5a72ab..c8470b4efbfd01d5d7776731d95c95276653ad3a 100644 --- a/runtime/starpu/codelets/codelet_zgeadd.c +++ b/runtime/starpu/codelets/codelet_zgeadd.c @@ -30,32 +30,48 @@ #include "runtime/starpu/include/runtime_codelet_z.h" /** + ****************************************************************************** * * @ingroup CORE_MORSE_Complex64_t * - * CORE_zgeadd adds to matrices together. + * MORSE_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. * - * B <- alpha * A + B + * B <- alpha * op(A) + beta * B, + * + * where op(X) = X, X', or conj(X') * ******************************************************************************* * + * @param[in] trans + * Specifies whether the matrix A is non-transposed, transposed, or + * conjugate transposed + * = MorseNoTrans: op(A) = A + * = MorseTrans: op(A) = A' + * = MorseConjTrans: op(A) = conj(A') + * * @param[in] M - * Number of rows of the matrices A and B. + * Number of rows of the matrices op(A) and B. * * @param[in] N - * Number of columns of the matrices A and B. + * Number of columns of the matrices op(A) and B. * * @param[in] alpha * Scalar factor of A. * * @param[in] A - * Matrix of size LDA-by-N. + * Matrix of size LDA-by-N, if trans = MorseNoTrans, LDA-by-M + * otherwise. * * @param[in] LDA - * Leading dimension of the array A. LDA >= max(1,M) + * Leading dimension of the array A. LDA >= max(1,k), with k=M, if + * trans = MorseNoTrans, and k=N otherwise. + * + * @param[in] beta + * Scalar factor of B. * * @param[in,out] B * Matrix of size LDB-by-N. + * On exit, B = alpha * op(A) + beta * B * * @param[in] LDB * Leading dimension of the array B. LDB >= max(1,M) @@ -67,11 +83,10 @@ * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************/ - void MORSE_TASK_zgeadd(MORSE_option_t *options, - int m, int n, MORSE_Complex64_t alpha, - MORSE_desc_t *A, int Am, int An, int lda, - MORSE_desc_t *B, int Bm, int Bn, int ldb) + MORSE_enum trans, int m, int n, int nb, + MORSE_Complex64_t alpha, MORSE_desc_t *A, int Am, int An, int lda, + MORSE_Complex64_t beta, MORSE_desc_t *B, int Bm, int Bn, int ldb) { struct starpu_codelet *codelet = &cl_zgeadd; void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; @@ -81,11 +96,13 @@ void MORSE_TASK_zgeadd(MORSE_option_t *options, { starpu_insert_task( codelet, + STARPU_VALUE, &trans, sizeof(MORSE_enum), STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(MORSE_Complex64_t), STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), STARPU_VALUE, &lda, sizeof(int), + STARPU_VALUE, &beta, sizeof(MORSE_Complex64_t), STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, @@ -97,18 +114,20 @@ void MORSE_TASK_zgeadd(MORSE_option_t *options, static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg) { + MORSE_enum trans; int M; int N; MORSE_Complex64_t alpha; MORSE_Complex64_t *A; int LDA; + MORSE_Complex64_t beta; MORSE_Complex64_t *B; int LDB; A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &M, &N, &alpha, &LDA, &LDB); - CORE_zgeadd(M, N, alpha, A, LDA, B, LDB); + starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); + CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB); return; } diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c new file mode 100644 index 0000000000000000000000000000000000000000..724373c953137bae763ec6c282c13b075d20dffb --- /dev/null +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -0,0 +1,143 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file codelet_ztradd.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Mathieu Faverge + * @date 2011-11-03 + * @precisions normal z -> c d s + * + **/ +#include "runtime/starpu/include/morse_starpu.h" +#include "runtime/starpu/include/runtime_codelet_z.h" + +/** + ****************************************************************************** + * + * @ingroup CORE_MORSE_Complex64_t + * + * MORSE_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd. + * + * B <- alpha * op(A) + beta * B, + * + * where op(X) = X, X', or conj(X') + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies the shape of A and B matrices: + * = MorseUpperLower: A and B are general matrices. + * = MorseUpper: op(A) and B are upper trapezoidal matrices. + * = MorseLower: op(A) and B are lower trapezoidal matrices. + * + * @param[in] trans + * Specifies whether the matrix A is non-transposed, transposed, or + * conjugate transposed + * = MorseNoTrans: op(A) = A + * = MorseTrans: op(A) = A' + * = MorseConjTrans: op(A) = conj(A') + * + * @param[in] M + * Number of rows of the matrices op(A) and B. + * + * @param[in] N + * Number of columns of the matrices op(A) and B. + * + * @param[in] alpha + * Scalar factor of A. + * + * @param[in] A + * Matrix of size LDA-by-N, if trans = MorseNoTrans, LDA-by-M + * otherwise. + * + * @param[in] LDA + * Leading dimension of the array A. LDA >= max(1,k), with k=M, if + * trans = MorseNoTrans, and k=N otherwise. + * + * @param[in] beta + * Scalar factor of B. + * + * @param[in,out] B + * Matrix of size LDB-by-N. + * On exit, B = alpha * op(A) + beta * B + * + * @param[in] LDB + * Leading dimension of the array B. LDB >= max(1,M) + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************/ +void MORSE_TASK_ztradd(MORSE_option_t *options, + MORSE_enum uplo, MORSE_enum trans, int m, int n, int nb, + MORSE_Complex64_t alpha, MORSE_desc_t *A, int Am, int An, int lda, + MORSE_Complex64_t beta, MORSE_desc_t *B, int Bm, int Bn, int ldb) +{ + struct starpu_codelet *codelet = &cl_ztradd; + void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; + + if ( morse_desc_islocal( A, Am, An ) || + morse_desc_islocal( B, Bm, Bn ) ) + { + starpu_insert_task( + codelet, + STARPU_VALUE, &uplo, sizeof(MORSE_enum), + STARPU_VALUE, &trans, sizeof(MORSE_enum), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &alpha, sizeof(MORSE_Complex64_t), + STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_VALUE, &beta, sizeof(MORSE_Complex64_t), + STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, + 0); + } +} + + +static void cl_ztradd_cpu_func(void *descr[], void *cl_arg) +{ + MORSE_enum uplo; + MORSE_enum trans; + int M; + int N; + MORSE_Complex64_t alpha; + MORSE_Complex64_t *A; + int LDA; + MORSE_Complex64_t beta; + MORSE_Complex64_t *B; + int LDB; + + A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); + CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB); + return; +} + +/* + * Codelet definition + */ +CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func) diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h index 3d93bc46ceb482e4972ca4d90996730243019a47..d9a1002e261687c64bda55bf3257af252dc706ee 100644 --- a/runtime/starpu/include/runtime_codelet_z.h +++ b/runtime/starpu/include/runtime_codelet_z.h @@ -88,6 +88,7 @@ ZCODELETS_HEADER(unmqr) * Auxiliary functions */ ZCODELETS_HEADER(geadd) +ZCODELETS_HEADER(tradd) ZCODELETS_HEADER(lacpy) ZCODELETS_HEADER(lange) ZCODELETS_HEADER(lange_max) diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index e8316ef5c723d45fbea88cc5f23f5b59fb28ae9b..ed4358328b7fc30c98e652ea11423d439c0f6775 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -80,6 +80,7 @@ set(ZSRC ################## # OTHERS ################## + testing_zgeadd.c #testing_zgecfi.c #testing_zgesvd.c #testing_zgetmi.c diff --git a/testing/testing_zauxiliary.c b/testing/testing_zauxiliary.c index 33216c930463054d5900b74e99813244ea9d1f1a..503fa34b2540cec94e6aa92674cedcb97df22589 100644 --- a/testing/testing_zauxiliary.c +++ b/testing/testing_zauxiliary.c @@ -219,6 +219,8 @@ int main (int argc, char **argv) info += testing_ztrsm( argc, argv ); } else if ( strcmp(func, "PEMV") == 0 ) { info += testing_zpemv( argc, argv ); + } else if ( strcmp(func, "GEADD") == 0 ) { + info = testing_zgeadd( argc, argv ); /* * Linear system */ diff --git a/testing/testing_zauxiliary.h b/testing/testing_zauxiliary.h index 6cdccee416ef85a184f2f47f53f0ebc77b4a7f05..f566abbfe9a3760997cf4dc9455533503d9c3974 100644 --- a/testing/testing_zauxiliary.h +++ b/testing/testing_zauxiliary.h @@ -88,6 +88,7 @@ int testing_zsyr2k(int argc, char **argv); int testing_ztrmm(int argc, char **argv); int testing_ztrsm(int argc, char **argv); int testing_zpemv(int argc, char **argv); +int testing_zgeadd(int argc, char **argv); int testing_zposv(int argc, char **argv); int testing_zgels(int argc, char **argv); diff --git a/testing/testing_zgeadd.c b/testing/testing_zgeadd.c new file mode 100644 index 0000000000000000000000000000000000000000..84225fe325e4b9f3ed4baf66cb3d7cb9103ae4dc --- /dev/null +++ b/testing/testing_zgeadd.c @@ -0,0 +1,328 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file testing_zgemm.c + * + * MORSE testing routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + **/ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> + +#include <morse.h> +#include <coreblas/include/cblas.h> +#include <coreblas/include/lapacke.h> +#include <coreblas/include/coreblas.h> +#include "testing_zauxiliary.h" +#if defined(CHAMELEON_USE_MPI) +#include <mpi.h> +#endif + +#undef REAL +#define COMPLEX + +static int check_tr_solution(MORSE_enum uplo, MORSE_enum trans, int M, int N, + MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t beta, MORSE_Complex64_t *Bref, MORSE_Complex64_t *Bmorse, int LDB); +static int check_ge_solution(MORSE_enum trans, int M, int N, + MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t beta, MORSE_Complex64_t *Bref, MORSE_Complex64_t *Bmorse, int LDB); + +int testing_zgeadd(int argc, char **argv) +{ + /* Check for number of arguments*/ + if ( argc != 6 ) { + USAGE("GEADD", "alpha beta M N LDA LDB", + " - alpha : alpha coefficient\n" + " - beta : beta coefficient\n" + " - M : number of rows of matrices A and C\n" + " - N : number of columns of matrices B and C\n" + " - LDA : leading dimension of matrix A\n" + " - LDB : leading dimension of matrix B\n" ); + return -1; + } + + MORSE_Complex64_t alpha = (MORSE_Complex64_t) atol(argv[0]); + MORSE_Complex64_t beta = (MORSE_Complex64_t) atol(argv[1]); + int M = atoi(argv[2]); + int N = atoi(argv[3]); + int LDA = atoi(argv[4]); + int LDB = atoi(argv[5]); + + double eps; + int info_solution; + int t, u; + int LDAxN = LDA*max(M,N); + int LDBxN = LDB*N; + + MORSE_Complex64_t *A = (MORSE_Complex64_t *)malloc(LDAxN*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *B = (MORSE_Complex64_t *)malloc(LDBxN*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *Binit = (MORSE_Complex64_t *)malloc(LDBxN*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *Bfinal = (MORSE_Complex64_t *)malloc(LDBxN*sizeof(MORSE_Complex64_t)); + + /* Check if unable to allocate memory */ + if ((!A)||(!B)||(!Binit)||(!Bfinal)){ + printf("Out of Memory \n "); + return -2; + } + + eps = LAPACKE_dlamch_work('e'); + + printf("\n"); + printf("------ TESTS FOR MORSE ZGEADD ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, N); + printf("\n"); + printf(" The matrices A and B are randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n", eps); + printf(" Computational tests pass if scaled residuals are less than 10.\n"); + + /*---------------------------------------------------------- + * TESTING ZGEADD + */ + + /* Initialize A, B */ + LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A); + LAPACKE_zlarnv_work(IONE, ISEED, LDBxN, B); + +#ifdef COMPLEX + for (t=0; t<3; t++) { +#else + for (t=0; t<2; t++) { +#endif + memcpy( Binit, B, LDBxN*sizeof(MORSE_Complex64_t)); + memcpy( Bfinal, B, LDBxN*sizeof(MORSE_Complex64_t)); + + /* MORSE ZGEADD */ + MORSE_zgeadd(trans[t], M, N, alpha, A, LDA, beta, Bfinal, LDB); + + /* Check the solution */ + info_solution = check_ge_solution(trans[t], M, N, + alpha, A, LDA, + beta, Binit, Bfinal, LDB); + + if (info_solution == 0) { + printf("***************************************************\n"); + printf(" ---- TESTING ZGEADD (%s) ............... PASSED !\n", transstr[t]); + printf("***************************************************\n"); + } + else { + printf("************************************************\n"); + printf(" - TESTING ZGEADD (%s) ... FAILED !\n", transstr[t]); + printf("************************************************\n"); + } + } +#ifdef _UNUSED_ + } +#endif + + /*---------------------------------------------------------- + * TESTING TRADD + */ + + LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A); + LAPACKE_zlarnv_work(IONE, ISEED, LDBxN, B); + +#ifdef COMPLEX + for (t=0; t<3; t++) { +#else + for (t=0; t<2; t++) { +#endif + for (u=0; u<2; u++) { + memcpy( Binit, B, LDBxN*sizeof(MORSE_Complex64_t)); + memcpy( Bfinal, B, LDBxN*sizeof(MORSE_Complex64_t)); + + /* MORSE ZGEADD */ + MORSE_ztradd(uplo[u], trans[t], M, N, alpha, A, LDA, beta, Bfinal, LDB); + + /* Check the solution */ + info_solution = check_tr_solution(uplo[u], trans[t], M, N, + alpha, A, LDA, + beta, Binit, Bfinal, LDB); + + if (info_solution == 0) { + printf("***************************************************\n"); + printf(" ---- TESTING ZTRADD (%s, %s) ............... PASSED !\n", uplostr[u], transstr[t]); + printf("***************************************************\n"); + } + else { + printf("************************************************\n"); + printf(" - TESTING ZTRADD (%s, %s) ... FAILED !\n", uplostr[u], transstr[t]); + printf("************************************************\n"); + } + } + } +#ifdef _UNUSED_ + } +#endif + + free(A); free(B); + free(Binit); free(Bfinal); + + return 0; +} + +/*-------------------------------------------------------------- + * Check the solution + */ + +static int check_tr_solution(MORSE_enum uplo, MORSE_enum trans, int M, int N, + MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t beta, MORSE_Complex64_t *Bref, MORSE_Complex64_t *Bmorse, int LDB) +{ + int info_solution; + double Anorm, Binitnorm, Bmorsenorm, Rnorm, result; + double eps; + MORSE_Complex64_t mzone; + + double *work = (double *)malloc(max(M, N)* sizeof(double)); + int Am, An; + + mzone = -1.0; + + if (trans == MorseNoTrans) { + Am = M; An = N; + } else { + Am = N; An = M; + } + + /* if ( ((trans == MorseNoTrans) && (uplo == MorseLower)) || */ + /* ((trans != MorseNoTrans) && (uplo == MorseUpper)) ) */ + /* { */ + /* Anorm = LAPACKE_zlantr_work(LAPACK_COL_MAJOR, 'I', 'L', 'N', */ + /* Am, An, A, LDA, work); */ + /* } */ + /* else */ + /* { */ + /* Anorm = LAPACKE_zlantr_work(LAPACK_COL_MAJOR, 'I', 'U', 'N', */ + /* Am, An, A, LDA, work); */ + /* } */ + + /* Binitnorm = LAPACKE_zlantr_work(LAPACK_COL_MAJOR, 'I', morse_lapack_const(uplo[u]), 'N', */ + /* M, N, Bref, LDB, work); */ + /* Bmorsenorm = LAPACKE_zlantr_work(LAPACK_COL_MAJOR, 'I', morse_lapack_const(uplo[u]), 'N', */ + /* M, N, Bmorse, LDB, work); */ + + Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', + Am, An, A, LDA, work); + Binitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', + M, N, Bref, LDB, work); + Bmorsenorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', + M, N, Bmorse, LDB, work); + + CORE_ztradd(uplo, trans, M, N, + alpha, A, LDA, + beta, Bref, LDB); + cblas_zaxpy( LDB*N, CBLAS_SADDR(mzone), Bmorse, 1, Bref, 1); + + Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'M', M, N, Bref, LDB, work); + + eps = LAPACKE_dlamch_work('e'); + + printf("Rnorm %e, Anorm %e, Bnorm %e, (alpha A + beta B) norm %e\n", + Rnorm, Anorm, Binitnorm, Bmorsenorm); + + result = Rnorm / (max(Anorm, Binitnorm) * eps); + printf("============\n"); + printf("Checking the norm of the difference against reference ZGEADD \n"); + printf("-- || R||_max/(max(||A||_oo,||B||_oo).eps) = %e \n", + result); + + if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { + printf("-- The solution is suspicious ! \n"); + info_solution = 1; + } + else { + printf("-- The solution is CORRECT ! \n"); + info_solution= 0 ; + } + + free(work); + + return info_solution; +} + +/*-------------------------------------------------------------- + * Check the solution + */ + +static int check_ge_solution(MORSE_enum trans, int M, int N, + MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t beta, MORSE_Complex64_t *Bref, MORSE_Complex64_t *Bmorse, int LDB) +{ + int info_solution; + double Anorm, Binitnorm, Bmorsenorm, Rnorm, result; + double eps; + MORSE_Complex64_t mzone; + + double *work = (double *)malloc(max(M, N)* sizeof(double)); + int Am, An; + + mzone = -1.0; + + if (trans == MorseNoTrans) { + Am = M; An = N; + } else { + Am = N; An = M; + } + + Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', + Am, An, A, LDA, work); + Binitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', + M, N, Bref, LDB, work); + Bmorsenorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', + M, N, Bmorse, LDB, work); + + CORE_zgeadd(trans, M, N, + alpha, A, LDA, + beta, Bref, LDB); + cblas_zaxpy( LDB*N, CBLAS_SADDR(mzone), Bmorse, 1, Bref, 1); + + Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'M', M, N, Bref, LDB, work); + + eps = LAPACKE_dlamch_work('e'); + + printf("Rnorm %e, Anorm %e, Bnorm %e, (alpha A + beta B) norm %e\n", + Rnorm, Anorm, Binitnorm, Bmorsenorm); + + result = Rnorm / (max(Anorm, Binitnorm) * eps); + printf("============\n"); + printf("Checking the norm of the difference against reference ZGEADD \n"); + printf("-- || R||_max/(max(||A||_oo,||B||_oo).eps) = %e \n", + result); + + if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { + printf("-- The solution is suspicious ! \n"); + info_solution = 1; + } + else { + printf("-- The solution is CORRECT ! \n"); + info_solution= 0 ; + } + + free(work); + + return info_solution; +} diff --git a/testing/testing_zpotri.c b/testing/testing_zpotri.c index 4ba13000a5d253235ff060aecbe76f5bcb705bd0..ec0c4991ed5856e955733220f0aa88b3ebce3a79 100644 --- a/testing/testing_zpotri.c +++ b/testing/testing_zpotri.c @@ -149,7 +149,7 @@ BLAS_dfpinfo(enum blas_cmach_type cmach) { l = 128; m = -125; } else { - t = 53; + t = 53; l = 1024; m = -1021; } @@ -207,7 +207,7 @@ int testing_zpotri(int argc, char **argv) eps = BLAS_dfpinfo( blas_eps ); uplo = MorseUpper; - + /*------------------------------------------------------------- * TESTING ZPOTRI */ @@ -345,7 +345,7 @@ static int check_inverse(int N, MORSE_Complex64_t *A1, MORSE_Complex64_t *A2, in *(A2+j+i*LDA) = *(A2+i+j*LDA); cblas_zhemm(CblasColMajor, CblasLeft, CblasLower, N, N, CBLAS_SADDR(alpha), A2, LDA, A1, LDA, CBLAS_SADDR(beta), work, N); } - + /* Add the identity matrix to work */ for(i=0; i<N; i++) *(work+i+i*N) = *(work+i+i*N) + zone;