diff --git a/CMakeLists.txt b/CMakeLists.txt index 770b404d86abd6731d769cfbfb0b75adb59f6aff..8dbc5f4b3fda1685968154087aa656d7ea21f88d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -208,6 +208,11 @@ if (CHAMELEON_RUNTIME_SYNC) message("-- ${BoldGreen}CHAMELEON_RUNTIME_SYNC is set to ON, turn it OFF to avoid synchronisation in the tasks submission${ColourReset}") endif() +option(CHAMELEON_KERNELS_TRACE "Enable kernel traces to debug the task execution order" OFF) +if (CHAMELEON_KERNELS_TRACE) + message("-- ${BoldGreen}CHAMELEON_KERNELS_TRACE is set to ON, turn it OFF to get better performance${ColourReset}") +endif() + # Options to enable/disable doc, examples, and testings # ----------------------------------------------------- option(CHAMELEON_ENABLE_DOC "Enable documentation build" OFF) diff --git a/cmake_modules/gitlab-ci-initial-cache.cmake b/cmake_modules/gitlab-ci-initial-cache.cmake index 3f2d2bd71bdec0bfb64208e1d09f510713ca7b5c..87cf140a8cf31dfc1fec203440e99057f20f3796 100644 --- a/cmake_modules/gitlab-ci-initial-cache.cmake +++ b/cmake_modules/gitlab-ci-initial-cache.cmake @@ -2,6 +2,8 @@ set(BUILD_SHARED_LIBS "ON" CACHE BOOL "") set(CMAKE_INSTALL_PREFIX "$ENV{PWD}/install" CACHE PATH "") set(CMAKE_VERBOSE_MAKEFILE "ON" CACHE BOOL "") +set(CMAKE_C_FLAGS "-Werror") + option(MORSE_ENABLE_WARNING "Enable warning messages" ON) option(MORSE_ENABLE_COVERAGE "Enable flags for coverage test" ON) diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index db91b1d3b5cfed66eafc42fdcc0e16049fbaea4d..d4b97797a312ddfb6ca28c31ebde693afaba14cd 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -36,6 +36,7 @@ set(CHAMELEON_CONTROL ../control/context.c ../control/control.c ../control/descriptor.c + ../control/descriptor_rec.c ../control/workspace.c ../control/tile.c ../control/chameleon_f77.c diff --git a/control/descriptor.c b/control/descriptor.c index b26b7c7f2143effaa6357f25639e433bb65c97d2..5af45af7fa0ed24516a3d4835c20a385fe320de9 100644 ---
a/control/descriptor.c +++ b/control/descriptor.c @@ -26,7 +26,9 @@ * @brief Group descriptor routines exposed to users * */ +#define _GNU_SOURCE 1 #include <stdlib.h> +#include <stdio.h> #include <assert.h> #include <string.h> #include "control/common.h" @@ -70,6 +72,17 @@ int chameleon_desc_mat_free( CHAM_desc_t *desc ) } if ( desc->tiles ) { +#if defined(CHAMELEON_KERNELS_TRACE) + CHAM_tile_t *tile = desc->tiles; + int ii, jj; + for( jj=0; jj<desc->lnt; jj++ ) { + for( ii=0; ii<desc->lmt; ii++, tile++ ) { + if ( tile->name ) { + free( tile->name ); + } + } + } +#endif free( desc->tiles ); } return CHAMELEON_SUCCESS; @@ -91,6 +104,9 @@ void chameleon_desc_init_tiles( CHAM_desc_t *desc ) tile->n = jj == desc->lnt-1 ? desc->ln - jj * desc->nb : desc->nb; tile->mat = (rank == desc->myrank) ? desc->get_blkaddr( desc, ii, jj ) : NULL; tile->ld = desc->get_blkldd( desc, ii ); +#if defined(CHAMELEON_KERNELS_TRACE) + asprintf( &(tile->name), "%s(%d,%d)", desc->name, ii, jj); +#endif } } } @@ -194,38 +210,45 @@ int chameleon_desc_init( CHAM_desc_t *desc, void *mat, memset( desc, 0, sizeof(CHAM_desc_t) ); + assert( i == 0 ); + assert( j == 0 ); + assert( bsiz == (mb * nb) ); + chamctxt = chameleon_context_self(); if (chamctxt == NULL) { chameleon_error("CHAMELEON_Desc_Create", "CHAMELEON not initialized"); return CHAMELEON_ERR_NOT_INITIALIZED; } - // If one of the function get_* is NULL, we switch back to the default, like in chameleon_desc_init() + /* If one of the function get_* is NULL, we switch back to the default */ desc->get_blktile = chameleon_desc_gettile; desc->get_blkaddr = get_blkaddr ? get_blkaddr : chameleon_getaddr_ccrb; desc->get_blkldd = get_blkldd ? get_blkldd : chameleon_getblkldd_ccrb; desc->get_rankof = get_rankof ? 
get_rankof : chameleon_getrankof_2d; - // Matrix properties + + /* Matrix properties */ desc->dtyp = dtyp; - // Should be given as parameter to follow get_blkaddr (unused) - desc->styp = ChamCCRB; + /* Should be given as parameter to follow get_blkaddr (unused) */ + desc->styp = (get_blkaddr == chameleon_getaddr_cm ) ? ChamCM : ChamCCRB; desc->mb = mb; desc->nb = nb; - desc->bsiz = bsiz; - // Large matrix parameters - desc->lm = lm; - desc->ln = ln; - // Large matrix derived parameters - desc->lmt = (lm%mb==0) ? (lm/mb) : (lm/mb+1); - desc->lnt = (ln%nb==0) ? (ln/nb) : (ln/nb+1); - // Submatrix parameters - desc->i = i; - desc->j = j; + desc->bsiz = mb * nb; + + /* Matrix parameters */ + desc->i = 0; + desc->j = 0; desc->m = m; desc->n = n; - // Submatrix derived parameters - desc->mt = (m == 0) ? 0 : (i+m-1)/mb - i/mb + 1; - desc->nt = (n == 0) ? 0 : (j+n-1)/nb - j/nb + 1; + + /* Matrix stride parameters */ + desc->lm = m; + desc->ln = n; + + /* Matrix derived parameters */ + desc->mt = chameleon_ceil( m, mb ); + desc->nt = chameleon_ceil( n, nb ); + desc->lmt = desc->mt; + desc->lnt = desc->nt; desc->id = nbdesc; nbdesc++; @@ -233,14 +256,20 @@ int chameleon_desc_init( CHAM_desc_t *desc, void *mat, desc->myrank = RUNTIME_comm_rank( chamctxt ); - // Grid size + /* Grid size */ desc->p = p; desc->q = q; - // Local dimensions in tiles + /* Local dimensions in tiles */ if ( desc->myrank < (p*q) ) { - desc->llmt = (desc->lmt + p - 1) / p; - desc->llnt = (desc->lnt + q - 1) / q; + int gmt, gnt; + + /* Compute the fictive full number of tiles to derivate the local leading dimension */ + gmt = chameleon_ceil( lm, mb ); + gnt = chameleon_ceil( ln, nb ); + + desc->llmt = chameleon_ceil( gmt, p ); + desc->llnt = chameleon_ceil( gnt, q ); // Local dimensions if ( ((desc->lmt-1) % p) == (desc->myrank / q) ) { @@ -255,8 +284,8 @@ int chameleon_desc_init( CHAM_desc_t *desc, void *mat, desc->lln = desc->llnt * nb; } - desc->llm1 = (desc->llm/mb); - desc->lln1 = (desc->lln/nb); + 
desc->llm1 = desc->llm / mb; + desc->lln1 = desc->lln / nb; } else { desc->llmt = 0; desc->llnt = 0; @@ -326,13 +355,13 @@ CHAM_desc_t* chameleon_desc_submatrix( CHAM_desc_t *descA, int i, int j, int m, CHAM_desc_t *descB = malloc(sizeof(CHAM_desc_t)); int mb, nb; - if ( (descA->i + i + m) > descA->lm ) { + if ( (descA->i + i + m) > descA->m ) { chameleon_error("chameleon_desc_submatrix", "The number of rows (i+m) of the submatrix doesn't fit in the parent matrix"); - assert((descA->i + i + m) > descA->lm); + assert((descA->i + i + m) > descA->m); } - if ( (descA->j + j + n) > descA->ln ) { + if ( (descA->j + j + n) > descA->n ) { chameleon_error("chameleon_desc_submatrix", "The number of rows (j+n) of the submatrix doesn't fit in the parent matrix"); - assert((descA->j + j + n) > descA->ln); + assert((descA->j + j + n) > descA->n); } memcpy( descB, descA, sizeof(CHAM_desc_t) ); @@ -825,9 +854,11 @@ CHAM_desc_t *CHAMELEON_Desc_CopyOnZero( const CHAM_desc_t *descin, void *mat ) * @retval CHAMELEON_SUCCESS successful exit * */ -int CHAMELEON_Desc_Destroy(CHAM_desc_t **desc) +int CHAMELEON_Desc_Destroy(CHAM_desc_t **descptr) { CHAM_context_t *chamctxt; + CHAM_desc_t *desc; + int m, n; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -835,14 +866,34 @@ int CHAMELEON_Desc_Destroy(CHAM_desc_t **desc) return CHAMELEON_ERR_NOT_INITIALIZED; } - if (*desc == NULL) { + if ((descptr == NULL) || (*descptr == NULL)) { chameleon_error("CHAMELEON_Desc_Destroy", "attempting to destroy a NULL descriptor"); return CHAMELEON_ERR_UNALLOCATED; } - chameleon_desc_destroy( *desc ); - free(*desc); - *desc = NULL; + desc = *descptr; + for ( n=0; n<desc->nt; n++ ) { + for ( m=0; m<desc->mt; m++ ) { + CHAM_tile_t *tile; + + tile = desc->get_blktile( desc, m, n ); + + if ( tile->format == CHAMELEON_TILE_DESC ) { + CHAM_desc_t *tiledesc = tile->mat; + + /* Recursive names are allocated internally, we need to free them */ + if ( tiledesc->name ) { + free( (void*)(tiledesc->name) 
); + } + CHAMELEON_Desc_Destroy( &tiledesc ); + assert( tiledesc == NULL ); + } + } + } + + chameleon_desc_destroy( desc ); + free(desc); + *descptr = NULL; return CHAMELEON_SUCCESS; } @@ -940,3 +991,49 @@ void CHAMELEON_user_tag_size(int user_tag_width, int user_tag_sep) { RUNTIME_comm_set_tag_sizes( user_tag_width, user_tag_sep ); return; } + +/* Print one line per tile (dimensions, ld, offset from desc->mat); tiles holding a descriptor are printed recursively with a larger indent */ +static void +chameleon_desc_print( const CHAM_desc_t *desc, int shift ) +{ + intptr_t base = (intptr_t)desc->mat; + int m, n; + + for ( n=0; n<desc->nt; n++ ) { + for ( m=0; m<desc->mt; m++ ) { + const CHAM_tile_t *tile; + const CHAM_desc_t *tiledesc; + intptr_t ptr; + + tile = desc->get_blktile( desc, m, n ); + tiledesc = tile->mat; + + ptr = ( tile->format == CHAMELEON_TILE_DESC ) ? (intptr_t)(tiledesc->mat) : (intptr_t)(tile->mat); + + fprintf( stdout, "%*s%s(%3d,%3d): %d * %d / ld = %d / offset= %lld\n", + shift, " ", desc->name, m, n, tile->m, tile->n, tile->ld, (long long)(ptr - base) ); + + if ( tile->format == CHAMELEON_TILE_DESC ) { + chameleon_desc_print( tiledesc, shift+2 ); + } + } + } +} + +/** + ***************************************************************************** + * + * @ingroup Descriptor + * + * @brief Print descriptor structure for debug purpose + * + ****************************************************************************** + * + * @param[in] desc + * The descriptor whose tile structure is printed + */ +void +CHAMELEON_Desc_Print( const CHAM_desc_t *desc ) +{ + chameleon_desc_print( desc, 2 ); +} diff --git a/control/descriptor_rec.c b/control/descriptor_rec.c new file mode 100644 index 0000000000000000000000000000000000000000..6820b46ed2495d18125eca2ab0f05239665326d8 --- /dev/null +++ b/control/descriptor_rec.c @@ -0,0 +1,131 @@ +/** + * + * @file descriptor_rec.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux.
All rights reserved. + * + *** + * + * @brief Chameleon descriptors routines + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Gwenole Lucas + * @date 2020-03-03 + * + */ +#define _GNU_SOURCE 1 +#include "control/common.h" +#include <stdlib.h> +#include <stdio.h> +#include "chameleon/runtime.h" + +/** + * @brief Create the descriptor of the current layer, then recursively turn each + * of its tiles into a sub-descriptor while further (mb, nb) couples are given. + * + * @param[in] name Name stored (not copied) in the created descriptor. + * @param[out] descptr Set by CHAMELEON_Desc_Create_User; only read here when it succeeded. + * @param[in] mb, nb Tile sizes per layer; the first non positive value stops the recursion. + * @param[in] p, q Unused: every layer is created on a 1x1 grid. + * + * @retval CHAMELEON_SUCCESS on success, the error code of the failing step otherwise. + */ +static int +chameleon_recdesc_create( const char *name, CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, + int *mb, int *nb, + int lm, int ln, int m, int n, int p, int q, + blkaddr_fct_t get_blkaddr, blkldd_fct_t get_blkldd, blkrankof_fct_t get_rankof ) +{ + CHAM_desc_t *desc; + int rc; + + /* Let's make sure we have at least one couple (mb, nb) defined */ + assert( (mb[0] > 0) && (nb[0] > 0) ); + + /* Create the current layer descriptor */ + rc = CHAMELEON_Desc_Create_User( descptr, mat, dtyp, mb[0], nb[0], mb[0] * nb[0], + lm, ln, 0, 0, m, n, 1, 1, + get_blkaddr, get_blkldd, get_rankof ); + + /* Check rc before reading *descptr: it may not have been set on failure */ + if ( rc != CHAMELEON_SUCCESS ) { + return rc; + } + + desc = *descptr; + desc->name = name; + + /* Move to the next tile size to recurse */ + mb++; + nb++; + if ( (mb[0] <= 0) || (nb[0] <= 0) ) { + return CHAMELEON_SUCCESS; + } + + for ( n=0; n<desc->nt; n++ ) { + for ( m=0; m<desc->mt; m++ ) { + CHAM_desc_t *tiledesc = NULL; + CHAM_tile_t *tile; + int tempmm, tempnn; + char *subname = NULL; + + tile = desc->get_blktile( desc, m, n ); + tempmm = m == desc->mt-1 ? desc->m - m * desc->mb : desc->mb; + tempnn = n == desc->nt-1 ?
desc->n - n * desc->nb : desc->nb; + + /* The content of subname is undefined when asprintf fails */ + if ( asprintf( &subname, "%s[%d,%d]", name, m, n ) < 0 ) { + return CHAMELEON_ERR_OUT_OF_RESOURCES; + } + + rc = chameleon_recdesc_create( subname, &tiledesc, tile->mat, + desc->dtyp, mb, nb, + tile->ld, tempnn, /* Abuse as ln is not used */ + tempmm, tempnn, + 1, 1, /* can recurse only on local data */ + chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL); + + /* Only publish the sub-descriptor in the tile once it is fully built */ + /* NOTE(review): a partially built sub-descriptor is not released here */ + if ( rc != CHAMELEON_SUCCESS ) { + free( subname ); + return rc; + } + + tile->format = CHAMELEON_TILE_DESC; + tile->mat = tiledesc; + } + } + + return CHAMELEON_SUCCESS; +} + +/** + * @brief Create a recursive descriptor: the tiles of each layer are themselves + * descriptors built with the next (mb, nb) couple. + * + * @param[in] mat Matrix storage; must be neither CHAMELEON_MAT_ALLOC_TILE nor CHAMELEON_MAT_OOC (asserted). + * @param[in] mb, nb Tile sizes per layer; the first non positive value stops the recursion. + * + * @retval CHAMELEON_SUCCESS on success, an error code otherwise. + */ +int +CHAMELEON_Recursive_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, + int *mb, int *nb, int lm, int ln, int m, int n, int p, int q, + blkaddr_fct_t get_blkaddr, blkldd_fct_t get_blkldd, blkrankof_fct_t get_rankof ) +{ + /* + * The first layer must be allocated, otherwise we will give uninitialized + * pointers to the lower layers + */ + assert( (mat != CHAMELEON_MAT_ALLOC_TILE) && + (mat != CHAMELEON_MAT_OOC) ); + + return chameleon_recdesc_create( "A", descptr, mat, dtyp, + mb, nb, lm, ln, m, n, p, q, + get_blkaddr, get_blkldd, get_rankof ); +} diff --git a/coreblas/compute/core_ztile.c b/coreblas/compute/core_ztile.c index 3eb9cffc00d6da2e2e156280be218449791a5883..6c28b72d322c84a18b14cfbd387328d9c48652c7 100644 --- a/coreblas/compute/core_ztile.c +++ b/coreblas/compute/core_ztile.c @@ -28,6 +28,7 @@ TCORE_dlag2z( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B ) { + coreblas_kernel_trace( A, B ); assert( A->format & CHAMELEON_TILE_FULLRANK ); assert( B->format & CHAMELEON_TILE_FULLRANK ); CORE_dlag2z( uplo, M, N, A->mat, A->ld, B->mat, B->ld ); @@ -42,7 +43,8 @@ TCORE_dzasum( cham_store_t storev, const CHAM_tile_t *A, double * work ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_dzasum( storev, uplo, M, N, CHAM_tile_get_ptr( A ), A->ld, work ); } @@ -54,8 +56,9 @@ TCORE_zaxpy( int M, CHAM_tile_t * B, int incB ) { -
assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, B ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zaxpy( M, alpha, CHAM_tile_get_ptr( A ), incA, CHAM_tile_get_ptr( B ), incB ); } @@ -68,6 +71,7 @@ TCORE_zgeadd( cham_trans_t trans, CHAMELEON_Complex64_t beta, CHAM_tile_t * B ) { + coreblas_kernel_trace( A, B ); if ( (A->format & CHAMELEON_TILE_DESC) && (B->format & CHAMELEON_TILE_DESC) ) { @@ -88,8 +92,9 @@ TCORE_zgelqt( int M, CHAMELEON_Complex64_t *TAU, CHAMELEON_Complex64_t *WORK ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( T->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, T ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zgelqt( M, N, IB, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( T ), T->ld, TAU, WORK ); } @@ -99,6 +104,7 @@ TCORE_zgemv( cham_trans_t trans, int M, int N, const CHAM_tile_t *x, int incX, CHAMELEON_Complex64_t beta, CHAM_tile_t *y, int incY ) { + coreblas_kernel_trace( A, x, y ); assert( A->format & CHAMELEON_TILE_FULLRANK ); assert( x->format & CHAMELEON_TILE_FULLRANK ); assert( y->format & CHAMELEON_TILE_FULLRANK ); @@ -118,9 +124,10 @@ TCORE_zgemm( cham_trans_t transA, CHAMELEON_Complex64_t beta, CHAM_tile_t * C ) { - if ( ( A->format & CHAMELEON_TILE_FULLRANK ) && - ( B->format & CHAMELEON_TILE_FULLRANK ) && - ( C->format & CHAMELEON_TILE_FULLRANK ) ) + coreblas_kernel_trace( A, B, C ); + if ( ( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ) && + ( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ) && + ( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ) ) { CORE_zgemm( transA, transB, M, N, K, alpha, CHAM_tile_get_ptr( A ), A->ld, @@ -164,32 +171,37 @@ TCORE_zgeqrt( 
int M, CHAMELEON_Complex64_t *TAU, CHAMELEON_Complex64_t *WORK ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( T->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, T ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zgeqrt( M, N, IB, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( T ), T->ld, TAU, WORK ); } int TCORE_zgessm( int M, int N, int K, int IB, const int *IPIV, const CHAM_tile_t *L, CHAM_tile_t *A ) { - assert( L->format & CHAMELEON_TILE_FULLRANK ); - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( L, A ); + assert( L->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zgessm( M, N, K, IB, IPIV, CHAM_tile_get_ptr( L ), L->ld, CHAM_tile_get_ptr( A ), A->ld ); } int TCORE_zgessq( cham_store_t storev, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *sclssq ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( sclssq->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, sclssq ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( sclssq->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zgessq( storev, M, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( sclssq ) ); } int TCORE_zgetrf( int M, int N, CHAM_tile_t *A, int *IPIV, int *INFO ) { + coreblas_kernel_trace( A ); + int rc = -1; - if ( A->format & CHAMELEON_TILE_FULLRANK ) { + if ( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ) { rc = CORE_zgetrf( M, N, CHAM_tile_get_ptr( A ), A->ld, IPIV, INFO ); } #if defined( CHAMELEON_USE_HMAT ) @@ -207,7 +219,9 @@ TCORE_zgetrf( int M, int N, CHAM_tile_t *A, int *IPIV, int *INFO ) int TCORE_zgetrf_incpiv( int M, int N, int IB, CHAM_tile_t *A, int *IPIV, int *INFO ) { - if ( A->format & CHAMELEON_TILE_FULLRANK ) { + 
coreblas_kernel_trace( A ); + + if ( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ) { return CORE_zgetrf_incpiv( M, N, IB, CHAM_tile_get_ptr( A ), A->ld, IPIV, INFO ); } #if defined( CHAMELEON_USE_HMAT ) @@ -224,10 +238,12 @@ TCORE_zgetrf_incpiv( int M, int N, int IB, CHAM_tile_t *A, int *IPIV, int *INFO int TCORE_zgetrf_nopiv( int M, int N, int IB, CHAM_tile_t *A, int *INFO ) { + coreblas_kernel_trace( A ); + int rc = -1; *INFO = 0; - if ( A->format & CHAMELEON_TILE_FULLRANK ) { + if ( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ) { rc = CORE_zgetrf_nopiv( M, N, IB, CHAM_tile_get_ptr( A ), A->ld, INFO ); } #if defined( CHAMELEON_USE_HMAT ) @@ -245,8 +261,9 @@ TCORE_zgetrf_nopiv( int M, int N, int IB, CHAM_tile_t *A, int *INFO ) void TCORE_zhe2ge( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, B ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zhe2ge( uplo, M, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld ); } @@ -262,9 +279,9 @@ TCORE_zhemm( cham_side_t side, CHAMELEON_Complex64_t beta, CHAM_tile_t * C ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); - assert( C->format & CHAMELEON_TILE_FULLRANK ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zhemm( side, uplo, M, N, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, beta, CHAM_tile_get_ptr( C ), C->ld ); } @@ -278,8 +295,9 @@ TCORE_zherk( cham_uplo_t uplo, double beta, CHAM_tile_t * C ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( C->format & 
CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, C ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zherk( uplo, trans, N, K, alpha, CHAM_tile_get_ptr( A ), A->ld, beta, CHAM_tile_get_ptr( C ), C->ld ); } @@ -294,9 +312,10 @@ TCORE_zher2k( cham_uplo_t uplo, double beta, CHAM_tile_t * C ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); - assert( C->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, B, C ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zher2k( uplo, trans, N, K, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, beta, CHAM_tile_get_ptr( C ), C->ld ); } #endif @@ -313,9 +332,10 @@ TCORE_zherfb( cham_uplo_t uplo, CHAMELEON_Complex64_t *WORK, int ldwork ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( T->format & CHAMELEON_TILE_FULLRANK ); - assert( C->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, T, C ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zherfb( uplo, N, K, IB, NB, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( T ), T->ld, CHAM_tile_get_ptr( C ), C->ld, WORK, ldwork ); } @@ -328,8 +348,9 @@ TCORE_zhessq( cham_store_t storev, const CHAM_tile_t *A, CHAM_tile_t * sclssq ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( sclssq->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, sclssq ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( sclssq->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); 
return CORE_zhessq( storev, uplo, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( sclssq ) ); } #endif @@ -353,7 +374,8 @@ TCORE_zlange( cham_normtype_t norm, double * work, double * normA ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zlange( norm, M, N, CHAM_tile_get_ptr( A ), A->ld, work, normA ); } @@ -366,7 +388,8 @@ TCORE_zlanhe( cham_normtype_t norm, double * work, double * normA ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zlanhe( norm, uplo, N, CHAM_tile_get_ptr( A ), A->ld, work, normA ); } #endif @@ -379,7 +402,8 @@ TCORE_zlansy( cham_normtype_t norm, double * work, double * normA ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zlansy( norm, uplo, N, CHAM_tile_get_ptr( A ), A->ld, work, normA ); } @@ -393,14 +417,16 @@ TCORE_zlantr( cham_normtype_t norm, double * work, double * normA ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zlantr( norm, uplo, diag, M, N, CHAM_tile_get_ptr( A ), A->ld, work, normA ); } int TCORE_zlascal( cham_uplo_t uplo, int m, int n, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zlascal( uplo, m, n, alpha, CHAM_tile_get_ptr( A ), A->ld ); } @@ -412,14 +438,16 @@ TCORE_zlaset( cham_uplo_t uplo, CHAMELEON_Complex64_t beta, CHAM_tile_t * A ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zlaset( uplo, n1, 
n2, alpha, beta, CHAM_tile_get_ptr( A ), A->ld ); } void TCORE_zlaset2( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zlaset2( uplo, n1, n2, alpha, CHAM_tile_get_ptr( A ), A->ld ); } @@ -431,15 +459,17 @@ TCORE_zlatro( cham_uplo_t uplo, const CHAM_tile_t *A, CHAM_tile_t * B ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, B ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zlatro( uplo, trans, M, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld ); } void TCORE_zlauum( cham_uplo_t uplo, int N, CHAM_tile_t *A ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zlauum( uplo, N, CHAM_tile_get_ptr( A ), A->ld ); } @@ -448,14 +478,14 @@ void TCORE_zplghe( double bump, int m, int n, - CHAM_tile_t * tileA, + CHAM_tile_t * A, int bigM, int m0, int n0, unsigned long long int seed ) { - assert( tileA->format & CHAMELEON_TILE_FULLRANK ); - CORE_zplghe( bump, m, n, CHAM_tile_get_ptr( tileA ), tileA->ld, bigM, m0, n0, seed ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + CORE_zplghe( bump, m, n, CHAM_tile_get_ptr( A ), A->ld, bigM, m0, n0, seed ); } #endif @@ -463,33 +493,36 @@ void TCORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, - CHAM_tile_t * tileA, + CHAM_tile_t * A, int bigM, int m0, int n0, unsigned long long int seed ) { - assert( tileA->format & CHAMELEON_TILE_FULLRANK ); - CORE_zplgsy( bump, m, n, CHAM_tile_get_ptr( tileA ), tileA->ld, bigM, m0, n0, seed ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | 
CHAMELEON_TILE_DESC) ); + CORE_zplgsy( bump, m, n, CHAM_tile_get_ptr( A ), A->ld, bigM, m0, n0, seed ); } void TCORE_zplrnt( int m, int n, - CHAM_tile_t * tileA, + CHAM_tile_t * A, int bigM, int m0, int n0, unsigned long long int seed ) { - assert( tileA->format & CHAMELEON_TILE_FULLRANK ); - CORE_zplrnt( m, n, CHAM_tile_get_ptr( tileA ), tileA->ld, bigM, m0, n0, seed ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + CORE_zplrnt( m, n, CHAM_tile_get_ptr( A ), A->ld, bigM, m0, n0, seed ); } void TCORE_zpotrf( cham_uplo_t uplo, int n, CHAM_tile_t *A, int *INFO ) { - if ( A->format & CHAMELEON_TILE_FULLRANK ) { + coreblas_kernel_trace( A ); + if ( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ) { CORE_zpotrf( uplo, n, CHAM_tile_get_ptr( A ), A->ld, INFO ); } #if defined( CHAMELEON_USE_HMAT ) @@ -517,10 +550,11 @@ TCORE_zssssm( int M1, const CHAM_tile_t *L2, const int * IPIV ) { - assert( A1->format & CHAMELEON_TILE_FULLRANK ); - assert( A2->format & CHAMELEON_TILE_FULLRANK ); - assert( L1->format & CHAMELEON_TILE_FULLRANK ); - assert( L2->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A1, A2, L1, L2 ); + assert( A1->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( A2->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( L1->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( L2->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zssssm( M1, N1, M2, @@ -549,9 +583,10 @@ TCORE_zsymm( cham_side_t side, CHAMELEON_Complex64_t beta, CHAM_tile_t * C ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); - assert( C->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, B, C ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( C->format & 
(CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zsymm( side, uplo, M, N, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, beta, CHAM_tile_get_ptr( C ), C->ld ); } @@ -565,8 +600,9 @@ TCORE_zsyrk( cham_uplo_t uplo, CHAMELEON_Complex64_t beta, CHAM_tile_t * C ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( C->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, C ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zsyrk( uplo, trans, N, K, alpha, CHAM_tile_get_ptr( A ), A->ld, beta, CHAM_tile_get_ptr( C ), C->ld ); } @@ -581,9 +617,10 @@ TCORE_zsyr2k( cham_uplo_t uplo, CHAMELEON_Complex64_t beta, CHAM_tile_t * C ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); - assert( C->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, B, C ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_zsyr2k( uplo, trans, N, K, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, beta, CHAM_tile_get_ptr( C ), C->ld ); } @@ -594,8 +631,9 @@ TCORE_zsyssq( cham_store_t storev, const CHAM_tile_t *A, CHAM_tile_t * sclssq ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( sclssq->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, sclssq ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( sclssq->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zsyssq( storev, uplo, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( sclssq ) ); } @@ -603,7 +641,8 @@ TCORE_zsyssq( cham_store_t storev, int TCORE_zsytf2_nopiv( cham_uplo_t uplo, int n, CHAM_tile_t *A ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + 
coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zsytf2_nopiv( uplo, n, CHAM_tile_get_ptr( A ), A->ld ); } #endif @@ -618,9 +657,10 @@ TCORE_ztplqt( int M, CHAM_tile_t * T, CHAMELEON_Complex64_t *WORK ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); - assert( T->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, B, T ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_ztplqt( M, N, L, IB, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, CHAM_tile_get_ptr( T ), T->ld, WORK ); } @@ -638,10 +678,11 @@ TCORE_ztpmlqt( cham_side_t side, CHAM_tile_t * B, CHAMELEON_Complex64_t *WORK ) { - assert( V->format & CHAMELEON_TILE_FULLRANK ); - assert( T->format & CHAMELEON_TILE_FULLRANK ); - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( V, T, A, B ); + assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_ztpmlqt( side, trans, M, @@ -674,10 +715,11 @@ TCORE_ztpmqrt( cham_side_t side, CHAM_tile_t * B, CHAMELEON_Complex64_t *WORK ) { - assert( V->format & CHAMELEON_TILE_FULLRANK ); - assert( T->format & CHAMELEON_TILE_FULLRANK ); - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( V, T, A, B ); + assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( A->format & 
(CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_ztpmqrt( side, trans, M, @@ -706,9 +748,10 @@ TCORE_ztpqrt( int M, CHAM_tile_t * T, CHAMELEON_Complex64_t *WORK ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); - assert( T->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, B, T ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_ztpqrt( M, N, L, IB, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, CHAM_tile_get_ptr( T ), T->ld, WORK ); } @@ -722,6 +765,7 @@ TCORE_ztradd( cham_uplo_t uplo, CHAMELEON_Complex64_t beta, CHAM_tile_t * B ) { + coreblas_kernel_trace( A, B ); if (( A->format & CHAMELEON_TILE_DESC ) && ( B->format & CHAMELEON_TILE_DESC ) ) { @@ -739,7 +783,8 @@ TCORE_ztrasm( cham_store_t storev, const CHAM_tile_t *A, double * work ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_ztrasm( storev, uplo, diag, M, N, CHAM_tile_get_ptr( A ), A->ld, work ); } @@ -754,8 +799,9 @@ TCORE_ztrmm( cham_side_t side, const CHAM_tile_t * A, CHAM_tile_t * B ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( B->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, B ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_ztrmm( side, uplo, transA, diag, M, N, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld ); } @@ -770,13 +816,16 @@ TCORE_ztrsm( cham_side_t side, const CHAM_tile_t * A, CHAM_tile_t * B ) { - if ( ( A->format & CHAMELEON_TILE_FULLRANK ) && - ( B->format & 
CHAMELEON_TILE_FULLRANK ) ) + coreblas_kernel_trace( A, B ); + + if ( ( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ) && + ( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ) ) { CORE_ztrsm( side, uplo, transA, diag, M, N, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld ); } #if defined( CHAMELEON_USE_HMAT ) else if ( A->format & CHAMELEON_TILE_HMAT ) { + assert( !(B->format & CHAMELEON_TILE_DESC) ); hmat_ztrsm( chameleon_lapack_const( side ), chameleon_lapack_const( uplo ), chameleon_lapack_const( transA ), @@ -799,8 +848,9 @@ TCORE_ztrssq( cham_uplo_t uplo, const CHAM_tile_t *A, CHAM_tile_t * sclssq ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( sclssq->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A, sclssq ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( sclssq->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); double *W = CHAM_tile_get_ptr( sclssq ); return CORE_ztrssq( uplo, diag, M, N, CHAM_tile_get_ptr( A ), A->ld, W, W + 1 ); } @@ -808,7 +858,7 @@ TCORE_ztrssq( cham_uplo_t uplo, void TCORE_ztrtri( cham_uplo_t uplo, cham_diag_t diag, int N, CHAM_tile_t *A, int *info ) { - assert( A->format & CHAMELEON_TILE_FULLRANK ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); CORE_ztrtri( uplo, diag, N, CHAM_tile_get_ptr( A ), A->ld, info ); } @@ -828,10 +878,11 @@ TCORE_ztsmlq_hetra1( cham_side_t side, CHAMELEON_Complex64_t *WORK, int ldwork ) { - assert( A1->format & CHAMELEON_TILE_FULLRANK ); - assert( A2->format & CHAMELEON_TILE_FULLRANK ); - assert( V->format & CHAMELEON_TILE_FULLRANK ); - assert( T->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A1, A2, V, T ); + assert( A1->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( A2->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( T->format & 
(CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_ztsmlq_hetra1( side, trans, m1, @@ -868,10 +919,11 @@ TCORE_ztsmqr_hetra1( cham_side_t side, CHAMELEON_Complex64_t *WORK, int ldwork ) { - assert( A1->format & CHAMELEON_TILE_FULLRANK ); - assert( A2->format & CHAMELEON_TILE_FULLRANK ); - assert( V->format & CHAMELEON_TILE_FULLRANK ); - assert( T->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( A1, A2, V, T ); + assert( A1->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( A2->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_ztsmqr_hetra1( side, trans, m1, @@ -905,9 +957,10 @@ TCORE_ztstrf( int M, int LDWORK, int * INFO ) { - assert( U->format & CHAMELEON_TILE_FULLRANK ); - assert( A->format & CHAMELEON_TILE_FULLRANK ); - assert( L->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( U, A, L ); + assert( U->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( L->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_ztstrf( M, N, IB, NB, CHAM_tile_get_ptr( U ), U->ld, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( L ), L->ld, IPIV, WORK, LDWORK, INFO ); } @@ -925,9 +978,10 @@ TCORE_zunmlq( cham_side_t side, CHAMELEON_Complex64_t *WORK, int LDWORK ) { - assert( V->format & CHAMELEON_TILE_FULLRANK ); - assert( T->format & CHAMELEON_TILE_FULLRANK ); - assert( C->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( V, T, C ); + assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zunmlq( side, trans, M, N, K, IB, CHAM_tile_get_ptr( V ), V->ld, CHAM_tile_get_ptr( 
T ), T->ld, CHAM_tile_get_ptr( C ), C->ld, WORK, LDWORK ); } @@ -945,9 +999,10 @@ TCORE_zunmqr( cham_side_t side, CHAMELEON_Complex64_t *WORK, int LDWORK ) { - assert( V->format & CHAMELEON_TILE_FULLRANK ); - assert( T->format & CHAMELEON_TILE_FULLRANK ); - assert( C->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( V, T, C ); + assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zunmqr( side, trans, M, N, K, IB, CHAM_tile_get_ptr( V ), V->ld, CHAM_tile_get_ptr( T ), T->ld, CHAM_tile_get_ptr( C ), C->ld, WORK, LDWORK ); } @@ -963,10 +1018,11 @@ TCORE_zgram( cham_uplo_t uplo, const CHAM_tile_t *D, CHAM_tile_t * A ) { - assert( Di->format & CHAMELEON_TILE_FULLRANK ); - assert( Dj->format & CHAMELEON_TILE_FULLRANK ); - assert( D->format & CHAMELEON_TILE_FULLRANK ); - assert( A->format & CHAMELEON_TILE_FULLRANK ); + coreblas_kernel_trace( Di, Dj, D, A ); + assert( Di->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( Dj->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( D->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); return CORE_zgram( uplo, M, N, Mt, Nt, CHAM_tile_get_ptr( Di ), Di->ld, CHAM_tile_get_ptr( Dj ), Dj->ld, CHAM_tile_get_ptr( D ), CHAM_tile_get_ptr( A ), A->ld ); } diff --git a/coreblas/compute/global.c b/coreblas/compute/global.c index b370d72f8ab7b9dc604d8b478493c5f425aa3c72..7ce1142c0743561f725484f3cd5a91770e6243d3 100644 --- a/coreblas/compute/global.c +++ b/coreblas/compute/global.c @@ -20,8 +20,51 @@ * @date 2020-03-03 * */ +#include "coreblas.h" +#include <stdarg.h> +#include <stdlib.h> + +int _coreblas_silent = 0; static int coreblas_gemm3m_enabled = 0; +__attribute__((unused)) __attribute__((constructor)) static void 
+__coreblas_lib_init() +{ + char *silent = getenv("CHAMELEON_COREBLAS_SILENT"); + if ( silent && !(strcmp( silent, "0" ) == 0) ) { + _coreblas_silent = 1; + } +} + +#if defined(CHAMELEON_KERNELS_TRACE) +void __coreblas_kernel_trace( const char *func, ... ) +{ + char output[1024]; + int first = 1; + int size = 0; + int len = 1024; + va_list va_list; + const CHAM_tile_t *tile; + + if (_coreblas_silent) { + return; + } + + size += snprintf( output, len, "[coreblas] Execute %s(", func ); + + va_start( va_list, func ); + while((tile = va_arg(va_list, const CHAM_tile_t*)) != 0) { + size += snprintf( output+size, len-size, "%s%s", + first ? "" : ", ", + tile->name ); + } + va_end( va_list ); + + fprintf( stderr, "%s)\n", output ); + fflush(stderr); +} +#endif + void set_coreblas_gemm3m_enabled( int v ) { coreblas_gemm3m_enabled = v; diff --git a/coreblas/include/coreblas.h b/coreblas/include/coreblas.h index e203d81b2c812aff5499513dd141f1aa5f51b623..771f7856e69abe062fe5c4b3e627a90b109f3b49 100644 --- a/coreblas/include/coreblas.h +++ b/coreblas/include/coreblas.h @@ -87,6 +87,21 @@ extern char *chameleon_lapack_constants[]; void set_coreblas_gemm3m_enabled( int v ); int get_coreblas_gemm3m_enabled( void ); + +#if defined(CHAMELEON_KERNELS_TRACE) + +void __coreblas_kernel_trace( const char *func, ... ); +#define coreblas_kernel_trace( ... ) \ + do { \ + __coreblas_kernel_trace( __chameleon_func__, ##__VA_ARGS__, NULL ); \ + } while(0) + +#else + +#define coreblas_kernel_trace( ... 
) do {} while(0) + +#endif + END_C_DECLS #endif /* _coreblas_h_ */ diff --git a/include/chameleon.h b/include/chameleon.h index 4a7a72bbda201e4e94389972a69255393021e1ce..49e9e4d06cf945c8c636a8717d5f44db5336dcea 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -140,6 +140,7 @@ int CHAMELEON_Desc_Acquire( const CHAM_desc_t *desc ); int CHAMELEON_Desc_Release( const CHAM_desc_t *desc ); int CHAMELEON_Desc_Flush ( const CHAM_desc_t *desc, const RUNTIME_sequence_t *sequence ); +void CHAMELEON_Desc_Print( const CHAM_desc_t *desc ); /* Workspaces */ int CHAMELEON_Dealloc_Workspace (CHAM_desc_t **desc); @@ -163,6 +164,10 @@ int CHAMELEON_Request_Create (RUNTIME_request_t **request); int CHAMELEON_Request_Destroy (RUNTIME_request_t *request); int CHAMELEON_Request_Set (RUNTIME_request_t *request, int param, int value); +int CHAMELEON_Recursive_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, + int *mb, int *nb, int lm, int ln, int m, int n, int p, int q, + blkaddr_fct_t get_blkaddr, blkldd_fct_t get_blkldd, blkrankof_fct_t get_rankof ); + /** * * @ingroup Control diff --git a/include/chameleon/config.h.in b/include/chameleon/config.h.in index 15073d1c4604947b704f0f3cf5ff55f15074012a..dc878d47b68f1a0e514ca9b9bf6f9c95de0e1ece 100644 --- a/include/chameleon/config.h.in +++ b/include/chameleon/config.h.in @@ -34,6 +34,8 @@ /* Debug scheduling engine */ #cmakedefine CHAMELEON_RUNTIME_SYNC +/* Debug coreblas execution order if not provided by the runtime */ +#cmakedefine CHAMELEON_KERNELS_TRACE /* Communication engine */ #cmakedefine CHAMELEON_USE_MPI @@ -86,4 +88,10 @@ #define END_C_DECLS /* empty */ #endif +#ifdef _MSC_VER +#define __chameleon_func__ __FUNCTION__ +#else +#define __chameleon_func__ __func__ +#endif + #endif /* CHAMELEON_CONFIG_H_HAS_BEEN_INCLUDED */ diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h index e1d424838324598f69fa1a72fe4ec5908d26617c..a43e46d699092a3308f7cfb8896fddc39e8457f2 100644 --- 
a/include/chameleon/struct.h +++ b/include/chameleon/struct.h @@ -35,6 +35,9 @@ BEGIN_C_DECLS #define CHAMELEON_TILE_HMAT (1 << 2) typedef struct chameleon_tile_s { +#if defined(CHAMELEON_KERNELS_TRACE) + char *name; +#endif int8_t format; int m, n, ld; void *mat; @@ -67,6 +70,7 @@ typedef int (*blkrankof_fct_t)( const CHAM_desc_t*, int, int ); typedef CHAM_tile_t* (*blktile_fct_t) ( const CHAM_desc_t*, int, int ); struct chameleon_desc_s { + const char *name; // function to get chameleon tiles address blktile_fct_t get_blktile; // function to get chameleon tiles address diff --git a/include/chameleon/types.h b/include/chameleon/types.h index 76f5bae73e656635de0b31081cec3db3ab5ba7b7..0a38ebb9a7a97e7eb7f6d8294826a990a592f1fe 100644 --- a/include/chameleon/types.h +++ b/include/chameleon/types.h @@ -126,6 +126,10 @@ static inline int chameleon_min( int a, int b ) { if ( a < b ) return a; else return b; } +static inline int chameleon_ceil( int a, int b ) { + return ( a + b - 1 ) / b; +} + typedef double cham_fixdbl_t; END_C_DECLS diff --git a/runtime/openmp/control/runtime_options.c b/runtime/openmp/control/runtime_options.c index 66e6cc44a1d51bb61b1c4b60829c0ced06e3f678..5e5a2e5289825fde459a212e2ef54357eda94a85 100644 --- a/runtime/openmp/control/runtime_options.c +++ b/runtime/openmp/control/runtime_options.c @@ -25,15 +25,15 @@ void RUNTIME_options_init( RUNTIME_option_t *option, CHAM_context_t *chamctxt, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { - option->sequence = sequence; - option->request = request; - option->profiling = CHAMELEON_PROFILING == CHAMELEON_TRUE; - option->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE; - option->priority = RUNTIME_PRIORITY_MIN; - option->ws_wsize = 0; - option->ws_hsize = 0; - option->ws_worker = NULL; - option->ws_host = NULL; + option->sequence = sequence; + option->request = request; + option->profiling = CHAMELEON_PROFILING == CHAMELEON_TRUE; + option->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE; + 
option->priority = RUNTIME_PRIORITY_MIN; + option->ws_wsize = 0; + option->ws_hsize = 0; + option->ws_worker = NULL; + option->ws_host = NULL; return; } diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c index c377736025b31907b9be901f6a4209c49f1e6a63..740fdc0fba5bbdcde1ee0258821013d59836ed4b 100644 --- a/runtime/starpu/codelets/codelet_zlauum.c +++ b/runtime/starpu/codelets/codelet_zlauum.c @@ -39,7 +39,6 @@ cl_zlauum_cpu_func(void *descr[], void *cl_arg) { struct cl_zlauum_args_s clargs; CHAM_tile_t *tileA; - int info = 0; tileA = cti_interface_get(descr[0]); diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c index 2ed7183a7b627f2b3e78a5cc6bf641991bd57d80..9b802391aeb26b55c0968f7ebe7b5e281e67aa8f 100644 --- a/runtime/starpu/control/runtime_descriptor.c +++ b/runtime/starpu/control/runtime_descriptor.c @@ -241,8 +241,8 @@ void RUNTIME_desc_destroy( CHAM_desc_t *desc ) for (n = 0; n < lnt; n++) { for (m = 0; m < lmt; m++) { - if (*handle != NULL) { - starpu_data_unregister(*handle); + if ( *handle != NULL ) { + starpu_data_unregister_submit(*handle); *handle = NULL; } handle++; @@ -384,14 +384,27 @@ void RUNTIME_data_flush( const RUNTIME_sequence_t *sequence, { int64_t mm = m + (A->i / A->mb); int64_t nn = n + (A->j / A->nb); - + int64_t shift = ((int64_t)A->lmt) * nn + mm; starpu_data_handle_t *handle = A->schedopt; - handle += ((int64_t)A->lmt) * nn + mm; + CHAM_tile_t *tile = A->tiles; + handle += shift; + tile += shift; if (*handle == NULL) { return; } + /* + * TODO: check later, a better check would be to check if we + * partitionned the handle or not + * + * Right now, we can't flush a partitionned handle, we would need to + * unpartition first, so we flush only the children. 
+ */ + if ( tile->format & CHAMELEON_TILE_DESC ) { + CHAMELEON_Desc_Flush( tile->mat, sequence ); + } + #if defined(CHAMELEON_USE_MPI) starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle ); #endif diff --git a/runtime/starpu/interface/cham_tile_interface.c b/runtime/starpu/interface/cham_tile_interface.c index a15e0b78117814f8e2a8ee886f3c40668e914bb3..92d2d42036d0a525e76649d8319cccfe904771b0 100644 --- a/runtime/starpu/interface/cham_tile_interface.c +++ b/runtime/starpu/interface/cham_tile_interface.c @@ -19,7 +19,6 @@ #include "chameleon_starpu.h" #if defined(CHAMELEON_USE_HMAT) #include "coreblas/hmat.h" -#endif static inline void cti_hmat_destroy( starpu_cham_tile_interface_t *cham_tile_interface ) @@ -47,7 +46,7 @@ static inline size_t cti_get_hmat_required_size( starpu_cham_tile_interface_t *cham_tile_interface ) { size_t size = 0; -#if defined(CHAMELEON_USE_HMAT) + if ( (cham_tile_interface->tile.format & CHAMELEON_TILE_HMAT) && (cham_tile_interface->tile.mat != NULL ) ) { @@ -68,10 +67,15 @@ cti_get_hmat_required_size( starpu_cham_tile_interface_t *cham_tile_interface ) STARPU_ASSERT_MSG( 0, "cti_get_hmat_required_size(cham_tile_interface): unknown flttype\n" ); } } -#endif - (void)cham_tile_interface; + return size; } +#else +static inline size_t +cti_get_hmat_required_size( starpu_cham_tile_interface_t *cham_tile_interface __attribute__((unused)) ) { + return 0; +} +#endif static inline CHAM_tile_t * cti_handle_get( starpu_data_handle_t handle ) @@ -287,9 +291,16 @@ cti_display( starpu_data_handle_t handle, FILE *f ) starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); +#if defined(CHAMELEON_KERNELS_TRACE) + fprintf( f, "%s{.m=%u,.n=%u}", + cham_tile_interface->tile.name, + cham_tile_interface->tile.m, + cham_tile_interface->tile.n ); +#else fprintf( f, "%u\t%u\t", cham_tile_interface->tile.m, cham_tile_interface->tile.n ); +#endif } static int @@ -454,7 +465,6 @@ 
cti_peek_data( starpu_data_handle_t handle, unsigned node, void *ptr, size_t cou starpu_data_get_interface_on_node(handle, node); char *tmp = ptr; - size_t size; #if defined(CHAMELEON_USE_MPI_DATATYPES) /* @@ -467,6 +477,7 @@ cti_peek_data( starpu_data_handle_t handle, unsigned node, void *ptr, size_t cou #else { CHAM_tile_t dsttile; + size_t size; /* Extract the size of the information to unpack */ memcpy( &size, tmp, sizeof(size_t) ); diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index ee99eb5983a823174d044869d947d45e4b0180b7..1079e455cd9f8f17ecb33d4e5a3317efdb8c50b3 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -45,6 +45,7 @@ set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};testing_include" CAC set(ZSRC chameleon_ztesting.c testing_zcheck.c + testing_zprint.c ################## # LAPACK ################## diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index 007d8013938501d3e90f079eef6bb60d4740d3e2..9ca61431a485b6eb9251d208ddf5d2581b549f53 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -26,7 +26,7 @@ if (NOT CHAMELEON_SIMULATION) # # Create the list of test based on precision and runtime # - set( TESTS lacpy lange lantr lansy plrnk ) + set( TESTS print lacpy lange lantr lansy plrnk ) if ( ${prec} STREQUAL c OR ${prec} STREQUAL z ) set( TESTS ${TESTS} lanhe ) endif() diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c index 9384b7627f1573da57550c3f3bcdb0ce17a37e27..6aa508d5ac73892d9c51a15336b01238af2e70a3 100644 --- a/testing/chameleon_ztesting.c +++ b/testing/chameleon_ztesting.c @@ -64,6 +64,10 @@ static parameter_t parameters[] = { { "nb", "Tile size nb", 'b', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, { "ib", "Inner tile size ib", 'i', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 2, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "l1", "Size of the first level of recursion", '1', PARAM_OPTION 
| PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "l2", "Size of the second level of recursion", '2', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "l3", "Size of the third level of recursion", '3', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "lda", "Leading dimension of the matrix A", 'A', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestValInt, {0}, NULL, pread_int, sprint_int }, { "ldb", "Leading dimension of the matrix B", 'B', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestValInt, {0}, NULL, pread_int, sprint_int }, { "ldc", "Leading dimension of the matrix C", 'C', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestValInt, {0}, NULL, pread_int, sprint_int }, diff --git a/testing/input/print.in b/testing/input/print.in new file mode 100644 index 0000000000000000000000000000000000000000..64a60716cc7d056315c315480026a2b041fd4e29 --- /dev/null +++ b/testing/input/print.in @@ -0,0 +1,18 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# LACPY +# nb: Tile size +# ib: Inner tile size +# uplo: Part of the matrix to be copied (0 for Upper, 1 for Lower and 2 for UpperLower) +# M: Number of rows of matrices A and C +# N: Number of columns of matrices B and C +# LDA: Leading dimension of matrix A +# LDB: Leading dimension of matrix B + +op = print +nb = 16, 17 +ib = 8 +m = 13:45:16 +n = 15:52:16 +lda = 65 diff --git a/testing/testing_zprint.c b/testing/testing_zprint.c new file mode 100644 index 0000000000000000000000000000000000000000..1db0422641d87723b0bd15dc75619e79f3cf1b53 --- /dev/null +++ b/testing/testing_zprint.c @@ -0,0 +1,132 @@ +/** + * + * @file testing_zprint.c + * + * @copyright 2019-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zprint testing + * + * @version 1.1.0 + * @author Lucas Barros de Assis + * @author Mathieu Faverge + * @date 2020-11-19 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testings.h" +#include "testing_zcheck.h" +#include <chameleon/flops.h> + +/** + * Internal function to return address of block (m,n) with m,n = block indices + */ +inline static void *chameleon_getaddr_cm(const CHAM_desc_t *A, int m, int n) +{ + size_t mm = m + A->i / A->mb; + size_t nn = n + A->j / A->nb; + size_t eltsize = CHAMELEON_Element_Size(A->dtyp); + size_t offset = 0; + +#if defined(CHAMELEON_USE_MPI) + assert( A->myrank == A->get_rankof( A, mm, nn) ); + mm = mm / A->p; + nn = nn / A->q; +#endif + + offset = (size_t)(A->llm * A->nb) * nn + (size_t)(A->mb) * mm; + return (void*)((intptr_t)A->mat + (offset*eltsize) ); +} + +inline static int chameleon_getblkldd_cm(const CHAM_desc_t *A, int m) { + (void)m; + return A->llm; +} + +int +testing_zprint( run_arg_list_t *args, int check ) +{ + int hres = 0; + CHAM_desc_t *descA; + + /* Reads arguments */ + intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" ); + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int l1 = run_arg_get_int( args, "l1", nb / 2 ); + int l2 = run_arg_get_int( args, "l2", l1 / 3 ); + int l3 = run_arg_get_int( args, "l3", l2 / 2 ); + int Q = parameters_compute_q( P ); + + int list_nb[] = { nb, l1, l2, l3, 0 }; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + fprintf( stdout, "--- Tile layout ---\n" ); + CHAMELEON_Desc_Create( + &descA, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + + CHAMELEON_Desc_Print( descA ); + + CHAMELEON_Desc_Destroy( &descA ); + + fprintf( stdout, "--- Lapacke layout ---\n" ); + CHAMELEON_Desc_Create_User( + &descA, 
(void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q, + chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL ); + + CHAMELEON_Desc_Print( descA ); + CHAMELEON_Desc_Destroy( &descA ); + + fprintf( stdout, "--- Recursive layout (Tile)---\n" ); + CHAMELEON_Recursive_Desc_Create( + &descA, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, + list_nb, list_nb, LDA, N, M, N, P, Q, + NULL, NULL, NULL ); + + CHAMELEON_Desc_Print( descA ); + CHAMELEON_Desc_Destroy( &descA ); + + fprintf( stdout, "--- Recursive layout (Lapack) ---\n" ); + CHAMELEON_Recursive_Desc_Create( + &descA, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, + list_nb, list_nb, LDA, N, M, N, P, Q, + chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL ); + + CHAMELEON_Desc_Print( descA ); + CHAMELEON_Desc_Destroy( &descA ); + + run_arg_add_fixdbl( args, "time", 1. ); + run_arg_add_fixdbl( args, "gflops", 1. ); + + return hres; +} + +testing_t test_zprint; +const char *zprint_params[] = { "mtxfmt", "nb", "l1", "l2", "l3", "m", "n", "lda", NULL }; +const char *zprint_output[] = { NULL }; +const char *zprint_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zprint_init( void ) __attribute__( ( constructor ) ); +void +testing_zprint_init( void ) +{ + test_zprint.name = "zprint"; + test_zprint.helper = "Print descriptors"; + test_zprint.params = zprint_params; + test_zprint.output = zprint_output; + test_zprint.outchk = zprint_outchk; + test_zprint.fptr = testing_zprint; + test_zprint.next = NULL; + + testing_register( &test_zprint ); +}