Commit 96ec99ec authored by Mathieu Faverge

Merge branch 'feature/gemm_summa' into 'master'

SUMMA GEMM

See merge request !154
parents e02d3679 ef90cff0
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -62,6 +62,7 @@ CHAM_context_t *chameleon_context_create() ...@@ -62,6 +62,7 @@ CHAM_context_t *chameleon_context_create()
chamctxt->nb = 128; chamctxt->nb = 128;
chamctxt->ib = 32; chamctxt->ib = 32;
chamctxt->rhblock = 4; chamctxt->rhblock = 4;
chamctxt->lookahead = 3;
chamctxt->nworkers = 1; chamctxt->nworkers = 1;
chamctxt->ncudas = 0; chamctxt->ncudas = 0;
...@@ -72,11 +73,11 @@ CHAM_context_t *chameleon_context_create() ...@@ -72,11 +73,11 @@ CHAM_context_t *chameleon_context_create()
chamctxt->parallel_enabled = CHAMELEON_FALSE; chamctxt->parallel_enabled = CHAMELEON_FALSE;
chamctxt->profiling_enabled = CHAMELEON_FALSE; chamctxt->profiling_enabled = CHAMELEON_FALSE;
chamctxt->progress_enabled = CHAMELEON_FALSE; chamctxt->progress_enabled = CHAMELEON_FALSE;
chamctxt->generic_enabled = CHAMELEON_FALSE;
chamctxt->householder = ChamFlatHouseholder; chamctxt->householder = ChamFlatHouseholder;
chamctxt->translation = ChamOutOfPlace; chamctxt->translation = ChamOutOfPlace;
/* Initialize scheduler */ /* Initialize scheduler */
RUNTIME_context_create(chamctxt); RUNTIME_context_create(chamctxt);
...@@ -120,6 +121,7 @@ int chameleon_context_destroy(){ ...@@ -120,6 +121,7 @@ int chameleon_context_destroy(){
* @arg CHAMELEON_PROFILING_MODE activate profiling of kernels * @arg CHAMELEON_PROFILING_MODE activate profiling of kernels
* @arg CHAMELEON_PROGRESS activate progress indicator * @arg CHAMELEON_PROGRESS activate progress indicator
* @arg CHAMELEON_GEMM3M Use z/cgemm3m for complex matrix-matrix products * @arg CHAMELEON_GEMM3M Use z/cgemm3m for complex matrix-matrix products
* @arg CHAMELEON_GENERIC enable/disable the use of the generic algorithms (toggles generic_enabled in the context)
* *
******************************************************************************* *******************************************************************************
* *
...@@ -160,6 +162,9 @@ int CHAMELEON_Enable(int option) ...@@ -160,6 +162,9 @@ int CHAMELEON_Enable(int option)
/* case CHAMELEON_PARALLEL: */ /* case CHAMELEON_PARALLEL: */
/* chamctxt->parallel_enabled = CHAMELEON_TRUE; */ /* chamctxt->parallel_enabled = CHAMELEON_TRUE; */
/* break; */ /* break; */
case CHAMELEON_GENERIC:
chamctxt->generic_enabled = CHAMELEON_TRUE;
break;
default: default:
chameleon_error("CHAMELEON_Enable", "illegal parameter value"); chameleon_error("CHAMELEON_Enable", "illegal parameter value");
return CHAMELEON_ERR_ILLEGAL_VALUE; return CHAMELEON_ERR_ILLEGAL_VALUE;
...@@ -225,6 +230,9 @@ int CHAMELEON_Disable(int option) ...@@ -225,6 +230,9 @@ int CHAMELEON_Disable(int option)
case CHAMELEON_PARALLEL_MODE: case CHAMELEON_PARALLEL_MODE:
chamctxt->parallel_enabled = CHAMELEON_FALSE; chamctxt->parallel_enabled = CHAMELEON_FALSE;
break; break;
case CHAMELEON_GENERIC:
chamctxt->generic_enabled = CHAMELEON_FALSE;
break;
default: default:
chameleon_error("CHAMELEON_Disable", "illegal parameter value"); chameleon_error("CHAMELEON_Disable", "illegal parameter value");
return CHAMELEON_ERR_ILLEGAL_VALUE; return CHAMELEON_ERR_ILLEGAL_VALUE;
...@@ -248,6 +256,7 @@ int CHAMELEON_Disable(int option) ...@@ -248,6 +256,7 @@ int CHAMELEON_Disable(int option)
* Feature to be enabled: * Feature to be enabled:
* @arg CHAMELEON_TILE_SIZE: size matrix tile, * @arg CHAMELEON_TILE_SIZE: size matrix tile,
* @arg CHAMELEON_INNER_BLOCK_SIZE: size of tile inner block, * @arg CHAMELEON_INNER_BLOCK_SIZE: size of tile inner block,
* @arg CHAMELEON_LOOKAHEAD: depth of the look ahead in algorithms
* *
* @param[in] value * @param[in] value
* Value of the parameter. * Value of the parameter.
...@@ -321,6 +330,13 @@ int CHAMELEON_Set( int param, int value ) ...@@ -321,6 +330,13 @@ int CHAMELEON_Set( int param, int value )
} }
chamctxt->translation = value; chamctxt->translation = value;
break; break;
case CHAMELEON_LOOKAHEAD:
if (value < 1) {
chameleon_error("CHAMELEON_Set", "illegal value of CHAMELEON_LOOKAHEAD");
return CHAMELEON_ERR_ILLEGAL_VALUE;
}
chamctxt->lookahead = value;
break;
default: default:
chameleon_error("CHAMELEON_Set", "unknown parameter"); chameleon_error("CHAMELEON_Set", "unknown parameter");
return CHAMELEON_ERR_ILLEGAL_VALUE; return CHAMELEON_ERR_ILLEGAL_VALUE;
...@@ -341,6 +357,7 @@ int CHAMELEON_Set( int param, int value ) ...@@ -341,6 +357,7 @@ int CHAMELEON_Set( int param, int value )
* Feature to be enabled: * Feature to be enabled:
* @arg CHAMELEON_TILE_SIZE: size matrix tile, * @arg CHAMELEON_TILE_SIZE: size matrix tile,
* @arg CHAMELEON_INNER_BLOCK_SIZE: size of tile inner block, * @arg CHAMELEON_INNER_BLOCK_SIZE: size of tile inner block,
* @arg CHAMELEON_LOOKAHEAD: depth of the look ahead in algorithms
* *
* @param[out] value * @param[out] value
* Value of the parameter. * Value of the parameter.
...@@ -375,6 +392,9 @@ int CHAMELEON_Get(int param, int *value) ...@@ -375,6 +392,9 @@ int CHAMELEON_Get(int param, int *value)
case CHAMELEON_TRANSLATION_MODE: case CHAMELEON_TRANSLATION_MODE:
*value = chamctxt->translation; *value = chamctxt->translation;
return CHAMELEON_SUCCESS; return CHAMELEON_SUCCESS;
case CHAMELEON_LOOKAHEAD:
*value = chamctxt->lookahead;
return CHAMELEON_SUCCESS;
default: default:
chameleon_error("CHAMELEON_Get", "unknown parameter"); chameleon_error("CHAMELEON_Get", "unknown parameter");
return CHAMELEON_ERR_ILLEGAL_VALUE; return CHAMELEON_ERR_ILLEGAL_VALUE;
......
...@@ -68,6 +68,27 @@ int chameleon_desc_mat_free( CHAM_desc_t *desc ) ...@@ -68,6 +68,27 @@ int chameleon_desc_mat_free( CHAM_desc_t *desc )
return CHAMELEON_SUCCESS; return CHAMELEON_SUCCESS;
} }
/**
 * Internal function to return the MPI rank of tile A(m,n), where m and n are
 * block (tile) indices relative to the descriptor.
 *
 * The indices are first shifted by A->i / A->mb and A->j / A->nb
 * (presumably the descriptor's offset expressed in tiles -- confirm against
 * CHAM_desc_t), then the row is folded modulo A->p and the column modulo
 * A->q. The returned rank is row_in_grid * A->q + col_in_grid.
 *
 * @param[in] A  Descriptor providing i, j, mb, nb, p and q.
 * @param[in] m  Row block index.
 * @param[in] n  Column block index.
 *
 * @return The rank computed from the shifted indices.
 */
int chameleon_getrankof_2d( const CHAM_desc_t *A, int m, int n )
{
    const int tile_row = m + A->i / A->mb;
    const int tile_col = n + A->j / A->nb;
    const int grid_row = tile_row % A->p;
    const int grid_col = tile_col % A->q;

    return grid_row * A->q + grid_col;
}
/**
 * Internal function to return the MPI rank of tile DIAG(m,0), where m and n
 * are block (tile) indices.
 *
 * Only the row index drives the result: it is shifted by A->i / A->mb and
 * then used for both the modulo-A->p and the modulo-A->q component of the
 * rank.
 *
 * @param[in] A  Descriptor providing i, mb, p and q.
 * @param[in] m  Row block index.
 * @param[in] n  Column block index; must equal m (checked by assert only).
 *
 * @return The rank computed from the shifted row index.
 */
int chameleon_getrankof_2d_diag( const CHAM_desc_t *A, int m, int n )
{
    const int tile_row = m + A->i / A->mb;

    assert( m == n );
    /* n is only read by the assert above: keep NDEBUG builds warning-free */
    (void)n;

    return (tile_row % A->p) * A->q + (tile_row % A->q);
}
/** /**
****************************************************************************** ******************************************************************************
* *
......
...@@ -44,8 +44,8 @@ inline static int chameleon_getblkldd_ccrb(const CHAM_desc_t *A, int m); ...@@ -44,8 +44,8 @@ inline static int chameleon_getblkldd_ccrb(const CHAM_desc_t *A, int m);
/** /**
* Data distributions * Data distributions
*/ */
inline static int chameleon_getrankof_2d(const CHAM_desc_t *desc, int m, int n); int chameleon_getrankof_2d(const CHAM_desc_t *desc, int m, int n);
inline static int chameleon_getrankof_2d_diag(const CHAM_desc_t *desc, int m, int n); int chameleon_getrankof_2d_diag(const CHAM_desc_t *desc, int m, int n);
int chameleon_desc_init ( CHAM_desc_t *desc, void *mat, int chameleon_desc_init ( CHAM_desc_t *desc, void *mat,
cham_flttype_t dtyp, int mb, int nb, int bsiz, cham_flttype_t dtyp, int mb, int nb, int bsiz,
...@@ -175,27 +175,6 @@ inline static int chameleon_getblkldd_cm(const CHAM_desc_t *A, int m) { ...@@ -175,27 +175,6 @@ inline static int chameleon_getblkldd_cm(const CHAM_desc_t *A, int m) {
return A->llm; return A->llm;
} }
/**
 * Internal function to return the MPI rank of tile A(m,n), where m and n are
 * block (tile) indices relative to the descriptor.
 *
 * The row index is shifted by A->i / A->mb and folded modulo A->p; the column
 * index is shifted by A->j / A->nb and folded modulo A->q. The rank is
 * row_in_grid * A->q + col_in_grid.
 */
inline static int chameleon_getrankof_2d(const CHAM_desc_t *A, int m, int n)
{
    const int tile_row = m + A->i / A->mb;
    const int tile_col = n + A->j / A->nb;
    const int grid_row = tile_row % A->p;
    const int grid_col = tile_col % A->q;

    return grid_row * A->q + grid_col;
}
/**
 * Internal function to return the MPI rank of tile DIAG(m,0), where m and n
 * are block (tile) indices.
 *
 * Only the row index drives the result: it is shifted by A->i / A->mb and
 * then used for both the modulo-A->p and the modulo-A->q component of the
 * rank. n must equal m (checked by assert only).
 */
inline static int chameleon_getrankof_2d_diag(const CHAM_desc_t *A, int m, int n)
{
    const int tile_row = m + A->i / A->mb;

    assert( m == n );
    /* n is only read by the assert above: keep NDEBUG builds warning-free */
    (void)n;

    return (tile_row % A->p) * A->q + (tile_row % A->q);
}
/** /**
* Detect if the tile is local or not * Detect if the tile is local or not
*/ */
......
...@@ -182,6 +182,7 @@ typedef enum chameleon_store_e { ...@@ -182,6 +182,7 @@ typedef enum chameleon_store_e {
#define CHAMELEON_BOUND 7 #define CHAMELEON_BOUND 7
#define CHAMELEON_PROGRESS 8 #define CHAMELEON_PROGRESS 8
#define CHAMELEON_GEMM3M 9 #define CHAMELEON_GEMM3M 9
#define CHAMELEON_GENERIC 10
/** /**
* CHAMELEON constants - configuration parameters * CHAMELEON constants - configuration parameters
...@@ -192,6 +193,7 @@ typedef enum chameleon_store_e { ...@@ -192,6 +193,7 @@ typedef enum chameleon_store_e {
#define CHAMELEON_HOUSEHOLDER_MODE 5 #define CHAMELEON_HOUSEHOLDER_MODE 5
#define CHAMELEON_HOUSEHOLDER_SIZE 6 #define CHAMELEON_HOUSEHOLDER_SIZE 6
#define CHAMELEON_TRANSLATION_MODE 7 #define CHAMELEON_TRANSLATION_MODE 7
#define CHAMELEON_LOOKAHEAD 8
/** /**
* @brief QR/LQ factorization trees * @brief QR/LQ factorization trees
......
...@@ -116,6 +116,7 @@ typedef struct chameleon_context_s { ...@@ -116,6 +116,7 @@ typedef struct chameleon_context_s {
cham_bool_t parallel_enabled; cham_bool_t parallel_enabled;
cham_bool_t profiling_enabled; cham_bool_t profiling_enabled;
cham_bool_t progress_enabled; cham_bool_t progress_enabled;
cham_bool_t generic_enabled;
cham_householder_t householder; // "domino" (flat) or tree-based (reduction) Householder cham_householder_t householder; // "domino" (flat) or tree-based (reduction) Householder
cham_translation_t translation; // In place or Out of place layout conversion cham_translation_t translation; // In place or Out of place layout conversion
...@@ -123,6 +124,7 @@ typedef struct chameleon_context_s { ...@@ -123,6 +124,7 @@ typedef struct chameleon_context_s {
int nb; int nb;
int ib; int ib;
int rhblock; // block size for tree-based (reduction) Householder int rhblock; // block size for tree-based (reduction) Householder
int lookahead; // depth of the look ahead in algorithms
void *schedopt; // structure for runtimes void *schedopt; // structure for runtimes
int mpi_outer_init; // MPI has been initialized outside our functions int mpi_outer_init; // MPI has been initialized outside our functions
} CHAM_context_t; } CHAM_context_t;
......
...@@ -85,6 +85,8 @@ void RUNTIME_enable( void *runtime_ctxt, int lever ) ...@@ -85,6 +85,8 @@ void RUNTIME_enable( void *runtime_ctxt, int lever )
default: default:
return; return;
} }
(void)runtime_ctxt;
return; return;
} }
...@@ -107,5 +109,7 @@ void RUNTIME_disable( void *runtime_ctxt, int lever ) ...@@ -107,5 +109,7 @@ void RUNTIME_disable( void *runtime_ctxt, int lever )
default: default:
return; return;
} }
(void)runtime_ctxt;
return; return;
} }
...@@ -56,7 +56,7 @@ void RUNTIME_iteration_pop( CHAM_context_t *chamctxt ) ...@@ -56,7 +56,7 @@ void RUNTIME_iteration_pop( CHAM_context_t *chamctxt )
void RUNTIME_start_profiling(){ void RUNTIME_start_profiling(){
#if defined(HAVE_STARPU_FXT_PROFILING) #if defined(HAVE_STARPU_FXT_PROFILING)
starpu_fxt_start_profiling(); starpu_fxt_start_profiling();
#else #else
fprintf(stderr, "Profiling throught FxT has not been enabled in StarPU runtime (configure StarPU with --with-fxt)\n"); fprintf(stderr, "Profiling throught FxT has not been enabled in StarPU runtime (configure StarPU with --with-fxt)\n");
#endif #endif
...@@ -64,7 +64,7 @@ void RUNTIME_start_profiling(){ ...@@ -64,7 +64,7 @@ void RUNTIME_start_profiling(){
void RUNTIME_stop_profiling(){ void RUNTIME_stop_profiling(){
#if defined(HAVE_STARPU_FXT_PROFILING) #if defined(HAVE_STARPU_FXT_PROFILING)
starpu_fxt_stop_profiling(); starpu_fxt_stop_profiling();
#else #else
fprintf(stderr, "Profiling throught FxT has not been enabled in StarPU runtime (configure StarPU with --with-fxt)\n"); fprintf(stderr, "Profiling throught FxT has not been enabled in StarPU runtime (configure StarPU with --with-fxt)\n");
#endif #endif
......
...@@ -212,7 +212,6 @@ static int compute_gram_sequential(cham_uplo_t uplo, ...@@ -212,7 +212,6 @@ static int compute_gram_sequential(cham_uplo_t uplo,
int LDA) int LDA)
{ {
int m, n; int m, n;
double eps;
double squareij, mean_dij, mhalf; double squareij, mean_dij, mhalf;
double *work = (double *)malloc(N * sizeof(double)); double *work = (double *)malloc(N * sizeof(double));
...@@ -258,4 +257,4 @@ static int compute_gram_sequential(cham_uplo_t uplo, ...@@ -258,4 +257,4 @@ static int compute_gram_sequential(cham_uplo_t uplo,
free(work); free(work);
return 0; return 0;
} }
\ No newline at end of file
...@@ -103,6 +103,8 @@ set(ZSRC_LAP_INT ...@@ -103,6 +103,8 @@ set(ZSRC_LAP_INT
set(ZSRC_TIL_INT set(ZSRC_TIL_INT
# BLAS 3 # BLAS 3
time_zgemm_tile.c time_zgemm_tile.c
time_zhemm_tile.c
time_zsymm_tile.c
# LAPACK # LAPACK
time_zgels_tile.c time_zgels_tile.c
time_zgeqrf_hqr_tile.c time_zgeqrf_hqr_tile.c
......
/**
*
* @file time_zhemm_tile.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @version 0.9.2
* @author Mathieu Faverge
* @date 2014-11-16
* @precisions normal z -> c
*
*/
/*
 * Configuration macros for this timing driver. They are defined before
 * "./timing.c" is textually included below.
 * NOTE(review): presumably timing.c consumes them -- confirm there.
 */
/* Scalar type of the matrices and the matching real precision */
#define _TYPE CHAMELEON_Complex64_t
#define _PREC double
/* Double precision machine-parameter routine */
#define _LAMCH LAPACKE_dlamch_work
/* Name of the routine being timed */
#define _NAME "CHAMELEON_zhemm_Tile"
/* See LAWN 41, page 120 */
/* NOTE(review): presumably the multiplication and addition counts of a left-side HEMM -- confirm in the flops header */
#define _FMULS FMULS_HEMM( ChamLeft, M, N )
#define _FADDS FADDS_HEMM( ChamLeft, M, N )
/* The shared timing code is included as source, not linked */
#include "./timing.c"
#include "timing_zauxiliary.h"
/**
 * Times one call to CHAMELEON_zhemm_Tile( ChamLeft, ChamUpper, ... ) and,
 * when checking is requested, compares the result with z_check_hemm.
 *
 * @param[in]     iparam  Integer parameters; IPARAM_LDA, IPARAM_LDB and
 *                        IPARAM_LDC are read here, the rest through
 *                        PASTE_CODE_IPARAM_LOCALS.
 * @param[in,out] dparam  Receives the residual (IPARAM_RES) and three norms
 *                        (IPARAM_ANORM, IPARAM_BNORM, IPARAM_XNORM) when the
 *                        check runs.
 * @param[out]    t_      Not referenced directly in this body.
 *                        NOTE(review): presumably written by
 *                        START_TIMING/STOP_TIMING -- confirm in timing.h.
 *
 * @return Always 0.
 */
static int
RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
{
CHAMELEON_Complex64_t alpha, beta;
/* NOTE(review): presumably declares M, N, LDA, LDB, LDC, check and ISEED used below -- confirm in timing.h */
PASTE_CODE_IPARAM_LOCALS( iparam );
/* All three leading dimensions are forced to be at least M */
LDA = chameleon_max(M, iparam[IPARAM_LDA]);
LDB = chameleon_max(M, iparam[IPARAM_LDB]);
LDC = chameleon_max(M, iparam[IPARAM_LDC]);
/* Allocate Data: A is M-by-M, B and C are M-by-N */
PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, M, M );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, M, N );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descC, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDC, M, N );
/* Initialize Data with fixed seeds */
CHAMELEON_zplghe_Tile( 0, ChamUpper, descA, 5373 );
CHAMELEON_zplrnt_Tile( descB, 7672 );
CHAMELEON_zplrnt_Tile( descC, 6387 );
#if !defined(CHAMELEON_SIMULATION)
/* Draw one random value each for alpha and beta */
LAPACKE_zlarnv_work(1, ISEED, 1, &alpha);
LAPACKE_zlarnv_work(1, ISEED, 1, &beta);
#else
/* Simulation builds use fixed scalars instead of calling LAPACKE */
alpha = 1.5;
beta = -2.3;
#endif
/* Save C for check (C2 is the copy taken before the timed call) */
PASTE_TILE_TO_LAPACK( descC, C2, check, CHAMELEON_Complex64_t, LDC, N );
START_TIMING();
CHAMELEON_zhemm_Tile( ChamLeft, ChamUpper, alpha, descA, descB, beta, descC );
STOP_TIMING();
#if !defined(CHAMELEON_SIMULATION)
/* Check the solution */
if (check)
{
PASTE_TILE_TO_LAPACK( descA, A, check, CHAMELEON_Complex64_t, LDA, M );
PASTE_TILE_TO_LAPACK( descB, B, check, CHAMELEON_Complex64_t, LDB, N );
PASTE_TILE_TO_LAPACK( descC, C, check, CHAMELEON_Complex64_t, LDC, N );
/* C is the computed result, C2 the saved initial C passed as the reference buffer */
dparam[IPARAM_RES] = z_check_hemm( ChamLeft, ChamUpper, M, N,
alpha, A, LDA, B, LDB, beta, C, C2, LDC,
&(dparam[IPARAM_ANORM]),
&(dparam[IPARAM_BNORM]),
&(dparam[IPARAM_XNORM]) );
free(A); free(B); free(C); free(C2);
}
#endif
PASTE_CODE_FREE_MATRIX( descA );
PASTE_CODE_FREE_MATRIX( descB );
PASTE_CODE_FREE_MATRIX( descC );
return 0;
}
/**
*
* @file time_zsymm_tile.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @version 0.9.2
* @author Mathieu Faverge
* @date 2014-11-16
* @precisions normal z -> c d s
*
*/
/*
 * Configuration macros for this timing driver. They are defined before
 * "./timing.c" is textually included below.
 * NOTE(review): presumably timing.c consumes them -- confirm there.
 */
/* Scalar type of the matrices and the matching real precision */
#define _TYPE CHAMELEON_Complex64_t
#define _PREC double
/* Double precision machine-parameter routine */
#define _LAMCH LAPACKE_dlamch_work
/* Name of the routine being timed */
#define _NAME "CHAMELEON_zsymm_Tile"
/* See LAWN 41, page 120 */
/* NOTE(review): presumably the multiplication and addition counts of a left-side SYMM -- confirm in the flops header */
#define _FMULS FMULS_SYMM( ChamLeft, M, N )
#define _FADDS FADDS_SYMM( ChamLeft, M, N )
/* The shared timing code is included as source, not linked */
#include "./timing.c"
#include "timing_zauxiliary.h"
/**
 * Times one call to CHAMELEON_zsymm_Tile( ChamLeft, ChamUpper, ... ) and,
 * when checking is requested, compares the result with z_check_symm.
 *
 * @param[in]     iparam  Integer parameters; IPARAM_LDA, IPARAM_LDB and
 *                        IPARAM_LDC are read here, the rest through
 *                        PASTE_CODE_IPARAM_LOCALS.
 * @param[in,out] dparam  Receives the residual (IPARAM_RES) and three norms
 *                        (IPARAM_ANORM, IPARAM_BNORM, IPARAM_XNORM) when the
 *                        check runs.
 * @param[out]    t_      Not referenced directly in this body.
 *                        NOTE(review): presumably written by
 *                        START_TIMING/STOP_TIMING -- confirm in timing.h.
 *
 * @return Always 0.
 */
static int
RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
{
CHAMELEON_Complex64_t alpha, beta;
/* NOTE(review): presumably declares M, N, LDA, LDB, LDC, check and ISEED used below -- confirm in timing.h */
PASTE_CODE_IPARAM_LOCALS( iparam );
/* All three leading dimensions are forced to be at least M */
LDA = chameleon_max(M, iparam[IPARAM_LDA]);
LDB = chameleon_max(M, iparam[IPARAM_LDB]);
LDC = chameleon_max(M, iparam[IPARAM_LDC]);
/* Allocate Data: A is M-by-M, B and C are M-by-N */
PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, M, M );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, M, N );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descC, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDC, M, N );
/* Initialize Data with fixed seeds */
/* NOTE(review): A is filled with the Hermitian generator (zplghe) although symm is timed; */
/* the reference check receives the same A, but confirm whether zplgsy was intended */
CHAMELEON_zplghe_Tile( 0, ChamUpper, descA, 5373 );
CHAMELEON_zplrnt_Tile( descB, 7672 );
CHAMELEON_zplrnt_Tile( descC, 6387 );
#if !defined(CHAMELEON_SIMULATION)
/* Draw one random value each for alpha and beta */
LAPACKE_zlarnv_work(1, ISEED, 1, &alpha);
LAPACKE_zlarnv_work(1, ISEED, 1, &beta);
#else
/* Simulation builds use fixed scalars instead of calling LAPACKE */
alpha = 1.5;
beta = -2.3;
#endif
/* Save C for check (C2 is the copy taken before the timed call) */
PASTE_TILE_TO_LAPACK( descC, C2, check, CHAMELEON_Complex64_t, LDC, N );
START_TIMING();
CHAMELEON_zsymm_Tile( ChamLeft, ChamUpper, alpha, descA, descB, beta, descC );
STOP_TIMING();
#if !defined(CHAMELEON_SIMULATION)
/* Check the solution */
if (check)
{
PASTE_TILE_TO_LAPACK( descA, A, check, CHAMELEON_Complex64_t, LDA, M );
PASTE_TILE_TO_LAPACK( descB, B, check, CHAMELEON_Complex64_t, LDB, N );
PASTE_TILE_TO_LAPACK( descC, C, check, CHAMELEON_Complex64_t, LDC, N );
/* C is the computed result, C2 the saved initial C passed as the reference buffer */
dparam[IPARAM_RES] = z_check_symm( ChamLeft, ChamUpper, M, N,
alpha, A, LDA, B, LDB, beta, C, C2, LDC,
&(dparam[IPARAM_ANORM]),
&(dparam[IPARAM_BNORM]),
&(dparam[IPARAM_XNORM]) );
free(A); free(B); free(C); free(C2);
}
#endif
PASTE_CODE_FREE_MATRIX( descA );
PASTE_CODE_FREE_MATRIX( descB );
PASTE_CODE_FREE_MATRIX( descC );
return 0;
}
...@@ -247,6 +247,70 @@ double z_check_gemm(cham_trans_t transA, cham_trans_t transB, int M, int N, int ...@@ -247,6 +247,70 @@ double z_check_gemm(cham_trans_t transA, cham_trans_t transB, int M, int N, int
return Rnorm; return Rnorm;
} }
#if defined(PRECISION_z) || defined(PRECISION_c)
/*--------------------------------------------------------------
 * Check the hemm
 *
 * Recomputes the product with cblas_zhemm into Cref and measures how far
 * the Chameleon result Ccham is from it.
 *
 * On entry Cref holds the initial C; on exit it holds the difference
 * (reference result - Ccham). All norms are taken with LAPACKE_zlange 'I'.
 *
 *   Cinitnorm   : norm of the initial C (Cref on entry)
 *   Cchamnorm   : norm of Ccham
 *   Clapacknorm : norm of the cblas_zhemm result
 *
 * Returns the norm of the difference.
 */
double z_check_hemm( cham_side_t side, cham_uplo_t uplo, int M, int N,
                     CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA,
                     const CHAMELEON_Complex64_t *B, int LDB,
                     CHAMELEON_Complex64_t beta, const CHAMELEON_Complex64_t *Ccham,
                     CHAMELEON_Complex64_t *Cref, int LDC,
                     double *Cinitnorm, double *Cchamnorm, double *Clapacknorm )
{
    CHAMELEON_Complex64_t minus_one = -1.0;
    double                residual;
    double               *rwork;

    /* Scratch array handed to every zlange call below */
    rwork = (double *)malloc( chameleon_max(M, N) * sizeof(double) );

    /* Norms of the two inputs, taken before Cref is overwritten */
    *Cinitnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref,  LDC, rwork );
    *Cchamnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Ccham, LDC, rwork );

    /* Reference product, computed in place in Cref */
    cblas_zhemm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N,
                 CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC );
    *Clapacknorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, rwork );

    /* Cref <- Cref - Ccham over the whole LDC-by-N storage */
    cblas_zaxpy( LDC * N, CBLAS_SADDR(minus_one), Ccham, 1, Cref, 1 );
    residual = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, rwork );

    free(rwork);
    return residual;
}
#endif /* defined(PRECISION_z) || defined(PRECISION_c) */
/*--------------------------------------------------------------
 * Check the symm
 *
 * Recomputes the product with cblas_zsymm into Cref and measures how far
 * the Chameleon result Ccham is from it.
 *
 * On entry Cref holds the initial C; on exit it holds the difference
 * (reference result - Ccham). All norms are taken with LAPACKE_zlange 'I'.
 *
 *   Cinitnorm   : norm of the initial C (Cref on entry)
 *   Cchamnorm   : norm of Ccham
 *   Clapacknorm : norm of the cblas_zsymm result
 *
 * Returns the norm of the difference.
 */
double z_check_symm( cham_side_t side, cham_uplo_t uplo, int M, int N,
                     CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA,
                     const CHAMELEON_Complex64_t *B, int LDB,
                     CHAMELEON_Complex64_t beta, const CHAMELEON_Complex64_t *Ccham,
                     CHAMELEON_Complex64_t *Cref, int LDC,
                     double *Cinitnorm, double *Cchamnorm, double *Clapacknorm )
{
    CHAMELEON_Complex64_t minus_one = -1.0;
    double                residual;
    double               *rwork;

    /* Scratch array handed to every zlange call below */
    rwork = (double *)malloc( chameleon_max(M, N) * sizeof(double) );

    /* Norms of the two inputs, taken before Cref is overwritten */
    *Cinitnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref,  LDC, rwork );
    *Cchamnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Ccham, LDC, rwork );

    /* Reference product, computed in place in Cref */
    cblas_zsymm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N,
                 CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC );
    *Clapacknorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, rwork );

    /* Cref <- Cref - Ccham over the whole LDC-by-N storage */
    cblas_zaxpy( LDC * N, CBLAS_SADDR(minus_one), Ccham, 1, Cref, 1 );
    residual = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, rwork );

    free(rwork);
    return residual;
}
/*-------------------------------------------------------------- /*--------------------------------------------------------------
* Check the trsm * Check the trsm
*/ */
......
...@@ -28,6 +28,21 @@ double z_check_gemm(cham_trans_t transA, cham_trans_t transB, int M, int N, int ...@@ -28,6 +28,21 @@ double z_check_gemm(cham_trans_t transA, cham_trans_t transB, int M, int N, int
CHAMELEON_Complex64_t *Cref, int LDC, CHAMELEON_Complex64_t *Cref, int LDC,
double *Cinitnorm, double *Cchamnorm, double *Clapacknorm ); double *Cinitnorm, double *Cchamnorm, double *Clapacknorm );
/*
 * Check of hemm against cblas_zhemm (complex precisions only).
 * Cref holds the initial C on entry and is overwritten.
 * Returns the norm of (reference result - Ccham); the three output pointers
 * receive the norms of the initial C, of Ccham and of the reference result.
 */
#if defined(PRECISION_z) || defined(PRECISION_c)
double z_check_hemm( cham_side_t side, cham_uplo_t uplo, int M, int N,
CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA,
const CHAMELEON_Complex64_t *B, int LDB,
CHAMELEON_Complex64_t beta, const CHAMELEON_Complex64_t *Ccham,
CHAMELEON_Complex64_t *Cref, int LDC,
double *Cinitnorm, double *Cchamnorm, double *Clapacknorm );
#endif
/*
 * Check of symm against cblas_zsymm.
 * Cref holds the initial C on entry and is overwritten.
 * Returns the norm of (reference result - Ccham); the three output pointers
 * receive the norms of the initial C, of Ccham and of the reference result.
 */
double z_check_symm( cham_side_t side, cham_uplo_t uplo, int M, int N,
CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA,
const CHAMELEON_Complex64_t *B, int LDB,
CHAMELEON_Complex64_t beta, const CHAMELEON_Complex64_t *Ccham,
CHAMELEON_Complex64_t *Cref, int LDC,
double *Cinitnorm, double *Cchamnorm, double *Clapacknorm );
double z_check_trsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, cham_diag_t diag, double z_check_trsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, cham_diag_t diag,
int M, int NRHS, CHAMELEON_Complex64_t alpha, int M, int NRHS, CHAMELEON_Complex64_t alpha,
CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *A, int LDA,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment