Commit ef90cff0 authored by Mathieu Faverge

SUMMA GEMM

parent e02d3679
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -62,6 +62,7 @@ CHAM_context_t *chameleon_context_create()
chamctxt->nb = 128;
chamctxt->ib = 32;
chamctxt->rhblock = 4;
chamctxt->lookahead = 3;
chamctxt->nworkers = 1;
chamctxt->ncudas = 0;
......@@ -72,11 +73,11 @@ CHAM_context_t *chameleon_context_create()
chamctxt->parallel_enabled = CHAMELEON_FALSE;
chamctxt->profiling_enabled = CHAMELEON_FALSE;
chamctxt->progress_enabled = CHAMELEON_FALSE;
chamctxt->generic_enabled = CHAMELEON_FALSE;
chamctxt->householder = ChamFlatHouseholder;
chamctxt->translation = ChamOutOfPlace;
/* Initialize scheduler */
RUNTIME_context_create(chamctxt);
......@@ -120,6 +121,7 @@ int chameleon_context_destroy(){
* @arg CHAMELEON_PROFILING_MODE activate profiling of kernels
* @arg CHAMELEON_PROGRESS activate progress indicator
* @arg CHAMELEON_GEMM3M Use z/cgemm3m for complex matrix-matrix products
* @arg CHAMELEON_GENERIC set the generic_enabled flag of the context
*
*******************************************************************************
*
......@@ -160,6 +162,9 @@ int CHAMELEON_Enable(int option)
/* case CHAMELEON_PARALLEL: */
/* chamctxt->parallel_enabled = CHAMELEON_TRUE; */
/* break; */
case CHAMELEON_GENERIC:
chamctxt->generic_enabled = CHAMELEON_TRUE;
break;
default:
chameleon_error("CHAMELEON_Enable", "illegal parameter value");
return CHAMELEON_ERR_ILLEGAL_VALUE;
......@@ -225,6 +230,9 @@ int CHAMELEON_Disable(int option)
case CHAMELEON_PARALLEL_MODE:
chamctxt->parallel_enabled = CHAMELEON_FALSE;
break;
case CHAMELEON_GENERIC:
chamctxt->generic_enabled = CHAMELEON_FALSE;
break;
default:
chameleon_error("CHAMELEON_Disable", "illegal parameter value");
return CHAMELEON_ERR_ILLEGAL_VALUE;
......@@ -248,6 +256,7 @@ int CHAMELEON_Disable(int option)
* Feature to be enabled:
* @arg CHAMELEON_TILE_SIZE: size matrix tile,
* @arg CHAMELEON_INNER_BLOCK_SIZE: size of tile inner block,
* @arg CHAMELEON_LOOKAHEAD: depth of the look ahead in algorithms
*
* @param[in] value
* Value of the parameter.
......@@ -321,6 +330,13 @@ int CHAMELEON_Set( int param, int value )
}
chamctxt->translation = value;
break;
case CHAMELEON_LOOKAHEAD:
if (value < 1) {
chameleon_error("CHAMELEON_Set", "illegal value of CHAMELEON_LOOKAHEAD");
return CHAMELEON_ERR_ILLEGAL_VALUE;
}
chamctxt->lookahead = value;
break;
default:
chameleon_error("CHAMELEON_Set", "unknown parameter");
return CHAMELEON_ERR_ILLEGAL_VALUE;
......@@ -341,6 +357,7 @@ int CHAMELEON_Set( int param, int value )
* Feature to be enabled:
* @arg CHAMELEON_TILE_SIZE: size matrix tile,
* @arg CHAMELEON_INNER_BLOCK_SIZE: size of tile inner block,
* @arg CHAMELEON_LOOKAHEAD: depth of the look ahead in algorithms
*
* @param[out] value
* Value of the parameter.
......@@ -375,6 +392,9 @@ int CHAMELEON_Get(int param, int *value)
case CHAMELEON_TRANSLATION_MODE:
*value = chamctxt->translation;
return CHAMELEON_SUCCESS;
case CHAMELEON_LOOKAHEAD:
*value = chamctxt->lookahead;
return CHAMELEON_SUCCESS;
default:
chameleon_error("CHAMELEON_Get", "unknown parameter");
return CHAMELEON_ERR_ILLEGAL_VALUE;
......
......@@ -68,6 +68,27 @@ int chameleon_desc_mat_free( CHAM_desc_t *desc )
return CHAMELEON_SUCCESS;
}
/**
 * Internal function returning the MPI rank that owns tile A(m,n), where m and
 * n are block (tile) indices.
 *
 * The indices are first shifted by A->i / A->mb and A->j / A->nb (presumably
 * the origin of the sub-matrix expressed in tiles -- TODO confirm), then the
 * row index is folded modulo A->p and the column index modulo A->q. The rank
 * is the row-major position (row * q + column) of that pair.
 */
int chameleon_getrankof_2d( const CHAM_desc_t *A, int m, int n )
{
    int tile_row = A->i / A->mb + m;
    int tile_col = A->j / A->nb + n;
    int grid_row = tile_row % A->p;
    int grid_col = tile_col % A->q;

    return grid_row * A->q + grid_col;
}
/**
 * Internal function returning the MPI rank that owns the diagonal tile
 * DIAG(m,0), where m and n are block (tile) indices.
 *
 * Only m takes part in the computation: the shifted row index is folded
 * modulo A->p and modulo A->q to obtain the two grid coordinates. n is used
 * solely by the assertion, which requires m == n.
 */
int chameleon_getrankof_2d_diag( const CHAM_desc_t *A, int m, int n )
{
    int tile_idx = A->i / A->mb + m;
    int grid_row;
    int grid_col;

    assert( m == n );

    grid_row = tile_idx % A->p;
    grid_col = tile_idx % A->q;
    return grid_row * A->q + grid_col;
}
/**
******************************************************************************
*
......
......@@ -44,8 +44,8 @@ inline static int chameleon_getblkldd_ccrb(const CHAM_desc_t *A, int m);
/**
* Data distributions
*/
inline static int chameleon_getrankof_2d(const CHAM_desc_t *desc, int m, int n);
inline static int chameleon_getrankof_2d_diag(const CHAM_desc_t *desc, int m, int n);
int chameleon_getrankof_2d(const CHAM_desc_t *desc, int m, int n);
int chameleon_getrankof_2d_diag(const CHAM_desc_t *desc, int m, int n);
int chameleon_desc_init ( CHAM_desc_t *desc, void *mat,
cham_flttype_t dtyp, int mb, int nb, int bsiz,
......@@ -175,27 +175,6 @@ inline static int chameleon_getblkldd_cm(const CHAM_desc_t *A, int m) {
return A->llm;
}
/**
 * Internal function returning the MPI rank of tile A(m,n); m and n are block
 * indices.
 *
 * rank = ((m + A->i / A->mb) mod A->p) * A->q + ((n + A->j / A->nb) mod A->q)
 */
inline static int chameleon_getrankof_2d(const CHAM_desc_t *A, int m, int n)
{
    const int row = m + A->i / A->mb;
    const int col = n + A->j / A->nb;
    const int row_in_grid = row % A->p;
    const int col_in_grid = col % A->q;

    return row_in_grid * A->q + col_in_grid;
}
/**
 * Internal function returning the MPI rank of the diagonal tile DIAG(m,0);
 * m and n are block indices and must be equal (checked by assert).
 *
 * rank = (mm mod A->p) * A->q + (mm mod A->q), with mm = m + A->i / A->mb
 */
inline static int chameleon_getrankof_2d_diag(const CHAM_desc_t *A, int m, int n)
{
    const int diag = m + A->i / A->mb;
    int rank;

    assert( m == n );

    rank  = (diag % A->p) * A->q;
    rank += diag % A->q;
    return rank;
}
/**
* Detect if the tile is local or not
*/
......
......@@ -182,6 +182,7 @@ typedef enum chameleon_store_e {
#define CHAMELEON_BOUND 7
#define CHAMELEON_PROGRESS 8
#define CHAMELEON_GEMM3M 9
#define CHAMELEON_GENERIC 10
/**
* CHAMELEON constants - configuration parameters
......@@ -192,6 +193,7 @@ typedef enum chameleon_store_e {
#define CHAMELEON_HOUSEHOLDER_MODE 5
#define CHAMELEON_HOUSEHOLDER_SIZE 6
#define CHAMELEON_TRANSLATION_MODE 7
#define CHAMELEON_LOOKAHEAD 8
/**
* @brief QR/LQ factorization trees
......
......@@ -116,6 +116,7 @@ typedef struct chameleon_context_s {
cham_bool_t parallel_enabled;
cham_bool_t profiling_enabled;
cham_bool_t progress_enabled;
cham_bool_t generic_enabled;
cham_householder_t householder; // "domino" (flat) or tree-based (reduction) Householder
cham_translation_t translation; // In place or Out of place layout conversion
......@@ -123,6 +124,7 @@ typedef struct chameleon_context_s {
int nb;
int ib;
int rhblock; // block size for tree-based (reduction) Householder
int lookahead; // depth of the look ahead in algorithms
void *schedopt; // structure for runtimes
int mpi_outer_init; // MPI has been initialized outside our functions
} CHAM_context_t;
......
......@@ -85,6 +85,8 @@ void RUNTIME_enable( void *runtime_ctxt, int lever )
default:
return;
}
(void)runtime_ctxt;
return;
}
......@@ -107,5 +109,7 @@ void RUNTIME_disable( void *runtime_ctxt, int lever )
default:
return;
}
(void)runtime_ctxt;
return;
}
......@@ -56,7 +56,7 @@ void RUNTIME_iteration_pop( CHAM_context_t *chamctxt )
void RUNTIME_start_profiling(){
#if defined(HAVE_STARPU_FXT_PROFILING)
starpu_fxt_start_profiling();
starpu_fxt_start_profiling();
#else
fprintf(stderr, "Profiling throught FxT has not been enabled in StarPU runtime (configure StarPU with --with-fxt)\n");
#endif
......@@ -64,7 +64,7 @@ void RUNTIME_start_profiling(){
void RUNTIME_stop_profiling(){
#if defined(HAVE_STARPU_FXT_PROFILING)
starpu_fxt_stop_profiling();
starpu_fxt_stop_profiling();
#else
fprintf(stderr, "Profiling throught FxT has not been enabled in StarPU runtime (configure StarPU with --with-fxt)\n");
#endif
......
......@@ -212,7 +212,6 @@ static int compute_gram_sequential(cham_uplo_t uplo,
int LDA)
{
int m, n;
double eps;
double squareij, mean_dij, mhalf;
double *work = (double *)malloc(N * sizeof(double));
......@@ -258,4 +257,4 @@ static int compute_gram_sequential(cham_uplo_t uplo,
free(work);
return 0;
}
\ No newline at end of file
}
......@@ -103,6 +103,8 @@ set(ZSRC_LAP_INT
set(ZSRC_TIL_INT
# BLAS 3
time_zgemm_tile.c
time_zhemm_tile.c
time_zsymm_tile.c
# LAPACK
time_zgels_tile.c
time_zgeqrf_hqr_tile.c
......
/**
*
* @file time_zhemm_tile.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @version 0.9.2
* @author Mathieu Faverge
* @date 2014-11-16
* @precisions normal z -> c
*
*/
#define _TYPE CHAMELEON_Complex64_t
#define _PREC double
#define _LAMCH LAPACKE_dlamch_work
#define _NAME "CHAMELEON_zhemm_Tile"
/* See Lawn 41 page 120 */
#define _FMULS FMULS_HEMM( ChamLeft, M, N )
#define _FADDS FADDS_HEMM( ChamLeft, M, N )
#include "./timing.c"
#include "timing_zauxiliary.h"
/**
 * Timing driver for CHAMELEON_zhemm_Tile.
 *
 * Allocates the tile matrices A (M-by-M), B and C (both M-by-N), fills them,
 * times one call to CHAMELEON_zhemm_Tile( ChamLeft, ChamUpper, ... ) and, when
 * `check` is set (and CHAMELEON_SIMULATION is not defined), validates the
 * result with z_check_hemm().
 *
 * @param[in]  iparam  Integer parameters. IPARAM_LDA/LDB/LDC are read here;
 *                     M, N, check and the other locals presumably come from
 *                     PASTE_CODE_IPARAM_LOCALS -- TODO confirm in timing.c.
 * @param[out] dparam  When checking, receives the residual (IPARAM_RES) and
 *                     the three norms returned by z_check_hemm()
 *                     (IPARAM_ANORM, IPARAM_BNORM, IPARAM_XNORM).
 * @param[out] t_      NOTE(review): presumably written by the
 *                     START_TIMING/STOP_TIMING macros -- confirm.
 *
 * @return 0 in all cases.
 */
static int
RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
{
CHAMELEON_Complex64_t alpha, beta;
PASTE_CODE_IPARAM_LOCALS( iparam );
/* Every leading dimension is forced to be at least M */
LDA = chameleon_max(M, iparam[IPARAM_LDA]);
LDB = chameleon_max(M, iparam[IPARAM_LDB]);
LDC = chameleon_max(M, iparam[IPARAM_LDC]);
/* Allocate Data */
PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, M, M );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, M, N );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descC, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDC, M, N );
/* Initialize Data */
/* A comes from the zplghe generator, B and C from zplrnt; the trailing
 * integer constants are presumably fixed RNG seeds -- TODO confirm */
CHAMELEON_zplghe_Tile( 0, ChamUpper, descA, 5373 );
CHAMELEON_zplrnt_Tile( descB, 7672 );
CHAMELEON_zplrnt_Tile( descC, 6387 );
#if !defined(CHAMELEON_SIMULATION)
/* Scalars are drawn with LAPACKE_zlarnv_work outside of simulation mode */
LAPACKE_zlarnv_work(1, ISEED, 1, &alpha);
LAPACKE_zlarnv_work(1, ISEED, 1, &beta);
#else
/* Simulation mode: fixed scalars */
alpha = 1.5;
beta = -2.3;
#endif
/* Save C for check */
PASTE_TILE_TO_LAPACK( descC, C2, check, CHAMELEON_Complex64_t, LDC, N );
START_TIMING();
CHAMELEON_zhemm_Tile( ChamLeft, ChamUpper, alpha, descA, descB, beta, descC );
STOP_TIMING();
#if !defined(CHAMELEON_SIMULATION)
/* Check the solution */
if (check)
{
PASTE_TILE_TO_LAPACK( descA, A, check, CHAMELEON_Complex64_t, LDA, M );
PASTE_TILE_TO_LAPACK( descB, B, check, CHAMELEON_Complex64_t, LDB, N );
PASTE_TILE_TO_LAPACK( descC, C, check, CHAMELEON_Complex64_t, LDC, N );
/* C is the computed result (Ccham); C2, the copy of the initial C, is
 * passed as Cref and is overwritten by the check */
dparam[IPARAM_RES] = z_check_hemm( ChamLeft, ChamUpper, M, N,
alpha, A, LDA, B, LDB, beta, C, C2, LDC,
&(dparam[IPARAM_ANORM]),
&(dparam[IPARAM_BNORM]),
&(dparam[IPARAM_XNORM]) );
free(A); free(B); free(C); free(C2);
}
#endif
PASTE_CODE_FREE_MATRIX( descA );
PASTE_CODE_FREE_MATRIX( descB );
PASTE_CODE_FREE_MATRIX( descC );
return 0;
}
/**
*
* @file time_zsymm_tile.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @version 0.9.2
* @author Mathieu Faverge
* @date 2014-11-16
* @precisions normal z -> c d s
*
*/
#define _TYPE CHAMELEON_Complex64_t
#define _PREC double
#define _LAMCH LAPACKE_dlamch_work
#define _NAME "CHAMELEON_zsymm_Tile"
/* See Lawn 41 page 120 */
#define _FMULS FMULS_SYMM( ChamLeft, M, N )
#define _FADDS FADDS_SYMM( ChamLeft, M, N )
#include "./timing.c"
#include "timing_zauxiliary.h"
/**
 * Timing driver for CHAMELEON_zsymm_Tile.
 *
 * Allocates the tile matrices A (M-by-M), B and C (both M-by-N), fills them,
 * times one call to CHAMELEON_zsymm_Tile( ChamLeft, ChamUpper, ... ) and, when
 * `check` is set (and CHAMELEON_SIMULATION is not defined), validates the
 * result with z_check_symm().
 *
 * @param[in]  iparam  Integer parameters. IPARAM_LDA/LDB/LDC are read here;
 *                     M, N, check and the other locals presumably come from
 *                     PASTE_CODE_IPARAM_LOCALS -- TODO confirm in timing.c.
 * @param[out] dparam  When checking, receives the residual (IPARAM_RES) and
 *                     the three norms returned by z_check_symm()
 *                     (IPARAM_ANORM, IPARAM_BNORM, IPARAM_XNORM).
 * @param[out] t_      NOTE(review): presumably written by the
 *                     START_TIMING/STOP_TIMING macros -- confirm.
 *
 * @return 0 in all cases.
 */
static int
RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
{
CHAMELEON_Complex64_t alpha, beta;
PASTE_CODE_IPARAM_LOCALS( iparam );
/* Every leading dimension is forced to be at least M */
LDA = chameleon_max(M, iparam[IPARAM_LDA]);
LDB = chameleon_max(M, iparam[IPARAM_LDB]);
LDC = chameleon_max(M, iparam[IPARAM_LDC]);
/* Allocate Data */
PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, M, M );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, M, N );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descC, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDC, M, N );
/* Initialize Data */
/* NOTE(review): A is filled with the zplghe generator although this driver
 * times a symmetric product; confirm whether a symmetric generator was
 * intended. The trailing integer constants are presumably fixed RNG seeds. */
CHAMELEON_zplghe_Tile( 0, ChamUpper, descA, 5373 );
CHAMELEON_zplrnt_Tile( descB, 7672 );
CHAMELEON_zplrnt_Tile( descC, 6387 );
#if !defined(CHAMELEON_SIMULATION)
/* Scalars are drawn with LAPACKE_zlarnv_work outside of simulation mode */
LAPACKE_zlarnv_work(1, ISEED, 1, &alpha);
LAPACKE_zlarnv_work(1, ISEED, 1, &beta);
#else
/* Simulation mode: fixed scalars */
alpha = 1.5;
beta = -2.3;
#endif
/* Save C for check */
PASTE_TILE_TO_LAPACK( descC, C2, check, CHAMELEON_Complex64_t, LDC, N );
START_TIMING();
CHAMELEON_zsymm_Tile( ChamLeft, ChamUpper, alpha, descA, descB, beta, descC );
STOP_TIMING();
#if !defined(CHAMELEON_SIMULATION)
/* Check the solution */
if (check)
{
PASTE_TILE_TO_LAPACK( descA, A, check, CHAMELEON_Complex64_t, LDA, M );
PASTE_TILE_TO_LAPACK( descB, B, check, CHAMELEON_Complex64_t, LDB, N );
PASTE_TILE_TO_LAPACK( descC, C, check, CHAMELEON_Complex64_t, LDC, N );
/* C is the computed result (Ccham); C2, the copy of the initial C, is
 * passed as Cref and is overwritten by the check */
dparam[IPARAM_RES] = z_check_symm( ChamLeft, ChamUpper, M, N,
alpha, A, LDA, B, LDB, beta, C, C2, LDC,
&(dparam[IPARAM_ANORM]),
&(dparam[IPARAM_BNORM]),
&(dparam[IPARAM_XNORM]) );
free(A); free(B); free(C); free(C2);
}
#endif
PASTE_CODE_FREE_MATRIX( descA );
PASTE_CODE_FREE_MATRIX( descB );
PASTE_CODE_FREE_MATRIX( descC );
return 0;
}
......@@ -247,6 +247,70 @@ double z_check_gemm(cham_trans_t transA, cham_trans_t transB, int M, int N, int
return Rnorm;
}
#if defined(PRECISION_z) || defined(PRECISION_c)
/*--------------------------------------------------------------
 * Check the hemm
 *
 * Recomputes the product with cblas_zhemm into Cref and compares it with
 * the result stored in Ccham.
 *
 *  - Cref holds the initial C on entry and is overwritten: first with the
 *    reference result, then with (reference - Ccham).
 *  - *Cinitnorm   receives the 'I' norm of the initial Cref,
 *    *Cchamnorm   the 'I' norm of Ccham,
 *    *Clapacknorm the 'I' norm of the reference result.
 *
 * Returns the 'I' norm of (reference - Ccham).
 *
 * NOTE(review): the result of malloc is not checked.
 */
double z_check_hemm( cham_side_t side, cham_uplo_t uplo, int M, int N,
                     CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA,
                     const CHAMELEON_Complex64_t *B, int LDB,
                     CHAMELEON_Complex64_t beta, const CHAMELEON_Complex64_t *Ccham,
                     CHAMELEON_Complex64_t *Cref, int LDC,
                     double *Cinitnorm, double *Cchamnorm, double *Clapacknorm )
{
    CHAMELEON_Complex64_t mone = -1.0;
    double                residual;
    double               *rwork;

    /* Workspace shared by all the norm computations below */
    rwork = (double *)malloc( chameleon_max(M, N)* sizeof(double) );

    /* Norms of the two inputs, taken before Cref is modified */
    *Cinitnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref,  LDC, rwork );
    *Cchamnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Ccham, LDC, rwork );

    /* Reference product, computed in place in Cref */
    cblas_zhemm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N,
                 CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC );
    *Clapacknorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, rwork );

    /* Cref <- Cref - Ccham over the whole LDC-by-N storage */
    cblas_zaxpy( LDC * N, CBLAS_SADDR(mone), Ccham, 1, Cref, 1 );
    residual = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, rwork );

    free(rwork);
    return residual;
}
#endif /* defined(PRECISION_z) || defined(PRECISION_c) */
/*--------------------------------------------------------------
 * Check the symm
 *
 * Recomputes the product with cblas_zsymm into Cref and compares it with
 * the result stored in Ccham.
 *
 *  - Cref holds the initial C on entry and is overwritten: first with the
 *    reference result, then with (reference - Ccham).
 *  - *Cinitnorm   receives the 'I' norm of the initial Cref,
 *    *Cchamnorm   the 'I' norm of Ccham,
 *    *Clapacknorm the 'I' norm of the reference result.
 *
 * Returns the 'I' norm of (reference - Ccham).
 *
 * NOTE(review): the result of malloc is not checked.
 */
double z_check_symm( cham_side_t side, cham_uplo_t uplo, int M, int N,
                     CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA,
                     const CHAMELEON_Complex64_t *B, int LDB,
                     CHAMELEON_Complex64_t beta, const CHAMELEON_Complex64_t *Ccham,
                     CHAMELEON_Complex64_t *Cref, int LDC,
                     double *Cinitnorm, double *Cchamnorm, double *Clapacknorm )
{
    CHAMELEON_Complex64_t mone = -1.0;
    double                residual;
    double               *rwork;

    /* Workspace shared by all the norm computations below */
    rwork = (double *)malloc( chameleon_max(M, N)* sizeof(double) );

    /* Norms of the two inputs, taken before Cref is modified */
    *Cinitnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref,  LDC, rwork );
    *Cchamnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Ccham, LDC, rwork );

    /* Reference product, computed in place in Cref */
    cblas_zsymm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N,
                 CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC );
    *Clapacknorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, rwork );

    /* Cref <- Cref - Ccham over the whole LDC-by-N storage */
    cblas_zaxpy( LDC * N, CBLAS_SADDR(mone), Ccham, 1, Cref, 1 );
    residual = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, rwork );

    free(rwork);
    return residual;
}
/*--------------------------------------------------------------
* Check the trsm
*/
......
......@@ -28,6 +28,21 @@ double z_check_gemm(cham_trans_t transA, cham_trans_t transB, int M, int N, int
CHAMELEON_Complex64_t *Cref, int LDC,
double *Cinitnorm, double *Cchamnorm, double *Clapacknorm );
#if defined(PRECISION_z) || defined(PRECISION_c)
/**
 * Compare a computed HEMM result (Ccham) against a reference product.
 * Cref must hold the initial C on entry and is overwritten.
 * The three output norms describe the initial C, Ccham and the reference
 * result; the return value is the norm of their difference.
 */
double z_check_hemm( cham_side_t side, cham_uplo_t uplo, int M, int N,
CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA,
const CHAMELEON_Complex64_t *B, int LDB,
CHAMELEON_Complex64_t beta, const CHAMELEON_Complex64_t *Ccham,
CHAMELEON_Complex64_t *Cref, int LDC,
double *Cinitnorm, double *Cchamnorm, double *Clapacknorm );
#endif
/**
 * Compare a computed SYMM result (Ccham) against a reference product.
 * Cref must hold the initial C on entry and is overwritten.
 * The three output norms describe the initial C, Ccham and the reference
 * result; the return value is the norm of their difference.
 */
double z_check_symm( cham_side_t side, cham_uplo_t uplo, int M, int N,
CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA,
const CHAMELEON_Complex64_t *B, int LDB,
CHAMELEON_Complex64_t beta, const CHAMELEON_Complex64_t *Ccham,
CHAMELEON_Complex64_t *Cref, int LDC,
double *Cinitnorm, double *Cchamnorm, double *Clapacknorm );
double z_check_trsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, cham_diag_t diag,
int M, int NRHS, CHAMELEON_Complex64_t alpha,
CHAMELEON_Complex64_t *A, int LDA,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment