Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 0187f482 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

codelet/map: Modify the map codelet to enable variadic arguments

parent 372809c0
No related branches found
No related tags found
1 merge request!201Modify the map function to take into parameters an variadic number of data
......@@ -16,7 +16,7 @@
* @author Cedric Augonnet
* @author Florent Pruvost
* @author Matthieu Kuhn
* @date 2023-08-31
* @date 2024-03-11
*
*/
#ifndef _chameleon_tasks_h_
......@@ -97,9 +97,49 @@ typedef int (*cham_unary_operator_t)( const CHAM_desc_t *desc,
cham_uplo_t uplo, int m, int n,
CHAM_tile_t *data, void *op_args );
typedef int (*cham_map_cpu_fct_t)( void *args, cham_uplo_t uplo, int m, int n, int ndata,
const CHAM_desc_t *desc, CHAM_tile_t *tile, ... );
#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION)
#include "gpucublas.h"
typedef int (*cham_map_cuda_fct_t)( cublasHandle_t handle, void *args,
cham_uplo_t uplo, int m, int n, int ndata,
const CHAM_desc_t *desc, CHAM_tile_t *tile, ... );
#else
typedef void *cham_map_cuda_fct_t;
#endif
#if defined(CHAMELEON_USE_HIP) && !defined(CHAMELEON_SIMULATION)
#include "gpuhipblas.h"
typedef int (*cham_map_hip_fct_t)( hipblasHandle_t handle, void *args,
cham_uplo_t uplo, int m, int n, int ndata,
const CHAM_desc_t *desc, CHAM_tile_t *tile, ... );
#else
typedef void *cham_map_hip_fct_t;
#endif
/**
* @brief Structure to store the operator functions on any architecture
*/
typedef struct cham_map_operator_s {
const char *name; /**< Name of the operator to be used in debug/tracing mode */
cham_map_cpu_fct_t cpufunc; /**< Pointer to the CPU function of the operator */
cham_map_cuda_fct_t cudafunc; /**< Pointer to the CUDA/cuBLAS function of the operator */
cham_map_hip_fct_t hipfunc; /**< Pointer to the HIP function of the operator */
} cham_map_operator_t;
/**
* @brief Structure to store the data information in the map operation
*/
typedef struct cham_map_data_s {
cham_access_t access; /**< Access type to the descriptor. Must be one of ChamR, ChamW, ChamRW. */
const CHAM_desc_t *desc; /**< Descriptor in which the data is taken to apply the map operation. */
} cham_map_data_t;
void INSERT_TASK_map( const RUNTIME_option_t *options,
cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
cham_unary_operator_t op_fct, void *op_args );
cham_uplo_t uplo, int m, int n,
int ndata, cham_map_data_t *data,
cham_map_operator_t *op_fcts, void *op_args );
void INSERT_TASK_gemm( const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB,
......
......@@ -9,43 +9,1025 @@
*
* @brief Chameleon map OpenMP codelet
*
* @version 1.2.0
* @author Philippe Virouleau
* @version 1.3.0
* @author Mathieu Faverge
* @date 2022-02-22
* @date 2024-03-11
*
*/
#include "chameleon_openmp.h"
void INSERT_TASK_map( const RUNTIME_option_t *options,
cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
cham_unary_operator_t op_fct, void *op_args )
typedef void (*omp_map_fct_t)( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args );
/*
* @brief Open map function type with one tile
*/
static inline void omp_map_one_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
assert( ndata == 1 );
assert( A->access == ChamR );
#pragma omp task depend( in:tileA[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 1, descA, tileA );
}
}
static inline void omp_map_one_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
assert( ndata == 1 );
assert( A->access == ChamW );
#pragma omp task depend( out:tileA[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 1, descA, tileA );
}
}
static inline void omp_map_one_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
assert( ndata == 1 );
assert( A->access == ChamRW );
#pragma omp task depend( in:tileA[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 1, descA, tileA );
}
}
/*
* @brief Open map function type with two tiles
*/
static inline void omp_map_two_R_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
assert( ndata == 2 );
assert( A->access == ChamR );
assert( B->access == ChamR );
#pragma omp task depend( in:tileA[0] ) depend( in:tileB[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 2, descA, tileA, descB, tileB );
}
}
static inline void omp_map_two_R_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
assert( ndata == 2 );
assert( A->access == ChamR );
assert( B->access == ChamW );
#pragma omp task depend( in:tileA[0] ) depend( out:tileB[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 2, descA, tileA, descB, tileB );
}
}
static inline void omp_map_two_R_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
assert( ndata == 2 );
assert( A->access == ChamR );
assert( B->access == ChamRW );
#pragma omp task depend( in:tileA[0] ) depend( inout:tileB[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 2, descA, tileA, descB, tileB );
}
}
static inline void omp_map_two_W_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
assert( ndata == 2 );
assert( A->access == ChamW );
assert( B->access == ChamR );
#pragma omp task depend( out:tileA[0] ) depend( in:tileB[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 2, descA, tileA, descB, tileB );
}
}
static inline void omp_map_two_W_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
assert( ndata == 2 );
assert( A->access == ChamW );
assert( B->access == ChamW );
#pragma omp task depend( out:tileA[0] ) depend( out:tileB[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 2, descA, tileA, descB, tileB );
}
}
static inline void omp_map_two_W_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
assert( ndata == 2 );
assert( A->access == ChamW );
assert( B->access == ChamRW );
#pragma omp task depend( out:tileA[0] ) depend( inout:tileB[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 2, descA, tileA, descB, tileB );
}
}
static inline void omp_map_two_RW_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
assert( ndata == 2 );
assert( A->access == ChamRW );
assert( B->access == ChamR );
#pragma omp task depend( inout:tileA[0] ) depend( in:tileB[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 2, descA, tileA, descB, tileB );
}
}
static inline void omp_map_two_RW_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
assert( ndata == 2 );
assert( A->access == ChamRW );
assert( B->access == ChamW );
#pragma omp task depend( inout:tileA[0] ) depend( out:tileB[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 2, descA, tileA, descB, tileB );
}
}
static inline void omp_map_two_RW_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
assert( ndata == 2 );
assert( A->access == ChamRW );
assert( B->access == ChamRW );
#pragma omp task depend( inout:tileA[0] ) depend( inout:tileB[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 2, descA, tileA, descB, tileB );
}
}
/*
* @brief Open map function type with three tiles
*/
static inline void omp_map_three_R_R_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamR );
assert( B->access == ChamR );
assert( C->access == ChamR );
#pragma omp task depend( in:tileA[0] ) depend( in:tileB[0] ) depend( in:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_R_R_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamR );
assert( B->access == ChamW );
#pragma omp task depend( in:tileA[0] ) depend( in:tileB[0] ) depend( out:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_R_R_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamR );
assert( B->access == ChamR );
assert( C->access == ChamRW );
#pragma omp task depend( in:tileA[0] ) depend( in:tileB[0] ) depend( inout:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_R_W_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamR );
assert( B->access == ChamW );
assert( C->access == ChamR );
#pragma omp task depend( in:tileA[0] ) depend( out:tileB[0] ) depend( in:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_R_W_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamW );
assert( B->access == ChamW );
assert( C->access == ChamR );
#pragma omp task depend( in:tileA[0] ) depend( out:tileB[0] ) depend( out:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_R_W_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamR );
assert( B->access == ChamW );
assert( C->access == ChamRW );
#pragma omp task depend( in:tileA[0] ) depend( out:tileB[0] ) depend( inout:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_R_RW_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamR );
assert( B->access == ChamRW );
assert( C->access == ChamR );
#pragma omp task depend( in:tileA[0] ) depend( inout:tileB[0] ) depend( in:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_R_RW_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
switch( accessA ) {
case ChamW:
#pragma omp task depend( out: tileA[0] )
assert( ndata == 3 );
assert( A->access == ChamR );
assert( B->access == ChamRW );
assert( C->access == ChamW );
#pragma omp task depend( in:tileA[0] ) depend( inout:tileB[0] ) depend( out:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_R_RW_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamR );
assert( B->access == ChamRW );
assert( C->access == ChamRW );
#pragma omp task depend( in:tileA[0] ) depend( inout:tileB[0] ) depend( inout:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_W_R_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamW );
assert( B->access == ChamR );
assert( C->access == ChamR );
#pragma omp task depend( out:tileA[0] ) depend( in:tileB[0] ) depend( in:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_W_R_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamW );
assert( B->access == ChamW );
#pragma omp task depend( out:tileA[0] ) depend( in:tileB[0] ) depend( out:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_W_R_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamW );
assert( B->access == ChamR );
assert( C->access == ChamRW );
#pragma omp task depend( out:tileA[0] ) depend( in:tileB[0] ) depend( inout:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_W_W_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamW );
assert( B->access == ChamW );
assert( C->access == ChamR );
#pragma omp task depend( out:tileA[0] ) depend( out:tileB[0] ) depend( in:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_W_W_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamW );
assert( B->access == ChamW );
assert( C->access == ChamR );
#pragma omp task depend( out:tileA[0] ) depend( out:tileB[0] ) depend( out:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_W_W_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamW );
assert( B->access == ChamW );
assert( C->access == ChamRW );
#pragma omp task depend( out:tileA[0] ) depend( out:tileB[0] ) depend( inout:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_W_RW_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamW );
assert( B->access == ChamRW );
assert( C->access == ChamR );
#pragma omp task depend( out:tileA[0] ) depend( inout:tileB[0] ) depend( in:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_W_RW_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamW );
assert( B->access == ChamRW );
assert( C->access == ChamW );
#pragma omp task depend( out:tileA[0] ) depend( inout:tileB[0] ) depend( out:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_W_RW_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamW );
assert( B->access == ChamRW );
assert( C->access == ChamRW );
#pragma omp task depend( out:tileA[0] ) depend( inout:tileB[0] ) depend( inout:tileC[0] )
{
op_fct( A, uplo, Am, An, tileA, op_args );
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
break;
}
static inline void omp_map_three_RW_R_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
case ChamR:
#pragma omp task depend( in: tileA[0] )
assert( ndata == 3 );
assert( A->access == ChamRW );
assert( B->access == ChamR );
assert( C->access == ChamR );
#pragma omp task depend( inout:tileA[0] ) depend( in:tileB[0] ) depend( in:tileC[0] )
{
op_fct( A, uplo, Am, An, tileA, op_args );
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_RW_R_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
break;
assert( ndata == 3 );
assert( A->access == ChamRW );
assert( B->access == ChamW );
case ChamRW:
default:
#pragma omp task depend( inout: tileA[0] )
#pragma omp task depend( inout:tileA[0] ) depend( in:tileB[0] ) depend( out:tileC[0] )
{
op_fct( A, uplo, Am, An, tileA, op_args );
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_RW_R_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamRW );
assert( B->access == ChamR );
assert( C->access == ChamRW );
#pragma omp task depend( inout:tileA[0] ) depend( in:tileB[0] ) depend( inout:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_RW_W_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
(void)options;
assert( ndata == 3 );
assert( A->access == ChamRW );
assert( B->access == ChamW );
assert( C->access == ChamR );
#pragma omp task depend( inout:tileA[0] ) depend( out:tileB[0] ) depend( in:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_RW_W_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamRW );
assert( B->access == ChamW );
assert( C->access == ChamR );
#pragma omp task depend( inout:tileA[0] ) depend( out:tileB[0] ) depend( out:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_RW_W_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamRW );
assert( B->access == ChamW );
assert( C->access == ChamRW );
#pragma omp task depend( inout:tileA[0] ) depend( out:tileB[0] ) depend( inout:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_RW_RW_R( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamRW );
assert( B->access == ChamRW );
assert( C->access == ChamR );
#pragma omp task depend( inout:tileA[0] ) depend( inout:tileB[0] ) depend( in:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_RW_RW_W( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamRW );
assert( B->access == ChamRW );
assert( C->access == ChamW );
#pragma omp task depend( inout:tileA[0] ) depend( inout:tileB[0] ) depend( out:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static inline void omp_map_three_RW_RW_RW( cham_uplo_t uplo, int m, int n, int ndata,
cham_map_data_t *data,
cham_map_operator_t *op_fcts,
void *op_args )
{
cham_map_data_t *A = data;
const CHAM_desc_t *descA = A->desc;
CHAM_tile_t *tileA = descA->get_blktile( descA, m, n );
cham_map_data_t *B = data + 1;
const CHAM_desc_t *descB = B->desc;
CHAM_tile_t *tileB = descB->get_blktile( descB, m, n );
cham_map_data_t *C = data + 2;
const CHAM_desc_t *descC = C->desc;
CHAM_tile_t *tileC = descC->get_blktile( descC, m, n );
assert( ndata == 3 );
assert( A->access == ChamRW );
assert( B->access == ChamRW );
assert( C->access == ChamRW );
#pragma omp task depend( inout:tileA[0] ) depend( inout:tileB[0] ) depend( inout:tileC[0] )
{
op_fcts->cpufunc( op_args, uplo, m, n, 3, descA, tileA, descB, tileB, descC, tileC );
}
}
static omp_map_fct_t omp_map_one[3] = {
omp_map_one_R, omp_map_one_W, omp_map_one_RW
};
static omp_map_fct_t omp_map_two[3][3] = {
{ omp_map_two_R_R, omp_map_two_R_W, omp_map_two_R_RW },
{ omp_map_two_W_R, omp_map_two_W_W, omp_map_two_W_RW },
{ omp_map_two_RW_R, omp_map_two_RW_W, omp_map_two_RW_RW }
};
static omp_map_fct_t omp_map_three[3][3][3] = {
{ { omp_map_three_R_R_R, omp_map_three_R_R_W, omp_map_three_R_R_RW },
{ omp_map_three_R_W_R, omp_map_three_R_W_W, omp_map_three_R_W_RW },
{ omp_map_three_R_RW_R, omp_map_three_R_RW_W, omp_map_three_R_RW_RW } },
{ { omp_map_three_W_R_R, omp_map_three_W_R_W, omp_map_three_W_R_RW },
{ omp_map_three_W_W_R, omp_map_three_W_W_W, omp_map_three_W_W_RW },
{ omp_map_three_W_RW_R, omp_map_three_W_RW_W, omp_map_three_W_RW_RW } },
{ { omp_map_three_RW_R_R, omp_map_three_RW_R_W, omp_map_three_RW_R_RW },
{ omp_map_three_RW_W_R, omp_map_three_RW_W_W, omp_map_three_RW_W_RW },
{ omp_map_three_RW_RW_R, omp_map_three_RW_RW_W, omp_map_three_RW_RW_RW } }
};
void INSERT_TASK_map( const RUNTIME_option_t *options,
cham_uplo_t uplo, int m, int n,
int ndata, cham_map_data_t *data,
cham_map_operator_t *op_fcts, void *op_args )
{
if ( ( ndata < 0 ) || ( ndata > 3 ) ) {
fprintf( stderr, "INSERT_TASK_map() can handle only 1 to 3 parameters\n" );
return;
}
switch( ndata ) {
case 1:
omp_map_one[ data[0].access - 1 ](
uplo, m, n, ndata, data, op_fcts, op_args );
break;
case 2:
omp_map_two[ data[0].access - 1 ][ data[1].access - 1 ](
uplo, m, n, ndata, data, op_fcts, op_args );
break;
case 3:
omp_map_three[ data[0].access - 1 ][ data[1].access - 1 ][ data[2].access - 1 ](
uplo, m, n, ndata, data, op_fcts, op_args );
break;
}
}
......@@ -9,50 +9,120 @@
*
* @brief Chameleon map PaRSEC codelet
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @date 2022-02-22
* @date 2024-03-11
*
*/
#include "chameleon_parsec.h"
#include "chameleon/tasks.h"
struct parsec_map_args_s {
cham_uplo_t uplo;
int m, n;
cham_map_operator_t *op_fcts;
void *op_args;
const CHAM_desc_t *desc[1];
};
static inline int
CORE_map_one_parsec( parsec_execution_stream_t *context,
parsec_task_t *this_task )
{
struct parsec_map_args_s *pargs = NULL;
CHAM_tile_t *tileA;
parsec_dtd_unpack_args( this_task, &pargs, &tileA );
pargs->op_fcts->cpufunc( pargs->op_args, pargs->uplo, pargs->m, pargs->n, 1,
pargs->desc[0], tileA );
free( pargs );
}
static inline int
CORE_map_two_parsec( parsec_execution_stream_t *context,
parsec_task_t *this_task )
{
struct parsec_map_args_s *pargs = NULL;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
parsec_dtd_unpack_args( this_task, &pargs, &tileA, &tileB );
pargs->op_fcts->cpufunc( pargs->op_args, pargs->uplo, pargs->m, pargs->n, 2,
pargs->desc[0], tileA, pargs->desc[1], tileB );
free( pargs );
}
static inline int
CORE_map_parsec( parsec_execution_stream_t *context,
parsec_task_t *this_task )
CORE_map_three_parsec( parsec_execution_stream_t *context,
parsec_task_t *this_task )
{
const CHAM_desc_t *desc;
cham_uplo_t uplo;
int m;
int n;
void *data;
cham_unary_operator_t op_fct;
void *op_args;
parsec_dtd_unpack_args(
this_task, &desc, &uplo, &m, &n, &data, &op_fct, &op_args );
op_fct( desc, uplo, m, n, data, op_args );
(void)context;
return PARSEC_HOOK_RETURN_DONE;
struct parsec_map_args_s *pargs = NULL;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
CHAM_tile_t *tileC;
parsec_dtd_unpack_args( this_task, &pargs, &tileA, &tileB, &tileC );
pargs->op_fcts->cpufunc( pargs->op_args, pargs->uplo, pargs->m, pargs->n, 3,
pargs->desc[0], tileA, pargs->desc[1], tileB,
pargs->desc[2], tileC );
free( pargs );
}
void INSERT_TASK_map( const RUNTIME_option_t *options,
cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
cham_unary_operator_t op_fct, void *op_args )
cham_uplo_t uplo, int m, int n,
int ndata, cham_map_data_t *data,
cham_map_operator_t *op_fcts, void *op_args )
{
parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
int parsec_accessA = cham_to_parsec_access( accessA );
parsec_dtd_taskpool_insert_task(
PARSEC_dtd_taskpool, CORE_map_parsec, options->priority, "map",
sizeof(CHAM_desc_t*), &A, VALUE,
sizeof(cham_uplo_t), &uplo, VALUE,
sizeof(int), &Am, VALUE,
sizeof(int), &An, VALUE,
PASSED_BY_REF, RTBLKADDR(A, void, Am, An), chameleon_parsec_get_arena_index( A ) | parsec_accessA,
sizeof(cham_unary_operator_t), &op_fct, VALUE,
sizeof(void*), &op_args, VALUE,
PARSEC_DTD_ARG_END );
parsec_taskpool_t *PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
struct parsec_map_args_s *pargs = NULL;
size_t pargs_size = 0;
int i;
if ( ( ndata < 0 ) || ( ndata > 3 ) ) {
fprintf( stderr, "INSERT_TASK_map() can handle only 1 to 3 parameters\n" );
return;
}
pargs_size = sizeof( struct parsec_map_args_s ) + (ndata - 1) * sizeof( CHAM_desc_t * );
pargs = malloc( pargs_size );
pargs->uplo = uplo;
pargs->m = m;
pargs->n = n;
pargs->op_fcts = op_fcts;
pargs->op_args = op_args;
for( i=0; i<ndata; i++ ) {
pargs->desc[i] = data[i].desc;
}
switch( ndata ) {
case 1:
parsec_dtd_taskpool_insert_task(
PARSEC_dtd_taskpool, CORE_map_one_parsec, options->priority, op_fcts->name,
sizeof(struct parsec_map_args_s*), &pargs, VALUE,
PASSED_BY_REF, RTBLKADDR( data[0].desc, void, m, n ), chameleon_parsec_get_arena_index( data[0].desc ) | cham_to_parsec_access( data[0].access ),
PARSEC_DTD_ARG_END );
break;
case 2:
parsec_dtd_taskpool_insert_task(
PARSEC_dtd_taskpool, CORE_map_two_parsec, options->priority, op_fcts->name,
sizeof(struct parsec_map_args_s*), &pargs, VALUE,
PASSED_BY_REF, RTBLKADDR( data[0].desc, void, m, n ), chameleon_parsec_get_arena_index( data[0].desc ) | cham_to_parsec_access( data[0].access ),
PASSED_BY_REF, RTBLKADDR( data[1].desc, void, m, n ), chameleon_parsec_get_arena_index( data[1].desc ) | cham_to_parsec_access( data[1].access ),
PARSEC_DTD_ARG_END );
break;
case 3:
parsec_dtd_taskpool_insert_task(
PARSEC_dtd_taskpool, CORE_map_three_parsec, options->priority, op_fcts->name,
sizeof(struct parsec_map_args_s*), &pargs, VALUE,
PASSED_BY_REF, RTBLKADDR( data[0].desc, void, m, n ), chameleon_parsec_get_arena_index( data[0].desc ) | cham_to_parsec_access( data[0].access ),
PASSED_BY_REF, RTBLKADDR( data[1].desc, void, m, n ), chameleon_parsec_get_arena_index( data[1].desc ) | cham_to_parsec_access( data[1].access ),
PASSED_BY_REF, RTBLKADDR( data[2].desc, void, m, n ), chameleon_parsec_get_arena_index( data[2].desc ) | cham_to_parsec_access( data[2].access ),
PARSEC_DTD_ARG_END );
break;
}
}
......@@ -9,42 +9,114 @@
*
* @brief Chameleon map Quark codelet
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @date 2022-02-22
* @date 2024-03-11
*
*/
#include "chameleon_quark.h"
#include "chameleon/tasks.h"
void CORE_map_quark(Quark *quark)
struct quark_map_args_s {
cham_uplo_t uplo;
int m, n;
cham_map_operator_t *op_fcts;
void *op_args;
const CHAM_desc_t *desc[1];
};
void CORE_map_one_quark(Quark *quark)
{
struct quark_map_args_s *qargs = NULL;
CHAM_tile_t *tileA;
quark_unpack_args_2( quark, qargs, tileA );
qargs->op_fcts->cpufunc( qargs->op_args, qargs->uplo, qargs->m, qargs->n, 1,
qargs->desc[0], tileA );
free( qargs );
}
void CORE_map_two_quark(Quark *quark)
{
struct quark_map_args_s *qargs = NULL;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
quark_unpack_args_3( quark, qargs, tileA, tileB );
qargs->op_fcts->cpufunc( qargs->op_args, qargs->uplo, qargs->m, qargs->n, 2,
qargs->desc[0], tileA, qargs->desc[1], tileB );
free( qargs );
}
void CORE_map_three_quark(Quark *quark)
{
const CHAM_desc_t *desc;
cham_uplo_t uplo;
int m;
int n;
CHAM_tile_t *tile;
cham_unary_operator_t op_fct;
void *op_args;
quark_unpack_args_7( quark, desc, uplo, m, n, tile, op_fct, op_args );
op_fct( desc, uplo, m, n, tile, op_args );
struct quark_map_args_s *qargs = NULL;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
CHAM_tile_t *tileC;
quark_unpack_args_4( quark, qargs, tileA, tileB, tileC );
qargs->op_fcts->cpufunc( qargs->op_args, qargs->uplo, qargs->m, qargs->n, 3,
qargs->desc[0], tileA, qargs->desc[1], tileB,
qargs->desc[2], tileC );
free( qargs );
}
void INSERT_TASK_map( const RUNTIME_option_t *options,
cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
cham_unary_operator_t op_fct, void *op_args )
cham_uplo_t uplo, int m, int n,
int ndata, cham_map_data_t *data,
cham_map_operator_t *op_fcts, void *op_args )
{
quark_option_t *opt = (quark_option_t*)(options->schedopt);
QUARK_Insert_Task(
opt->quark, CORE_map_quark, (Quark_Task_Flags*)opt,
sizeof(CHAM_desc_t*), &A, VALUE,
sizeof(cham_uplo_t), &uplo, VALUE,
sizeof(int), &Am, VALUE,
sizeof(int), &An, VALUE,
sizeof(void*), RTBLKADDR(A, void, Am, An), cham_to_quark_access( accessA ),
sizeof(cham_unary_operator_t), &op_fct, VALUE,
sizeof(void*), &op_args, VALUE,
0);
struct quark_map_args_s *qargs = NULL;
quark_option_t *opt = (quark_option_t*)(options->schedopt);
size_t qargs_size = 0;
int i;
if ( ( ndata < 0 ) || ( ndata > 3 ) ) {
fprintf( stderr, "INSERT_TASK_map() can handle only 1 to 3 parameters\n" );
return;
}
qargs_size = sizeof( struct quark_map_args_s ) + (ndata - 1) * sizeof( CHAM_desc_t * );
qargs = malloc( qargs_size );
qargs->uplo = uplo;
qargs->m = m;
qargs->n = n;
qargs->op_fcts = op_fcts;
qargs->op_args = op_args;
for( i=0; i<ndata; i++ ) {
qargs->desc[i] = data[i].desc;
}
switch( ndata ) {
case 1:
QUARK_Insert_Task(
opt->quark, CORE_map_one_quark, (Quark_Task_Flags*)opt,
sizeof(void*), &qargs, VALUE,
sizeof(void*), RTBLKADDR( data[0].desc, void, m, n), cham_to_quark_access( data[0].access ),
0 );
break;
case 2:
QUARK_Insert_Task(
opt->quark, CORE_map_two_quark, (Quark_Task_Flags*)opt,
sizeof(void*), &qargs, VALUE,
sizeof(void*), RTBLKADDR( data[0].desc, void, m, n), cham_to_quark_access( data[0].access ),
sizeof(void*), RTBLKADDR( data[1].desc, void, m, n), cham_to_quark_access( data[1].access ),
0 );
break;
case 3:
QUARK_Insert_Task(
opt->quark, CORE_map_three_quark, (Quark_Task_Flags*)opt,
sizeof(void*), &qargs, VALUE,
sizeof(void*), RTBLKADDR( data[0].desc, void, m, n), cham_to_quark_access( data[0].access ),
sizeof(void*), RTBLKADDR( data[1].desc, void, m, n), cham_to_quark_access( data[1].access ),
sizeof(void*), RTBLKADDR( data[2].desc, void, m, n), cham_to_quark_access( data[2].access ),
0 );
break;
}
}
......@@ -12,66 +12,332 @@
* @version 1.3.0
* @author Mathieu Faverge
* @author Florent Pruvost
* @date 2023-07-06
* @date 2024-03-11
*
*/
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
CHAMELEON_CL_CB(map, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
struct cl_map_args_s {
cham_uplo_t uplo;
int m, n;
cham_map_operator_t *op_fcts;
void *op_args;
const CHAM_desc_t *desc[1];
};
/*
* Map with a single tile as parameter
*/
#if !defined(CHAMELEON_SIMULATION)
static void cl_map_one_cpu_func( void *descr[], void *cl_arg )
{
struct cl_map_args_s *clargs = (struct cl_map_args_s*)cl_arg;
CHAM_tile_t *tileA;
tileA = cti_interface_get( descr[0] );
clargs->op_fcts->cpufunc( clargs->op_args, clargs->uplo, clargs->m, clargs->n, 1,
clargs->desc[0], tileA );
}
#if defined(CHAMELEON_USE_CUDA)
static void
cl_map_one_cuda_func( void *descr[], void *cl_arg )
{
struct cl_map_args_s *clargs = (struct cl_map_args_s*)cl_arg;
cublasHandle_t handle = starpu_cublas_get_local_handle();
CHAM_tile_t *tileA;
tileA = cti_interface_get( descr[0] );
assert( tileA->format & CHAMELEON_TILE_FULLRANK );
clargs->op_fcts->cudafunc( handle, clargs->op_args, clargs->uplo, clargs->m, clargs->n, 1,
clargs->desc[0], tileA );
}
#endif /* defined(CHAMELEON_USE_CUDA) */
#if defined(CHAMELEON_USE_HIP)
static void
cl_map_one_hip_func( void *descr[], void *cl_arg )
{
struct cl_map_args_s *clargs = (struct cl_map_args_s*)cl_arg;
hipblasHandle_t handle = starpu_hipblas_get_local_handle();
CHAM_tile_t *tileA;
tileA = cti_interface_get( descr[0] );
assert( tileA->format & CHAMELEON_TILE_FULLRANK );
clargs->op_fcts->hipfunc( handle, clargs->op_args, clargs->uplo, clargs->m, clargs->n, 1,
clargs->desc[0], tileA );
}
#endif /* defined(CHAMELEON_USE_HIP) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CHAMELEON_CL_CB( map_one, cti_handle_get_m( task->handles[0] ), cti_handle_get_n( task->handles[0] ), 0, M * N )
#if defined(CHAMELEON_USE_HIP)
CODELETS_GPU( map_one, cl_map_one_cpu_func, cl_map_one_hip_func, STARPU_HIP_ASYNC )
#else
CODELETS( map_one, cl_map_one_cpu_func, cl_map_one_cuda_func, STARPU_CUDA_ASYNC )
#endif
/*
* Map with two tiles as parameter
*/
#if !defined(CHAMELEON_SIMULATION)
static void cl_map_two_cpu_func( void *descr[], void *cl_arg )
{
struct cl_map_args_s *clargs = (struct cl_map_args_s*)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
tileA = cti_interface_get( descr[0] );
tileB = cti_interface_get( descr[1] );
clargs->op_fcts->cpufunc( clargs->op_args, clargs->uplo, clargs->m, clargs->n, 2,
clargs->desc[0], tileA, clargs->desc[1], tileB );
}
#if defined(CHAMELEON_USE_CUDA)
static void
cl_map_two_cuda_func( void *descr[], void *cl_arg )
{
struct cl_map_args_s *clargs = (struct cl_map_args_s*)cl_arg;
cublasHandle_t handle = starpu_cublas_get_local_handle();
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
tileA = cti_interface_get( descr[0] );
tileB = cti_interface_get( descr[1] );
assert( tileA->format & CHAMELEON_TILE_FULLRANK );
assert( tileB->format & CHAMELEON_TILE_FULLRANK );
clargs->op_fcts->cudafunc( handle, clargs->op_args, clargs->uplo, clargs->m, clargs->n, 2,
clargs->desc[0], tileA, clargs->desc[1], tileB );
}
#endif /* defined(CHAMELEON_USE_CUDA) */
#if defined(CHAMELEON_USE_HIP)
static void
cl_map_two_hip_func( void *descr[], void *cl_arg )
{
struct cl_map_args_s *clargs = (struct cl_map_args_s*)cl_arg;
hipblasHandle_t handle = starpu_hipblas_get_local_handle();
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
tileA = cti_interface_get( descr[0] );
tileB = cti_interface_get( descr[1] );
assert( tileA->format & CHAMELEON_TILE_FULLRANK );
assert( tileB->format & CHAMELEON_TILE_FULLRANK );
clargs->op_fcts->hipfunc( handle, clargs->op_args, clargs->uplo, clargs->m, clargs->n, 2,
clargs->desc[0], tileA, clargs->desc[1], tileB );
}
#endif /* defined(CHAMELEON_USE_HIP) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CHAMELEON_CL_CB( map_two, cti_handle_get_m( task->handles[0] ), cti_handle_get_n( task->handles[0] ), 0, M * N )
#if defined(CHAMELEON_USE_HIP)
CODELETS_GPU( map_two, cl_map_two_cpu_func, cl_map_two_hip_func, STARPU_HIP_ASYNC )
#else
CODELETS( map_two, cl_map_two_cpu_func, cl_map_two_cuda_func, STARPU_CUDA_ASYNC )
#endif
/*
* Map with three tiles as parameter
*/
#if !defined(CHAMELEON_SIMULATION)
static void cl_map_cpu_func(void *descr[], void *cl_arg)
static void cl_map_three_cpu_func( void *descr[], void *cl_arg )
{
struct cl_map_args_s *clargs = (struct cl_map_args_s*)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
CHAM_tile_t *tileC;
tileA = cti_interface_get( descr[0] );
tileB = cti_interface_get( descr[1] );
tileC = cti_interface_get( descr[2] );
clargs->op_fcts->cpufunc( clargs->op_args, clargs->uplo, clargs->m, clargs->n, 3,
clargs->desc[0], tileA, clargs->desc[1], tileB,
clargs->desc[2], tileC );
}
#if defined(CHAMELEON_USE_CUDA)
static void
cl_map_three_cuda_func( void *descr[], void *cl_arg )
{
struct cl_map_args_s *clargs = (struct cl_map_args_s*)cl_arg;
cublasHandle_t handle = starpu_cublas_get_local_handle();
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
CHAM_tile_t *tileC;
tileA = cti_interface_get( descr[0] );
tileB = cti_interface_get( descr[1] );
tileC = cti_interface_get( descr[2] );
assert( tileA->format & CHAMELEON_TILE_FULLRANK );
assert( tileB->format & CHAMELEON_TILE_FULLRANK );
assert( tileC->format & CHAMELEON_TILE_FULLRANK );
clargs->op_fcts->cudafunc( handle, clargs->op_args, clargs->uplo, clargs->m, clargs->n, 3,
clargs->desc[0], tileA, clargs->desc[1], tileB,
clargs->desc[2], tileC );
}
#endif /* defined(CHAMELEON_USE_CUDA) */
#if defined(CHAMELEON_USE_HIP)
static void
cl_map_three_hip_func( void *descr[], void *cl_arg )
{
const CHAM_desc_t *desc;
cham_uplo_t uplo;
int m;
int n;
CHAM_tile_t *tile;
cham_unary_operator_t op_fct;
void *op_args;
tile = cti_interface_get(descr[0]);
starpu_codelet_unpack_args(cl_arg, &desc, &uplo, &m, &n, &op_fct, &op_args );
op_fct( desc, uplo, m, n, tile, op_args );
struct cl_map_args_s *clargs = (struct cl_map_args_s*)cl_arg;
hipblasHandle_t handle = starpu_hipblas_get_local_handle();
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
CHAM_tile_t *tileC;
tileA = cti_interface_get( descr[0] );
tileB = cti_interface_get( descr[1] );
tileC = cti_interface_get( descr[2] );
assert( tileA->format & CHAMELEON_TILE_FULLRANK );
assert( tileB->format & CHAMELEON_TILE_FULLRANK );
assert( tileC->format & CHAMELEON_TILE_FULLRANK );
clargs->op_fcts->hipfunc( handle, clargs->op_args, clargs->uplo, clargs->m, clargs->n, 3,
clargs->desc[0], tileA, clargs->desc[1], tileB,
clargs->desc[2], tileC );
}
#endif /* defined(CHAMELEON_USE_HIP) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CODELETS_CPU(map, cl_map_cpu_func)
CHAMELEON_CL_CB( map_three, cti_handle_get_m( task->handles[0] ), cti_handle_get_n( task->handles[0] ), 0, M * N )
#if defined(CHAMELEON_USE_HIP)
CODELETS_GPU( map_three, cl_map_three_cpu_func, cl_map_three_hip_func, STARPU_HIP_ASYNC )
#else
CODELETS( map_three, cl_map_three_cpu_func, cl_map_three_cuda_func, STARPU_CUDA_ASYNC )
#endif
void INSERT_TASK_map( const RUNTIME_option_t *options,
cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
cham_unary_operator_t op_fct, void *op_args )
cham_uplo_t uplo, int m, int n,
int ndata, cham_map_data_t *data,
cham_map_operator_t *op_fcts, void *op_args )
{
struct cl_map_args_s *clargs = NULL;
const char *cl_name = (op_fcts->name == NULL) ? "map" : op_fcts->name;
int exec = 0;
int i, readonly = 1;
size_t clargs_size = 0;
void (*callback)(void*);
struct starpu_codelet *codelet = &cl_map;
void (*callback)(void*) = options->profiling ? cl_map_callback : NULL;
char *cl_name = "map";
if ( ( ndata < 0 ) || ( ndata > 3 ) ) {
fprintf( stderr, "INSERT_TASK_map() can handle only 1 to 3 parameters\n" );
return;
}
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_RW(A, Am, An);
for( i=0; i<ndata; i++ ) {
if ( data[i].access == ChamRW ) {
CHAMELEON_ACCESS_RW( data[i].desc, m, n );
readonly = 0;
}
else if ( data[i].access == ChamW ) {
CHAMELEON_ACCESS_W( data[i].desc, m, n );
readonly = 0;
}
else {
CHAMELEON_ACCESS_R( data[i].desc, m, n );
}
}
exec = __chameleon_need_exec;
/* Force execution for read-only functions */
if ( readonly && __chameleon_need_submit ) {
exec = 1;
}
CHAMELEON_END_ACCESS_DECLARATION;
cl_name = chameleon_codelet_name( cl_name, 1,
A->get_blktile( A, Am, An ) );
rt_starpu_insert_task(
codelet,
STARPU_VALUE, &A, sizeof(CHAM_desc_t*),
STARPU_VALUE, &uplo, sizeof(cham_uplo_t),
STARPU_VALUE, &Am, sizeof(int),
STARPU_VALUE, &An, sizeof(int),
cham_to_starpu_access(accessA), RTBLKADDR(A, ChamByte, Am, An),
STARPU_VALUE, &op_fct, sizeof(cham_unary_operator_t),
STARPU_VALUE, &op_args, sizeof(void*),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, options->workerid,
if ( exec ) {
clargs_size = sizeof( struct cl_map_args_s ) + (ndata - 1) * sizeof( CHAM_desc_t * );
clargs = malloc( clargs_size );
clargs->uplo = uplo;
clargs->m = m;
clargs->n = n;
clargs->op_fcts = op_fcts;
clargs->op_args = op_args;
for( i=0; i<ndata; i++ ) {
clargs->desc[i] = data[i].desc;
}
}
/* Refine name */
for( i=0; i<ndata; i++ ) {
cl_name = chameleon_codelet_name( cl_name, 1,
(data[i].desc)->get_blktile( data[i].desc, m, n ) );
}
/* Insert the task */
switch( ndata ) {
case 1:
callback = options->profiling ? cl_map_one_callback : NULL;
rt_starpu_insert_task(
&cl_map_one,
/* Task codelet arguments */
STARPU_CL_ARGS, clargs, clargs_size,
/* Task handles */
cham_to_starpu_access( data[0].access ), RTBLKADDR( data[0].desc, ChamByte, m, n ),
/* Common task arguments */
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, options->workerid,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, cl_name,
#endif
0 );
break;
case 2:
callback = options->profiling ? cl_map_two_callback : NULL;
rt_starpu_insert_task(
&cl_map_two,
/* Task codelet arguments */
STARPU_CL_ARGS, clargs, clargs_size,
/* Task handles */
cham_to_starpu_access( data[0].access ), RTBLKADDR( data[0].desc, ChamByte, m, n ),
cham_to_starpu_access( data[1].access ), RTBLKADDR( data[1].desc, ChamByte, m, n ),
/* Common task arguments */
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, options->workerid,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, cl_name,
#endif
0 );
break;
case 3:
callback = options->profiling ? cl_map_three_callback : NULL;
rt_starpu_insert_task(
&cl_map_three,
/* Task codelet arguments */
STARPU_CL_ARGS, clargs, clargs_size,
/* Task handles */
cham_to_starpu_access( data[0].access ), RTBLKADDR( data[0].desc, ChamByte, m, n ),
cham_to_starpu_access( data[1].access ), RTBLKADDR( data[1].desc, ChamByte, m, n ),
cham_to_starpu_access( data[2].access ), RTBLKADDR( data[2].desc, ChamByte, m, n ),
/* Common task arguments */
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, options->workerid,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, cl_name,
STARPU_NAME, cl_name,
#endif
0);
0 );
break;
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment