Mentions légales du service

Skip to content
Snippets Groups Projects
Commit de10a29d authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

codelets: Add zlaswp_{get,set} codelets

parent 1a37c04c
No related branches found
No related tags found
1 merge request: !404 "GETRF: add panel permutation to get a full partial pivoting available"
......@@ -24,7 +24,7 @@
* @author Alycia Lisito
* @author Romain Peressoni
* @author Matthieu Kuhn
* @date 2023-08-22
* @date 2023-08-31
* @precisions normal z -> c d s
*
*/
......@@ -186,6 +186,16 @@ void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
void INSERT_TASK_zlaset2( const RUNTIME_option_t *options,
cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha,
const CHAM_desc_t *tileA, int tileAm, int tileAn );
/**
 * Submit a task running the zlaswp "get" kernel on tile (tileAm, tileAn) of
 * tileA and tile (tileBm, tileBn) of tileB; the backends read tileA and
 * read/write tileB.
 *
 * m0 and k are forwarded unchanged to the kernel. tIPIV/tIPIVk select the
 * permutation data handed to the kernel (the backends that implement it use
 * RUNTIME_perm_getaddr( tIPIV, tIPIVk )).
 */
void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
int m0, int k,
const CHAM_ipiv_t *tIPIV, int tIPIVk,
const CHAM_desc_t *tileA, int tileAm, int tileAn,
const CHAM_desc_t *tileB, int tileBm, int tileBn );
/**
 * Submit a task running the zlaswp "set" kernel on tile (tileAm, tileAn) of
 * tileA and tile (tileBm, tileBn) of tileB; the backends read tileA and
 * read/write tileB.
 *
 * m0 and k are forwarded unchanged to the kernel. tIPIV/tIPIVk select the
 * inverse-permutation data handed to the kernel (the backends that implement
 * it use RUNTIME_invp_getaddr( tIPIV, tIPIVk )).
 */
void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
int m0, int k,
const CHAM_ipiv_t *tIPIV, int tIPIVk,
const CHAM_desc_t *tileA, int tileAm, int tileAn,
const CHAM_desc_t *tileB, int tileBm, int tileBn );
void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb,
const CHAM_desc_t *A, int Am, int An,
......
......@@ -76,8 +76,9 @@ set(CODELETS_ZSRC
codelets/codelet_zlanhe.c
codelets/codelet_zlansy.c
codelets/codelet_zlantr.c
codelets/codelet_zlaset2.c
codelets/codelet_zlaset.c
codelets/codelet_zlaset2.c
codelets/codelet_zlaswp.c
codelets/codelet_zlatro.c
codelets/codelet_zlauum.c
codelets/codelet_zplghe.c
......
/**
*
* @file openmp/codelet_zlaswp.c
*
* @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon OpenMP codelets to apply zlaswp on a panel
*
* @version 1.3.0
* @author Mathieu Faverge
* @date 2023-08-31
* @precisions normal z -> c d s
*
*/
#include "chameleon_openmp.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_ztile.h"
/**
 * OpenMP backend: submit a task that runs TCORE_zlaswp_get on one pair of tiles.
 *
 * @param[in] options  Runtime options; not used by this backend.
 * @param[in] m0       Forwarded unchanged to TCORE_zlaswp_get.
 * @param[in] k        Forwarded unchanged to TCORE_zlaswp_get.
 * @param[in] ipiv     Pivot descriptor; not yet consumed here (see note below).
 * @param[in] ipivk    Index into ipiv; not yet consumed here.
 * @param[in] A        Descriptor of the source matrix; tile (Am, An) is read.
 * @param[in] U        Descriptor of the destination matrix; tile (Um, Un) is
 *                     read and written.
 */
void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
                             int m0, int k,
                             const CHAM_ipiv_t *ipiv, int ipivk,
                             const CHAM_desc_t *A, int Am, int An,
                             const CHAM_desc_t *U, int Um, int Un )
{
    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
    CHAM_tile_t *tileU = U->get_blktile( U, Um, Un );
    /*
     * NOTE(review): the permutation is not extracted from ipiv yet, so the
     * kernel receives NULL and the depend( in:perm ) clause only refers to
     * this local variable. This backend is a placeholder until that is done.
     */
    int         *perm  = NULL; // get perm from ipiv

    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
    assert( tileU->format & CHAMELEON_TILE_FULLRANK );

#pragma omp task firstprivate( m0, k, ipiv, tileA, tileU ) depend( in:perm ) depend( in:tileA[0] ) depend( inout:tileU[0] )
    {
        /*
         * Pass the dimensions of the tile, as the other backends do.
         * A is the descriptor here, so A->m / A->n would presumably be the
         * sizes of the whole matrix rather than of the tile.
         */
        TCORE_zlaswp_get( m0, tileA->m, tileA->n, k, tileA, tileU, perm );
    }

    (void)options;
    (void)ipivk;
}
/**
 * OpenMP backend: submit a task that runs TCORE_zlaswp_set on one pair of tiles.
 *
 * @param[in] options  Runtime options; not used by this backend.
 * @param[in] m0       Forwarded unchanged to TCORE_zlaswp_set.
 * @param[in] k        Forwarded unchanged to TCORE_zlaswp_set.
 * @param[in] ipiv     Pivot descriptor; not yet consumed here (see note below).
 * @param[in] ipivk    Index into ipiv; not yet consumed here.
 * @param[in] A        Descriptor of the source matrix; tile (Am, An) is read.
 * @param[in] B        Descriptor of the destination matrix; tile (Bm, Bn) is
 *                     read and written.
 */
void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
                             int m0, int k,
                             const CHAM_ipiv_t *ipiv, int ipivk,
                             const CHAM_desc_t *A, int Am, int An,
                             const CHAM_desc_t *B, int Bm, int Bn )
{
    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
    /*
     * NOTE(review): the inverse permutation is not extracted from ipiv yet,
     * so the kernel receives NULL and the depend( in:invp ) clause only
     * refers to this local variable. This backend is a placeholder until
     * that is done.
     */
    int         *invp  = NULL; // get invp from ipiv

    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
    assert( tileB->format & CHAMELEON_TILE_FULLRANK );

#pragma omp task firstprivate( m0, k, ipiv, tileA, tileB ) depend( in:invp ) depend( in:tileA[0] ) depend( inout:tileB[0] )
    {
        /*
         * Use the dimensions of the destination tile B, matching the PaRSEC
         * and StarPU backends. A is the descriptor here, so A->m / A->n would
         * presumably be the sizes of the whole matrix rather than of a tile.
         */
        TCORE_zlaswp_set( m0, tileB->m, tileB->n, k, tileA, tileB, invp );
    }

    (void)options;
    (void)ipivk;
}
/**
*
* @file parsec/codelet_zlaswp.c
*
* @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon PaRSEC codelets to apply zlaswp on a panel
*
* @version 1.3.0
* @author Mathieu Faverge
* @date 2023-08-31
* @precisions normal z -> c d s
*
*/
#include "chameleon_parsec.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * PaRSEC task body for the zlaswp "get" kernel.
 *
 * Unpacks the arguments in the order they are packed by
 * INSERT_TASK_zlaswp_get and forwards them to CORE_zlaswp_get.
 *
 * @param[in] context    Execution stream; unused.
 * @param[in] this_task  Task holding the packed arguments.
 *
 * @return PARSEC_HOOK_RETURN_DONE once the kernel has run.
 */
static inline int
CORE_zlaswp_get_parsec( parsec_execution_stream_t *context,
                        parsec_task_t             *this_task )
{
    int m0, m, n, k, lda, ldb, *perm;
    CHAMELEON_Complex64_t *A, *B;

    /* Every output must be passed by address, including lda and ldb. */
    parsec_dtd_unpack_args( this_task, &m0, &m, &n, &k, &A, &lda, &B, &ldb, &perm );

    CORE_zlaswp_get( m0, m, n, k, A, lda, B, ldb, perm );

    (void)context;
    return PARSEC_HOOK_RETURN_DONE;
}
/**
 * PaRSEC backend: insert a DTD task that runs CORE_zlaswp_get_parsec.
 *
 * The argument list below must stay in the order unpacked by
 * CORE_zlaswp_get_parsec: m0, m, n, k, A, lda, B, ldb, perm.
 * The m and n values come from the source tile (Am, An) of A.
 */
void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
                             int m0, int k,
                             const CHAM_ipiv_t *ipiv, int ipivk,
                             const CHAM_desc_t *A, int Am, int An,
                             const CHAM_desc_t *U, int Um, int Un )
{
    parsec_taskpool_t *PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
    CHAM_tile_t       *srcTile             = A->get_blktile( A, Am, An );
    CHAM_tile_t       *dstTile             = U->get_blktile( U, Um, Un );

    parsec_dtd_taskpool_insert_task(
        PARSEC_dtd_taskpool, CORE_zlaswp_get_parsec, options->priority, "laswp_get",
        /* Scalars copied at insertion time */
        sizeof(int), &m0,           VALUE,
        sizeof(int), &(srcTile->m), VALUE,
        sizeof(int), &(srcTile->n), VALUE,
        sizeof(int), &k,            VALUE,
        /* Source tile, read only */
        PASSED_BY_REF, RTBLKADDR(A, ChamComplexDouble, Am, An), chameleon_parsec_get_arena_index( A ) | INPUT,
        sizeof(int), &(srcTile->ld), VALUE,
        /* Destination tile, read and written */
        PASSED_BY_REF, RTBLKADDR(U, ChamComplexDouble, Um, Un), chameleon_parsec_get_arena_index( U ) | INOUT,
        sizeof(int), &(dstTile->ld), VALUE,
        /* Permutation data, read only */
        PASSED_BY_REF, RUNTIME_perm_getaddr( ipiv, ipivk ), chameleon_parsec_get_arena_index_perm( ipiv ) | INPUT,
        PARSEC_DTD_ARG_END );
}
/**
 * PaRSEC task body for the zlaswp "set" kernel.
 *
 * Unpacks the arguments in the order they are packed by
 * INSERT_TASK_zlaswp_set and forwards them to CORE_zlaswp_set.
 *
 * @param[in] context    Execution stream; unused.
 * @param[in] this_task  Task holding the packed arguments.
 *
 * @return PARSEC_HOOK_RETURN_DONE once the kernel has run.
 */
static inline int
CORE_zlaswp_set_parsec( parsec_execution_stream_t *context,
                        parsec_task_t             *this_task )
{
    int m0, m, n, k, lda, ldb, *invp;
    CHAMELEON_Complex64_t *A, *B;

    /* Every output must be passed by address, including lda and ldb. */
    parsec_dtd_unpack_args( this_task, &m0, &m, &n, &k, &A, &lda, &B, &ldb, &invp );

    CORE_zlaswp_set( m0, m, n, k, A, lda, B, ldb, invp );

    (void)context;
    return PARSEC_HOOK_RETURN_DONE;
}
/**
 * PaRSEC backend: insert a DTD task that runs CORE_zlaswp_set_parsec.
 *
 * The argument list below must stay in the order unpacked by
 * CORE_zlaswp_set_parsec: m0, m, n, k, A, lda, B, ldb, invp.
 * The m and n values come from the destination tile (Bm, Bn) of B.
 */
void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
                             int m0, int k,
                             const CHAM_ipiv_t *ipiv, int ipivk,
                             const CHAM_desc_t *A, int Am, int An,
                             const CHAM_desc_t *B, int Bm, int Bn )
{
    parsec_taskpool_t *PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
    CHAM_tile_t       *srcTile             = A->get_blktile( A, Am, An );
    CHAM_tile_t       *dstTile             = B->get_blktile( B, Bm, Bn );

    parsec_dtd_taskpool_insert_task(
        PARSEC_dtd_taskpool, CORE_zlaswp_set_parsec, options->priority, "laswp_set",
        /* Scalars copied at insertion time */
        sizeof(int), &m0,           VALUE,
        sizeof(int), &(dstTile->m), VALUE,
        sizeof(int), &(dstTile->n), VALUE,
        sizeof(int), &k,            VALUE,
        /* Source tile, read only */
        PASSED_BY_REF, RTBLKADDR(A, ChamComplexDouble, Am, An), chameleon_parsec_get_arena_index( A ) | INPUT,
        sizeof(int), &(srcTile->ld), VALUE,
        /* Destination tile, read and written */
        PASSED_BY_REF, RTBLKADDR(B, ChamComplexDouble, Bm, Bn), chameleon_parsec_get_arena_index( B ) | INOUT,
        sizeof(int), &(dstTile->ld), VALUE,
        /* Inverse-permutation data, read only */
        PASSED_BY_REF, RUNTIME_invp_getaddr( ipiv, ipivk ), chameleon_parsec_get_arena_index_invp( ipiv ) | INPUT,
        PARSEC_DTD_ARG_END );
}
......@@ -11,12 +11,12 @@
*
* @brief Chameleon PaRSEC runtime header
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @author Reazul Hoque
* @author Florent Pruvost
* @author Samuel Thibault
* @date 2022-02-22
* @date 2023-08-31
*
*/
#ifndef _chameleon_parsec_h_
......@@ -38,10 +38,28 @@ struct chameleon_parsec_desc_s {
typedef struct chameleon_parsec_desc_s chameleon_parsec_desc_t;
/*
 * Return the arena_index field stored in the descriptor's scheduler-specific
 * data (desc->schedopt, viewed as a chameleon_parsec_desc_t).
 */
static inline int
chameleon_parsec_get_arena_index(const CHAM_desc_t *desc) {
chameleon_parsec_get_arena_index( const CHAM_desc_t *desc ) {
return ((chameleon_parsec_desc_t *)desc->schedopt)->arena_index;
}
/*
 * Placeholder for the arena index of the ipiv data: asserts when assertions
 * are enabled, and returns -1 otherwise.
 */
static inline int
chameleon_parsec_get_arena_index_ipiv( const CHAM_ipiv_t *ipiv )
{
    (void)ipiv;
    assert( 0 );
    return -1;
}
/*
 * Placeholder for the arena index of the perm data: asserts when assertions
 * are enabled, and returns -1 otherwise.
 */
static inline int
chameleon_parsec_get_arena_index_perm( const CHAM_ipiv_t *ipiv )
{
    (void)ipiv;
    assert( 0 );
    return -1;
}
/*
 * Placeholder for the arena index of the invp data: asserts when assertions
 * are enabled, and returns -1 otherwise.
 */
static inline int
chameleon_parsec_get_arena_index_invp( const CHAM_ipiv_t *ipiv )
{
    (void)ipiv;
    assert( 0 );
    return -1;
}
static inline int cham_to_parsec_access( cham_access_t accessA ) {
if ( accessA == ChamR ) {
return INPUT;
......
/**
*
* @file quark/codelet_zlaswp.c
*
* @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon Quark codelets to apply zlaswp on a panel
*
* @version 1.3.0
* @author Mathieu Faverge
* @date 2023-08-31
* @precisions normal z -> c d s
*
*/
#include "chameleon_quark.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_ztile.h"
/**
 * Quark task body for the zlaswp "get" kernel: unpacks the five arguments
 * packed by INSERT_TASK_zlaswp_get and calls TCORE_zlaswp_get with the
 * dimensions of the source tile.
 */
static void CORE_zlaswp_get_quark( Quark *quark )
{
    int          m0;
    int          k;
    int         *perm;
    CHAM_tile_t *tileA;
    CHAM_tile_t *tileB;

    quark_unpack_args_5( quark, m0, k, perm, tileA, tileB );

    TCORE_zlaswp_get( m0, tileA->m, tileA->n, k, tileA, tileB, perm );
}
/**
 * Quark backend: insert a task that runs CORE_zlaswp_get_quark.
 *
 * Arguments are packed in the order the task body unpacks them:
 * m0, k, permutation data, source tile (read), destination tile (read/write).
 */
void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
                             int m0, int k,
                             const CHAM_ipiv_t *ipiv, int ipivk,
                             const CHAM_desc_t *A, int Am, int An,
                             const CHAM_desc_t *U, int Um, int Un )
{
    quark_option_t *opt = (quark_option_t*)(options->schedopt);

    DAG_CORE_LASWP;

    QUARK_Insert_Task(
        opt->quark, CORE_zlaswp_get_quark, (Quark_Task_Flags*)opt,
        sizeof(int),          &m0,                                         VALUE,
        sizeof(int),          &k,                                          VALUE,
        sizeof(int*),         RUNTIME_perm_getaddr( ipiv, ipivk ),         INPUT,
        sizeof(CHAM_tile_t*), RTBLKADDR(A, ChamComplexDouble, Am, An),     INPUT,
        sizeof(CHAM_tile_t*), RTBLKADDR(U, ChamComplexDouble, Um, Un),     INOUT,
        0 );
}
/**
 * Quark task body for the zlaswp "set" kernel: unpacks the five arguments
 * packed by INSERT_TASK_zlaswp_set and calls TCORE_zlaswp_set.
 *
 * The m and n arguments are taken from the destination tile B, consistent
 * with the PaRSEC and StarPU backends of the same kernel.
 */
static void CORE_zlaswp_set_quark( Quark *quark )
{
    int m0, k, *invp;
    CHAM_tile_t *A, *B;

    quark_unpack_args_5( quark, m0, k, invp, A, B );

    TCORE_zlaswp_set( m0, B->m, B->n, k, A, B, invp );
}
/**
 * Quark backend: insert a task that runs CORE_zlaswp_set_quark.
 *
 * Arguments are packed in the order the task body unpacks them:
 * m0, k, inverse-permutation data, source tile (read), destination tile
 * (read/write).
 */
void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
                             int m0, int k,
                             const CHAM_ipiv_t *ipiv, int ipivk,
                             const CHAM_desc_t *A, int Am, int An,
                             const CHAM_desc_t *B, int Bm, int Bn )
{
    quark_option_t *opt = (quark_option_t*)(options->schedopt);

    DAG_CORE_LASWP;

    QUARK_Insert_Task(
        opt->quark, CORE_zlaswp_set_quark, (Quark_Task_Flags*)opt,
        sizeof(int),          &m0,                                         VALUE,
        sizeof(int),          &k,                                          VALUE,
        sizeof(int*),         RUNTIME_invp_getaddr( ipiv, ipivk ),         INPUT,
        sizeof(CHAM_tile_t*), RTBLKADDR(A, ChamComplexDouble, Am, An),     INPUT,
        sizeof(CHAM_tile_t*), RTBLKADDR(B, ChamComplexDouble, Bm, Bn),     INOUT,
        0 );
}
/**
*
* @file starpu/codelet_zlaswp.c
*
* @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon StarPU codelets to apply zlaswp on a panel
*
* @version 1.3.0
* @author Mathieu Faverge
* @author Matthieu Kuhn
* @date 2023-08-31
* @precisions normal z -> c d s
*
*/
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
#if !defined(CHAMELEON_SIMULATION)
/**
 * StarPU CPU implementation of the zlaswp "get" codelet.
 *
 * Buffers arrive in the order registered by INSERT_TASK_zlaswp_get:
 * descr[0] is the permutation vector, descr[1] the source tile and
 * descr[2] the destination tile. m0 and k come from the packed cl_arg.
 */
static void cl_zlaswp_get_cpu_func( void *descr[], void *cl_arg )
{
    int          m0;
    int          k;
    int         *perm;
    CHAM_tile_t *tileA;
    CHAM_tile_t *tileB;

    starpu_codelet_unpack_args( cl_arg, &m0, &k );

    perm  = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
    tileA = (CHAM_tile_t *) cti_interface_get( descr[1] );
    tileB = (CHAM_tile_t *) cti_interface_get( descr[2] );

    TCORE_zlaswp_get( m0, tileA->m, tileA->n, k, tileA, tileB, perm );
}
#endif
/*
 * Codelet definition: CPU-only codelet for zlaswp_get, backed by
 * cl_zlaswp_get_cpu_func (presumably what declares the cl_zlaswp_get
 * object used by INSERT_TASK_zlaswp_get; confirm against CODELETS_CPU).
 */
CODELETS_CPU( zlaswp_get, cl_zlaswp_get_cpu_func )
/**
 * StarPU backend: insert a task for the zlaswp_get codelet.
 *
 * m0 and k are packed by value. The permutation data and tile (Am, An) of A
 * are accessed read-only; tile (Um, Un) of U is accessed read-write with the
 * commute flag. The task uses options->priority and is pinned to
 * options->workerid.
 */
void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
                             int m0, int k,
                             const CHAM_ipiv_t *ipiv, int ipivk,
                             const CHAM_desc_t *A, int Am, int An,
                             const CHAM_desc_t *U, int Um, int Un )
{
    //void (*callback)(void*) = options->profiling ? cl_zlaswp_get_callback : NULL;

    rt_starpu_insert_task(
        &cl_zlaswp_get,
        /* Scalars */
        STARPU_VALUE,               &m0, sizeof(int),
        STARPU_VALUE,               &k,  sizeof(int),
        /* Data handles, in the order read by cl_zlaswp_get_cpu_func */
        STARPU_R,                   RUNTIME_perm_getaddr( ipiv, ipivk ),
        STARPU_R,                   RTBLKADDR(A, ChamComplexDouble, Am, An),
        STARPU_RW | STARPU_COMMUTE, RTBLKADDR(U, ChamComplexDouble, Um, Un),
        /* Scheduling hints */
        STARPU_PRIORITY,            options->priority,
        //STARPU_CALLBACK, callback,
        STARPU_EXECUTE_ON_WORKER,   options->workerid,
        0 );
}
#if !defined(CHAMELEON_SIMULATION)
/**
 * StarPU CPU implementation of the zlaswp "set" codelet.
 *
 * Buffers arrive in the order registered by INSERT_TASK_zlaswp_set:
 * descr[0] is the inverse-permutation vector, descr[1] the source tile and
 * descr[2] the destination tile. m0 and k come from the packed cl_arg.
 * The m and n arguments of the kernel are those of the destination tile.
 */
static void cl_zlaswp_set_cpu_func( void *descr[], void *cl_arg )
{
    int          m0;
    int          k;
    int         *invp;
    CHAM_tile_t *tileA;
    CHAM_tile_t *tileB;

    starpu_codelet_unpack_args( cl_arg, &m0, &k );

    invp  = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
    tileA = (CHAM_tile_t *) cti_interface_get( descr[1] );
    tileB = (CHAM_tile_t *) cti_interface_get( descr[2] );

    TCORE_zlaswp_set( m0, tileB->m, tileB->n, k, tileA, tileB, invp );
}
#endif
/*
 * Codelet definition: CPU-only codelet for zlaswp_set, backed by
 * cl_zlaswp_set_cpu_func (presumably what declares the cl_zlaswp_set
 * object used by INSERT_TASK_zlaswp_set; confirm against CODELETS_CPU).
 */
CODELETS_CPU( zlaswp_set, cl_zlaswp_set_cpu_func )
/**
 * StarPU backend: insert a task for the zlaswp_set codelet.
 *
 * m0 and k are packed by value. The inverse-permutation data and tile
 * (Am, An) of A are accessed read-only; tile (Bm, Bn) of B is accessed
 * read-write. The task uses options->priority and is pinned to
 * options->workerid.
 */
void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
                             int m0, int k,
                             const CHAM_ipiv_t *ipiv, int ipivk,
                             const CHAM_desc_t *A, int Am, int An,
                             const CHAM_desc_t *B, int Bm, int Bn )
{
    //void (*callback)(void*) = options->profiling ? cl_zlaswp_set_callback : NULL;

    rt_starpu_insert_task(
        &cl_zlaswp_set,
        /* Scalars */
        STARPU_VALUE,             &m0, sizeof(int),
        STARPU_VALUE,             &k,  sizeof(int),
        /* Data handles, in the order read by cl_zlaswp_set_cpu_func */
        STARPU_R,                 RUNTIME_invp_getaddr( ipiv, ipivk ),
        STARPU_R,                 RTBLKADDR(A, ChamComplexDouble, Am, An),
        STARPU_RW,                RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
        /* Scheduling hints */
        STARPU_PRIORITY,          options->priority,
        //STARPU_CALLBACK, callback,
        STARPU_EXECUTE_ON_WORKER, options->workerid,
        0 );
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment