Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 898ea742 authored by Mathieu Faverge
Browse files

Fix placement of QR kernels

parent b8df9779
No related branches found
No related tags found
1 merge request: !81 "Migration QR/LQ"
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
* @copyright 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @file codelet_ztpmqrt.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
......@@ -25,54 +21,6 @@
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
/**
 * Insert a ztpmqrt task into StarPU.
 *
 * The task uses the cl_ztpmqrt codelet.  Scalar arguments are packed by
 * value, in this order: side, trans, M, N, K, L, ib, ldv, ldt, lda, ldb
 * (each leading dimension is packed right after the data handle it belongs
 * to).  Tiles V(Vm,Vn) and T(Tm,Tn) are accessed read-only, tiles A(Am,An)
 * and B(Bm,Bn) read-write, and options->ws_worker is attached as scratch
 * space.
 *
 * This version does not pass STARPU_EXECUTE_ON_NODE, so it does not itself
 * request a specific executing MPI rank.
 *
 * @param[in] options  Task options; profiling, ws_worker and priority are read.
 * @param[in] side, trans        Forwarded to the kernel by value.
 * @param[in] M, N, K, L, ib     Forwarded to the kernel by value.
 * @param[in] nb       Not used by this function (only cast to void).
 * @param[in] V, T     Descriptors of the read-only tiles, with tile
 *                     coordinates (Vm,Vn), (Tm,Tn) and leading dimensions
 *                     ldv, ldt.
 * @param[in] A, B     Descriptors of the read-write tiles, with tile
 *                     coordinates (Am,An), (Bm,Bn) and leading dimensions
 *                     lda, ldb.
 */
void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
MORSE_enum side, MORSE_enum trans,
int M, int N, int K, int L, int ib, int nb,
const MORSE_desc_t *V, int Vm, int Vn, int ldv,
const MORSE_desc_t *T, int Tm, int Tn, int ldt,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb )
{
struct starpu_codelet *codelet = &cl_ztpmqrt;
/* The completion callback is only registered when profiling is enabled. */
void (*callback)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL;
/* Declare the access mode of every tile touched by the task.
 * NOTE(review): these macros are defined elsewhere and may expand to
 * control flow -- confirm before reordering anything around them. */
MORSE_BEGIN_ACCESS_DECLARATION;
MORSE_ACCESS_R(V, Vm, Vn);
MORSE_ACCESS_R(T, Tm, Tn);
MORSE_ACCESS_RW(A, Am, An);
MORSE_ACCESS_RW(B, Bm, Bn);
MORSE_END_ACCESS_DECLARATION;
/* The order of the STARPU_VALUE entries below is the order in which the
 * codelet has to unpack them; do not reorder. */
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_VALUE, &side, sizeof(MORSE_enum),
STARPU_VALUE, &trans, sizeof(MORSE_enum),
STARPU_VALUE, &M, sizeof(int),
STARPU_VALUE, &N, sizeof(int),
STARPU_VALUE, &K, sizeof(int),
STARPU_VALUE, &L, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
STARPU_VALUE, &ldv, sizeof(int),
STARPU_R, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
STARPU_VALUE, &ldt, sizeof(int),
STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
/* Other options */
STARPU_SCRATCH, options->ws_worker,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "ztpmqrt",
#endif
0);
/* nb is otherwise unreferenced; the casts silence unused-parameter warnings. */
(void)ib; (void)nb;
}
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg)
{
......@@ -150,8 +98,58 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
#endif /* defined(CHAMELEON_USE_CUDA) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC)
/**
 * Insert a ztpmqrt task into StarPU.
 *
 * The task uses the cl_ztpmqrt codelet.  Scalar arguments are packed by
 * value, in this order: side, trans, M, N, K, L, ib, ldv, ldt, lda, ldb
 * (each leading dimension is packed right after the data handle it belongs
 * to).  Tiles V(Vm,Vn) and T(Tm,Tn) are accessed read-only, tiles A(Am,An)
 * and B(Bm,Bn) read-write, and options->ws_worker is attached as scratch
 * space.
 *
 * When CHAMELEON_USE_MPI is defined, execution is requested on the rank
 * returned by B->get_rankof(B, Bm, Bn).  When CHAMELEON_CODELETS_HAVE_NAME
 * is defined, the task is named "ztsmqr" if L == 0 and "ztpmqrt" otherwise.
 *
 * @param[in] options  Task options; profiling, ws_worker and priority are read.
 * @param[in] side, trans        Forwarded to the kernel by value.
 * @param[in] M, N, K, L, ib     Forwarded to the kernel by value; L also
 *                               selects the task name (see above).
 * @param[in] nb       Not used by this function (only cast to void).
 * @param[in] V, T     Descriptors of the read-only tiles, with tile
 *                     coordinates (Vm,Vn), (Tm,Tn) and leading dimensions
 *                     ldv, ldt.
 * @param[in] A, B     Descriptors of the read-write tiles, with tile
 *                     coordinates (Am,An), (Bm,Bn) and leading dimensions
 *                     lda, ldb.
 */
void
MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
MORSE_enum side, MORSE_enum trans,
int M, int N, int K, int L, int ib, int nb,
const MORSE_desc_t *V, int Vm, int Vn, int ldv,
const MORSE_desc_t *T, int Tm, int Tn, int ldt,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb )
{
struct starpu_codelet *codelet = &cl_ztpmqrt;
/* The completion callback is only registered when profiling is enabled. */
void (*callback)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL;
/* Declare the access mode of every tile touched by the task.
 * NOTE(review): these macros are defined elsewhere and may expand to
 * control flow -- confirm before reordering anything around them. */
MORSE_BEGIN_ACCESS_DECLARATION;
MORSE_ACCESS_R(V, Vm, Vn);
MORSE_ACCESS_R(T, Tm, Tn);
MORSE_ACCESS_RW(A, Am, An);
MORSE_ACCESS_RW(B, Bm, Bn);
MORSE_END_ACCESS_DECLARATION;
/* The order of the STARPU_VALUE entries below is the order in which the
 * codelet has to unpack them; do not reorder. */
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_VALUE, &side, sizeof(MORSE_enum),
STARPU_VALUE, &trans, sizeof(MORSE_enum),
STARPU_VALUE, &M, sizeof(int),
STARPU_VALUE, &N, sizeof(int),
STARPU_VALUE, &K, sizeof(int),
STARPU_VALUE, &L, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
STARPU_VALUE, &ldv, sizeof(int),
STARPU_R, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
STARPU_VALUE, &ldt, sizeof(int),
STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
/* Other options */
STARPU_SCRATCH, options->ws_worker,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
/* Request execution on the rank that owns tile B(Bm,Bn). */
STARPU_EXECUTE_ON_NODE, B->get_rankof(B, Bm, Bn),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
/* With L == 0 the task is labelled "ztsmqr" instead of "ztpmqrt". */
STARPU_NAME, (( L == 0 ) ? "ztsmqr" : "ztpmqrt"),
#endif
0);
/* nb is otherwise unreferenced; the casts silence unused-parameter warnings. */
(void)ib; (void)nb;
}
......@@ -25,11 +25,45 @@
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
int M, int N, int L, int ib, int nb,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb,
const MORSE_desc_t *T, int Tm, int Tn, int ldt )
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg)
{
int M;
int N;
int L;
int ib;
MORSE_Complex64_t *A;
int lda;
MORSE_Complex64_t *B;
int ldb;
MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *WORK;
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
T = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib,
&lda, &ldb, &ldt );
CORE_ztpqrt( M, N, L, ib,
A, lda, B, ldb, T, ldt, WORK );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func)
void
MORSE_TASK_ztpqrt( const MORSE_option_t *options,
int M, int N, int L, int ib, int nb,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb,
const MORSE_desc_t *T, int Tm, int Tn, int ldt )
{
struct starpu_codelet *codelet = &cl_ztpqrt;
void (*callback)(void*) = options->profiling ? cl_ztpqrt_callback : NULL;
......@@ -56,6 +90,9 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
STARPU_SCRATCH, options->ws_worker,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, B->get_rankof(B, Bm, Bn),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "ztpqrt",
#endif
......@@ -63,38 +100,3 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
(void)ib; (void)nb;
}
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg)
{
int M;
int N;
int L;
int ib;
MORSE_Complex64_t *A;
int lda;
MORSE_Complex64_t *B;
int ldb;
MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *WORK;
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
T = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib,
&lda, &ldb, &ldt );
CORE_ztpqrt( M, N, L, ib,
A, lda, B, ldb, T, ldt, WORK );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func)
......@@ -145,39 +145,12 @@ void MORSE_TASK_ztsmqr(const MORSE_option_t *options,
struct starpu_codelet *codelet = &cl_ztsmqr;
void (*callback)(void*) = options->profiling ? cl_ztsmqr_callback : NULL;
int ldwork = side == MorseLeft ? ib : nb;
int sizeA1 = lda1*n1;
int sizeA2 = lda2*n2;
int sizeV = ldv*k;
int sizeT = ldt*n1;
int execution_rank = A2->get_rankof( A2, A2m, A2n );
int rank_changed=0;
(void)execution_rank;
/* force execution on the rank owning the largest data (tile) */
int threshold;
char* env = getenv("MORSE_COMM_FACTOR_THRESHOLD");
if (env != NULL)
threshold = (unsigned)atoi(env);
else
threshold = 10;
if ( sizeA1 > threshold*sizeA2 ){
execution_rank = A1->get_rankof( A1, A1m, A1n );
rank_changed = 1;
}else if( sizeV > threshold*sizeA2 ){
execution_rank = V->get_rankof( V, Vm, Vn );
rank_changed = 1;
}else if( sizeT > threshold*sizeA2 ){
execution_rank = T->get_rankof( T, Tm, Tn );
rank_changed = 1;
}
MORSE_BEGIN_ACCESS_DECLARATION;
MORSE_ACCESS_RW(A1, A1m, A1n);
MORSE_ACCESS_RW(A2, A2m, A2n);
MORSE_ACCESS_R(V, Vm, Vn);
MORSE_ACCESS_R(T, Tm, Tn);
if (rank_changed)
MORSE_RANK_CHANGED(execution_rank);
MORSE_END_ACCESS_DECLARATION;
starpu_insert_task(
......@@ -204,7 +177,7 @@ void MORSE_TASK_ztsmqr(const MORSE_option_t *options,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, execution_rank,
STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "ztsmqr",
......
......@@ -111,7 +111,6 @@ void MORSE_TASK_ztsqrt(const MORSE_option_t *options,
MORSE_ACCESS_RW(A1, A1m, A1n);
MORSE_ACCESS_RW(A2, A2m, A2n);
MORSE_ACCESS_W(T, Tm, Tn);
MORSE_RANK_CHANGED(A2->get_rankof(A2, A2m, A2n));
MORSE_END_ACCESS_DECLARATION;
starpu_insert_task(
......@@ -131,10 +130,12 @@ void MORSE_TASK_ztsqrt(const MORSE_option_t *options,
STARPU_VALUE, &h_work, sizeof(MORSE_starpu_ws_t *),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "ztsqrt",
#endif
STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n),
0);
}
......
......@@ -143,39 +143,12 @@ void MORSE_TASK_zttmqr(const MORSE_option_t *options,
struct starpu_codelet *codelet = &cl_zttmqr;
void (*callback)(void*) = options->profiling ? cl_zttmqr_callback : NULL;
int ldwork = side == MorseLeft ? ib : nb;
int sizeA1 = lda1*n1;
int sizeA2 = lda2*n2;
int sizeV = ldv*k;
int sizeT = ldt*n1;
int execution_rank = A2->get_rankof( A2, A2m, A2n );
int rank_changed=0;
(void)execution_rank;
/* force execution on the rank owning the largest data (tile) */
int threshold;
char* env = getenv("MORSE_COMM_FACTOR_THRESHOLD");
if (env != NULL)
threshold = (unsigned)atoi(env);
else
threshold = 10;
if ( sizeA1 > threshold*sizeA2 ){
execution_rank = A1->get_rankof( A1, A1m, A1n );
rank_changed = 1;
}else if( sizeV > threshold*sizeA2 ){
execution_rank = V->get_rankof( V, Vm, Vn );
rank_changed = 1;
}else if( sizeT > threshold*sizeA2 ){
execution_rank = T->get_rankof( T, Tm, Tn );
rank_changed = 1;
}
MORSE_BEGIN_ACCESS_DECLARATION;
MORSE_ACCESS_RW(A1, A1m, A1n);
MORSE_ACCESS_RW(A2, A2m, A2n);
MORSE_ACCESS_R(V, Vm, Vn);
MORSE_ACCESS_R(T, Tm, Tn);
if (rank_changed)
MORSE_RANK_CHANGED(execution_rank);
MORSE_END_ACCESS_DECLARATION;
starpu_insert_task(
......@@ -202,7 +175,7 @@ void MORSE_TASK_zttmqr(const MORSE_option_t *options,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, execution_rank,
STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zttmqr",
......
......@@ -137,6 +137,9 @@ void MORSE_TASK_zttqrt(const MORSE_option_t *options,
STARPU_SCRATCH, options->ws_worker,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zttqrt",
#endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment