Commit 898ea742 authored by Mathieu Faverge

Fix placement of QR kernels

parent b8df9779
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
* @copyright 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @file codelet_ztpmqrt.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
......@@ -25,54 +21,6 @@
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
/**
 * Submit one ztpmqrt task to StarPU through the cl_ztpmqrt codelet.
 *
 * The tiles V(Vm,Vn) and T(Tm,Tn) are declared and passed read-only, while
 * A(Am,An) and B(Bm,Bn) are declared and passed read-write.  The scalars
 * side, trans, M, N, K, L, ib and the four leading dimensions are packed by
 * value, interleaved with the data handles in the order shown below; the
 * order must stay in sync with whatever unpacks them on the kernel side.
 *
 * @param[in] options  Task options: profiling selects whether the profiling
 *                     callback is attached, ws_worker is handed over as
 *                     scratch space and priority as the task priority.
 * @param[in] nb       Accepted for interface compatibility; it is not
 *                     forwarded to the task.
 */
void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
                         MORSE_enum side, MORSE_enum trans,
                         int M, int N, int K, int L, int ib, int nb,
                         const MORSE_desc_t *V, int Vm, int Vn, int ldv,
                         const MORSE_desc_t *T, int Tm, int Tn, int ldt,
                         const MORSE_desc_t *A, int Am, int An, int lda,
                         const MORSE_desc_t *B, int Bm, int Bn, int ldb )
{
    struct starpu_codelet *cl = &cl_ztpmqrt;
    /* The callback is only attached when profiling has been requested. */
    void (*on_completion)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL;

    /* Declare how each tile is accessed before the task is inserted. */
    MORSE_BEGIN_ACCESS_DECLARATION;
    MORSE_ACCESS_R(V, Vm, Vn);
    MORSE_ACCESS_R(T, Tm, Tn);
    MORSE_ACCESS_RW(A, Am, An);
    MORSE_ACCESS_RW(B, Bm, Bn);
    MORSE_END_ACCESS_DECLARATION;

    starpu_insert_task(
        starpu_mpi_codelet(cl),
        /* Scalar arguments, copied by value */
        STARPU_VALUE,    &side,  sizeof(MORSE_enum),
        STARPU_VALUE,    &trans, sizeof(MORSE_enum),
        STARPU_VALUE,    &M,     sizeof(int),
        STARPU_VALUE,    &N,     sizeof(int),
        STARPU_VALUE,    &K,     sizeof(int),
        STARPU_VALUE,    &L,     sizeof(int),
        STARPU_VALUE,    &ib,    sizeof(int),
        /* Each tile is followed by its leading dimension */
        STARPU_R,        RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
        STARPU_VALUE,    &ldv,   sizeof(int),
        STARPU_R,        RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
        STARPU_VALUE,    &ldt,   sizeof(int),
        STARPU_RW,       RTBLKADDR(A, MORSE_Complex64_t, Am, An),
        STARPU_VALUE,    &lda,   sizeof(int),
        STARPU_RW,       RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
        STARPU_VALUE,    &ldb,   sizeof(int),
        /* Other options */
        STARPU_SCRATCH,  options->ws_worker,
        STARPU_PRIORITY, options->priority,
        STARPU_CALLBACK, on_completion,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
        STARPU_NAME, "ztpmqrt",
#endif
        0);

    /* Keep compilers quiet about parameters that may end up unused. */
    (void)ib;
    (void)nb;
}
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg)
{
......@@ -150,8 +98,58 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
#endif /* defined(CHAMELEON_USE_CUDA) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
 * Codelet definition
 *
 * Declares the ztpmqrt codelet from its CPU and CUDA entry points.
 * NOTE(review): the literal 5 is presumably the number of data buffers; it
 * matches the five handles given at task insertion (V, T, A, B and the
 * scratch workspace) -- confirm against the CODELETS macro.
 */
CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC)
/**
 * Submit one ztpmqrt task to StarPU through the cl_ztpmqrt codelet.
 *
 * The tiles V(Vm,Vn) and T(Tm,Tn) are declared and passed read-only, while
 * A(Am,An) and B(Bm,Bn) are declared and passed read-write.  The scalars
 * side, trans, M, N, K, L, ib and the four leading dimensions are packed by
 * value, interleaved with the data handles in the order shown below; the
 * order must stay in sync with whatever unpacks them on the kernel side.
 *
 * When CHAMELEON_USE_MPI is defined, the task is requested to execute on
 * the rank returned by B->get_rankof(B, Bm, Bn).  When codelet names are
 * enabled, the task is labelled "ztsmqr" if L == 0 and "ztpmqrt" otherwise.
 *
 * @param[in] options  Task options: profiling selects whether the profiling
 *                     callback is attached, ws_worker is handed over as
 *                     scratch space and priority as the task priority.
 * @param[in] nb       Accepted for interface compatibility; it is not
 *                     forwarded to the task.
 */
void
MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
                    MORSE_enum side, MORSE_enum trans,
                    int M, int N, int K, int L, int ib, int nb,
                    const MORSE_desc_t *V, int Vm, int Vn, int ldv,
                    const MORSE_desc_t *T, int Tm, int Tn, int ldt,
                    const MORSE_desc_t *A, int Am, int An, int lda,
                    const MORSE_desc_t *B, int Bm, int Bn, int ldb )
{
    struct starpu_codelet *cl = &cl_ztpmqrt;
    /* The callback is only attached when profiling has been requested. */
    void (*on_completion)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL;

    /* Declare how each tile is accessed before the task is inserted. */
    MORSE_BEGIN_ACCESS_DECLARATION;
    MORSE_ACCESS_R(V, Vm, Vn);
    MORSE_ACCESS_R(T, Tm, Tn);
    MORSE_ACCESS_RW(A, Am, An);
    MORSE_ACCESS_RW(B, Bm, Bn);
    MORSE_END_ACCESS_DECLARATION;

    starpu_insert_task(
        starpu_mpi_codelet(cl),
        /* Scalar arguments, copied by value */
        STARPU_VALUE,    &side,  sizeof(MORSE_enum),
        STARPU_VALUE,    &trans, sizeof(MORSE_enum),
        STARPU_VALUE,    &M,     sizeof(int),
        STARPU_VALUE,    &N,     sizeof(int),
        STARPU_VALUE,    &K,     sizeof(int),
        STARPU_VALUE,    &L,     sizeof(int),
        STARPU_VALUE,    &ib,    sizeof(int),
        /* Each tile is followed by its leading dimension */
        STARPU_R,        RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
        STARPU_VALUE,    &ldv,   sizeof(int),
        STARPU_R,        RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
        STARPU_VALUE,    &ldt,   sizeof(int),
        STARPU_RW,       RTBLKADDR(A, MORSE_Complex64_t, Am, An),
        STARPU_VALUE,    &lda,   sizeof(int),
        STARPU_RW,       RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
        STARPU_VALUE,    &ldb,   sizeof(int),
        /* Other options */
        STARPU_SCRATCH,  options->ws_worker,
        STARPU_PRIORITY, options->priority,
        STARPU_CALLBACK, on_completion,
#if defined(CHAMELEON_USE_MPI)
        /* Run where the read-write tile B(Bm,Bn) is reported to live. */
        STARPU_EXECUTE_ON_NODE, B->get_rankof(B, Bm, Bn),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
        STARPU_NAME, (( L == 0 ) ? "ztsmqr" : "ztpmqrt"),
#endif
        0);

    /* Keep compilers quiet about parameters that may end up unused. */
    (void)ib;
    (void)nb;
}
......@@ -25,11 +25,45 @@
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
int M, int N, int L, int ib, int nb,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb,
const MORSE_desc_t *T, int Tm, int Tn, int ldt )
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg)
{
int M;
int N;
int L;
int ib;
MORSE_Complex64_t *A;
int lda;
MORSE_Complex64_t *B;
int ldb;
MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *WORK;
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
T = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib,
&lda, &ldb, &ldt );
CORE_ztpqrt( M, N, L, ib,
A, lda, B, ldb, T, ldt, WORK );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
 * Codelet definition
 *
 * Declares the CPU-only ztpqrt codelet backed by cl_ztpqrt_cpu_func.
 * NOTE(review): the literal 4 is presumably the number of data buffers; it
 * matches the four descriptors (descr[0] to descr[3]) read by
 * cl_ztpqrt_cpu_func -- confirm against the CODELETS_CPU macro.
 */
CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func)
void
MORSE_TASK_ztpqrt( const MORSE_option_t *options,
int M, int N, int L, int ib, int nb,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb,
const MORSE_desc_t *T, int Tm, int Tn, int ldt )
{
struct starpu_codelet *codelet = &cl_ztpqrt;
void (*callback)(void*) = options->profiling ? cl_ztpqrt_callback : NULL;
......@@ -56,6 +90,9 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
STARPU_SCRATCH, options->ws_worker,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, B->get_rankof(B, Bm, Bn),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "ztpqrt",
#endif
......@@ -63,38 +100,3 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
(void)ib; (void)nb;
}
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg)
{
int M;
int N;
int L;
int ib;
MORSE_Complex64_t *A;
int lda;
MORSE_Complex64_t *B;
int ldb;
MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *WORK;
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
T = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib,
&lda, &ldb, &ldt );
CORE_ztpqrt( M, N, L, ib,
A, lda, B, ldb, T, ldt, WORK );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
 * Codelet definition
 *
 * Declares the CPU-only ztpqrt codelet backed by cl_ztpqrt_cpu_func.
 * NOTE(review): the literal 4 is presumably the number of data buffers; it
 * matches the four descriptors (descr[0] to descr[3]) read by
 * cl_ztpqrt_cpu_func -- confirm against the CODELETS_CPU macro.
 */
CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func)
......@@ -145,39 +145,12 @@ void MORSE_TASK_ztsmqr(const MORSE_option_t *options,
struct starpu_codelet *codelet = &cl_ztsmqr;
void (*callback)(void*) = options->profiling ? cl_ztsmqr_callback : NULL;
int ldwork = side == MorseLeft ? ib : nb;
int sizeA1 = lda1*n1;
int sizeA2 = lda2*n2;
int sizeV = ldv*k;
int sizeT = ldt*n1;
int execution_rank = A2->get_rankof( A2, A2m, A2n );
int rank_changed=0;
(void)execution_rank;
/* force execution on the rank owning the largest data (tile) */
int threshold;
char* env = getenv("MORSE_COMM_FACTOR_THRESHOLD");
if (env != NULL)
threshold = (unsigned)atoi(env);
else
threshold = 10;
if ( sizeA1 > threshold*sizeA2 ){
execution_rank = A1->get_rankof( A1, A1m, A1n );
rank_changed = 1;
}else if( sizeV > threshold*sizeA2 ){
execution_rank = V->get_rankof( V, Vm, Vn );
rank_changed = 1;
}else if( sizeT > threshold*sizeA2 ){
execution_rank = T->get_rankof( T, Tm, Tn );
rank_changed = 1;
}
MORSE_BEGIN_ACCESS_DECLARATION;
MORSE_ACCESS_RW(A1, A1m, A1n);
MORSE_ACCESS_RW(A2, A2m, A2n);
MORSE_ACCESS_R(V, Vm, Vn);
MORSE_ACCESS_R(T, Tm, Tn);
if (rank_changed)
MORSE_RANK_CHANGED(execution_rank);
MORSE_END_ACCESS_DECLARATION;
starpu_insert_task(
......@@ -204,7 +177,7 @@ void MORSE_TASK_ztsmqr(const MORSE_option_t *options,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, execution_rank,
STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "ztsmqr",
......
......@@ -111,7 +111,6 @@ void MORSE_TASK_ztsqrt(const MORSE_option_t *options,
MORSE_ACCESS_RW(A1, A1m, A1n);
MORSE_ACCESS_RW(A2, A2m, A2n);
MORSE_ACCESS_W(T, Tm, Tn);
MORSE_RANK_CHANGED(A2->get_rankof(A2, A2m, A2n));
MORSE_END_ACCESS_DECLARATION;
starpu_insert_task(
......@@ -131,10 +130,12 @@ void MORSE_TASK_ztsqrt(const MORSE_option_t *options,
STARPU_VALUE, &h_work, sizeof(MORSE_starpu_ws_t *),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "ztsqrt",
#endif
STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n),
0);
}
......
......@@ -143,39 +143,12 @@ void MORSE_TASK_zttmqr(const MORSE_option_t *options,
struct starpu_codelet *codelet = &cl_zttmqr;
void (*callback)(void*) = options->profiling ? cl_zttmqr_callback : NULL;
int ldwork = side == MorseLeft ? ib : nb;
int sizeA1 = lda1*n1;
int sizeA2 = lda2*n2;
int sizeV = ldv*k;
int sizeT = ldt*n1;
int execution_rank = A2->get_rankof( A2, A2m, A2n );
int rank_changed=0;
(void)execution_rank;
/* force execution on the rank owning the largest data (tile) */
int threshold;
char* env = getenv("MORSE_COMM_FACTOR_THRESHOLD");
if (env != NULL)
threshold = (unsigned)atoi(env);
else
threshold = 10;
if ( sizeA1 > threshold*sizeA2 ){
execution_rank = A1->get_rankof( A1, A1m, A1n );
rank_changed = 1;
}else if( sizeV > threshold*sizeA2 ){
execution_rank = V->get_rankof( V, Vm, Vn );
rank_changed = 1;
}else if( sizeT > threshold*sizeA2 ){
execution_rank = T->get_rankof( T, Tm, Tn );
rank_changed = 1;
}
MORSE_BEGIN_ACCESS_DECLARATION;
MORSE_ACCESS_RW(A1, A1m, A1n);
MORSE_ACCESS_RW(A2, A2m, A2n);
MORSE_ACCESS_R(V, Vm, Vn);
MORSE_ACCESS_R(T, Tm, Tn);
if (rank_changed)
MORSE_RANK_CHANGED(execution_rank);
MORSE_END_ACCESS_DECLARATION;
starpu_insert_task(
......@@ -202,7 +175,7 @@ void MORSE_TASK_zttmqr(const MORSE_option_t *options,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, execution_rank,
STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zttmqr",
......
......@@ -137,6 +137,9 @@ void MORSE_TASK_zttqrt(const MORSE_option_t *options,
STARPU_SCRATCH, options->ws_worker,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zttqrt",
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment