From 2740ea28f12be3a9717f75a7a29da6e5564ca302 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Wed, 31 Jan 2018 18:51:37 +0100 Subject: [PATCH] Add new tpmlqt and tplqt tasks to starpu --- runtime/starpu/codelets/codelet_zcallback.c | 2 + runtime/starpu/codelets/codelet_ztplqt.c | 102 +++++++++++++ runtime/starpu/codelets/codelet_ztpmlqt.c | 154 ++++++++++++++++++++ runtime/starpu/include/runtime_codelet_z.h | 2 + 4 files changed, 260 insertions(+) create mode 100644 runtime/starpu/codelets/codelet_ztplqt.c create mode 100644 runtime/starpu/codelets/codelet_ztpmlqt.c diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c index 905bc8a9a..b32105832 100644 --- a/runtime/starpu/codelets/codelet_zcallback.c +++ b/runtime/starpu/codelets/codelet_zcallback.c @@ -67,7 +67,9 @@ CHAMELEON_CL_CB(zssssm, starpu_matrix_get_nx(task->handles[0]), starpu_ma CHAMELEON_CL_CB(zsymm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0, 2.*M*M *N) CHAMELEON_CL_CB(zsyr2k, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+2.*M*N)*M) CHAMELEON_CL_CB(zsyrk, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+ M)*M*N) +CHAMELEON_CL_CB(ztplqt, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]), 2.*M*N*K) CHAMELEON_CL_CB(ztpqrt, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]), 2.*M*N*K) +CHAMELEON_CL_CB(ztpmlqt, starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]), 4.*M*N*K) CHAMELEON_CL_CB(ztpmqrt, starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]), 4.*M*N*K) CHAMELEON_CL_CB(ztrasm, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, 0.5*M*(M+1)) CHAMELEON_CL_CB(ztrmm, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0, M*M*N) diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c new file mode 100644 index 000000000..17132351f --- /dev/null +++ b/runtime/starpu/codelets/codelet_ztplqt.c @@ -0,0 +1,102 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file codelet_ztplqt.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "chameleon_starpu.h" +#include "runtime_codelet_z.h" + +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg) +{ + int M; + int N; + int L; + int ib; + MORSE_Complex64_t *A; + int lda; + MORSE_Complex64_t *B; + int ldb; + MORSE_Complex64_t *T; + int ldt; + MORSE_Complex64_t *WORK; + + A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + T = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + + starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib, + &lda, &ldb, &ldt ); + + CORE_ztplqt( M, N, L, ib, + A, lda, B, ldb, T, ldt, WORK ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztplqt, 4, cl_ztplqt_cpu_func) + +void +MORSE_TASK_ztplqt( const MORSE_option_t *options, + int M, int N, int L, int ib, int nb, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb, + const MORSE_desc_t *T, int Tm, int Tn, int ldt ) +{ + struct starpu_codelet *codelet = &cl_ztplqt; + void (*callback)(void*) = options->profiling ? cl_ztplqt_callback : NULL; + + MORSE_BEGIN_ACCESS_DECLARATION; + MORSE_ACCESS_RW(A, Am, An); + MORSE_ACCESS_RW(B, Bm, Bn); + MORSE_ACCESS_W(T, Tm, Tn); + MORSE_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &M, sizeof(int), + STARPU_VALUE, &N, sizeof(int), + STARPU_VALUE, &L, sizeof(int), + STARPU_VALUE, &ib, sizeof(int), + STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_W, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), + STARPU_VALUE, &ldt, sizeof(int), + /* Other options */ + STARPU_SCRATCH, options->ws_worker, + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_USE_MPI) + STARPU_EXECUTE_ON_NODE, B->get_rankof(B, Bm, Bn), +#endif +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, (L == 0) ? "ztplqs" : "ztplqt", +#endif + 0); + + (void)ib; (void)nb; +} diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c new file mode 100644 index 000000000..2363d67c0 --- /dev/null +++ b/runtime/starpu/codelets/codelet_ztpmlqt.c @@ -0,0 +1,154 @@ +/** + * + * @copyright 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @file codelet_ztpmlqt.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "chameleon_starpu.h" +#include "runtime_codelet_z.h" + +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztpmlqt_cpu_func(void *descr[], void *cl_arg) +{ + MORSE_enum side; + MORSE_enum trans; + int M; + int N; + int K; + int L; + int ib; + const MORSE_Complex64_t *V; + int ldv; + const MORSE_Complex64_t *T; + int ldt; + MORSE_Complex64_t *A; + int lda; + MORSE_Complex64_t *B; + int ldb; + MORSE_Complex64_t *WORK; + + V = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); + WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ + + starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, + &ldv, &ldt, &lda, &ldb ); + + CORE_ztpmlqt( side, trans, M, N, K, L, ib, + V, ldv, T, ldt, A, lda, B, ldb, WORK ); +} + +#if defined(CHAMELEON_USE_CUDA) +static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg) +{ + MORSE_enum side; + MORSE_enum trans; + int M; + int N; + int K; + int L; + int ib; + const cuDoubleComplex *V; + int ldv; + const cuDoubleComplex *T; + int ldt; + cuDoubleComplex *A; + int lda; + cuDoubleComplex *B; + int ldb; + cuDoubleComplex *W; + + V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); + A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); + B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); + W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */ + + starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, + &ldv, &ldt, &lda, &ldb ); + + RUNTIME_getStream(stream); + + CUDA_ztpmlqt( + side, trans, M, N, K, L, ib, + V, ldv, T, ldt, A, lda, B, ldb, + W, stream ); + +#ifndef STARPU_CUDA_ASYNC + cudaStreamSynchronize( stream ); +#endif +} +#endif /* defined(CHAMELEON_USE_CUDA) */ +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC) + +void +MORSE_TASK_ztpmlqt( const MORSE_option_t *options, + MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int L, int ib, int nb, + const MORSE_desc_t *V, int Vm, int Vn, int ldv, + const MORSE_desc_t *T, int Tm, int Tn, int ldt, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb ) +{ + struct starpu_codelet *codelet = &cl_ztpmlqt; + void (*callback)(void*) = options->profiling ? cl_ztpmlqt_callback : NULL; + + MORSE_BEGIN_ACCESS_DECLARATION; + MORSE_ACCESS_R(V, Vm, Vn); + MORSE_ACCESS_R(T, Tm, Tn); + MORSE_ACCESS_RW(A, Am, An); + MORSE_ACCESS_RW(B, Bm, Bn); + MORSE_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &side, sizeof(MORSE_enum), + STARPU_VALUE, &trans, sizeof(MORSE_enum), + STARPU_VALUE, &M, sizeof(int), + STARPU_VALUE, &N, sizeof(int), + STARPU_VALUE, &K, sizeof(int), + STARPU_VALUE, &L, sizeof(int), + STARPU_VALUE, &ib, sizeof(int), + STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn), + STARPU_VALUE, &ldv, sizeof(int), + STARPU_R, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), + STARPU_VALUE, &ldt, sizeof(int), + STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + /* Other options */ + STARPU_SCRATCH, options->ws_worker, + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_USE_MPI) + STARPU_EXECUTE_ON_NODE, B->get_rankof(B, Bm, Bn), +#endif +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, (( L == 0 ) ? "ztsmlq" : "ztpmlqt"), +#endif + 0); + + (void)ib; (void)nb; +} diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h index 7b973d116..6d3187da1 100644 --- a/runtime/starpu/include/runtime_codelet_z.h +++ b/runtime/starpu/include/runtime_codelet_z.h @@ -81,7 +81,9 @@ ZCODELETS_HEADER(syssq) ZCODELETS_HEADER(trasm) ZCODELETS_HEADER(trssq) ZCODELETS_HEADER(trtri) +ZCODELETS_HEADER(tplqt) ZCODELETS_HEADER(tpqrt) +ZCODELETS_HEADER(tpmlqt) ZCODELETS_HEADER(tpmqrt) ZCODELETS_HEADER(tslqt) ZCODELETS_HEADER(tsmlq) -- GitLab