Commit 2740ea28 authored by Mathieu Faverge's avatar Mathieu Faverge

Add new tpmlqt and tplqt tasks to starpu

parent 504741b3
...@@ -67,7 +67,9 @@ CHAMELEON_CL_CB(zssssm, starpu_matrix_get_nx(task->handles[0]), starpu_ma ...@@ -67,7 +67,9 @@ CHAMELEON_CL_CB(zssssm, starpu_matrix_get_nx(task->handles[0]), starpu_ma
CHAMELEON_CL_CB(zsymm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0, 2.*M*M *N) CHAMELEON_CL_CB(zsymm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0, 2.*M*M *N)
CHAMELEON_CL_CB(zsyr2k, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+2.*M*N)*M) CHAMELEON_CL_CB(zsyr2k, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+2.*M*N)*M)
CHAMELEON_CL_CB(zsyrk, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+ M)*M*N) CHAMELEON_CL_CB(zsyrk, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+ M)*M*N)
CHAMELEON_CL_CB(ztplqt, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]), 2.*M*N*K)
CHAMELEON_CL_CB(ztpqrt, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]), 2.*M*N*K) CHAMELEON_CL_CB(ztpqrt, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]), 2.*M*N*K)
CHAMELEON_CL_CB(ztpmlqt, starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]), 4.*M*N*K)
CHAMELEON_CL_CB(ztpmqrt, starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]), 4.*M*N*K) CHAMELEON_CL_CB(ztpmqrt, starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]), 4.*M*N*K)
CHAMELEON_CL_CB(ztrasm, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, 0.5*M*(M+1)) CHAMELEON_CL_CB(ztrasm, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, 0.5*M*(M+1))
CHAMELEON_CL_CB(ztrmm, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0, M*M*N) CHAMELEON_CL_CB(ztrmm, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0, M*M*N)
......
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_ztplqt.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2016-12-15
* @precisions normal z -> s d c
*
**/
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg)
{
int M;
int N;
int L;
int ib;
MORSE_Complex64_t *A;
int lda;
MORSE_Complex64_t *B;
int ldb;
MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *WORK;
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
T = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib,
&lda, &ldb, &ldt );
CORE_ztplqt( M, N, L, ib,
A, lda, B, ldb, T, ldt, WORK );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CODELETS_CPU(ztplqt, 4, cl_ztplqt_cpu_func)
void
MORSE_TASK_ztplqt( const MORSE_option_t *options,
int M, int N, int L, int ib, int nb,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb,
const MORSE_desc_t *T, int Tm, int Tn, int ldt )
{
struct starpu_codelet *codelet = &cl_ztplqt;
void (*callback)(void*) = options->profiling ? cl_ztplqt_callback : NULL;
MORSE_BEGIN_ACCESS_DECLARATION;
MORSE_ACCESS_RW(A, Am, An);
MORSE_ACCESS_RW(B, Bm, Bn);
MORSE_ACCESS_W(T, Tm, Tn);
MORSE_END_ACCESS_DECLARATION;
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_VALUE, &M, sizeof(int),
STARPU_VALUE, &N, sizeof(int),
STARPU_VALUE, &L, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
STARPU_W, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
STARPU_VALUE, &ldt, sizeof(int),
/* Other options */
STARPU_SCRATCH, options->ws_worker,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, B->get_rankof(B, Bm, Bn),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, (L == 0) ? "ztplqs" : "ztplqt",
#endif
0);
(void)ib; (void)nb;
}
/**
*
* @copyright 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @file codelet_ztpmlqt.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2016-12-15
* @precisions normal z -> s d c
*
**/
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztpmlqt_cpu_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum trans;
int M;
int N;
int K;
int L;
int ib;
const MORSE_Complex64_t *V;
int ldv;
const MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *A;
int lda;
MORSE_Complex64_t *B;
int ldb;
MORSE_Complex64_t *WORK;
V = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
T = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
&ldv, &ldt, &lda, &ldb );
CORE_ztpmlqt( side, trans, M, N, K, L, ib,
V, ldv, T, ldt, A, lda, B, ldb, WORK );
}
#if defined(CHAMELEON_USE_CUDA)
static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum trans;
int M;
int N;
int K;
int L;
int ib;
const cuDoubleComplex *V;
int ldv;
const cuDoubleComplex *T;
int ldt;
cuDoubleComplex *A;
int lda;
cuDoubleComplex *B;
int ldb;
cuDoubleComplex *W;
V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */
starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
&ldv, &ldt, &lda, &ldb );
RUNTIME_getStream(stream);
CUDA_ztpmlqt(
side, trans, M, N, K, L, ib,
V, ldv, T, ldt, A, lda, B, ldb,
W, stream );
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
}
#endif /* defined(CHAMELEON_USE_CUDA) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC)
void
MORSE_TASK_ztpmlqt( const MORSE_option_t *options,
MORSE_enum side, MORSE_enum trans,
int M, int N, int K, int L, int ib, int nb,
const MORSE_desc_t *V, int Vm, int Vn, int ldv,
const MORSE_desc_t *T, int Tm, int Tn, int ldt,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb )
{
struct starpu_codelet *codelet = &cl_ztpmlqt;
void (*callback)(void*) = options->profiling ? cl_ztpmlqt_callback : NULL;
MORSE_BEGIN_ACCESS_DECLARATION;
MORSE_ACCESS_R(V, Vm, Vn);
MORSE_ACCESS_R(T, Tm, Tn);
MORSE_ACCESS_RW(A, Am, An);
MORSE_ACCESS_RW(B, Bm, Bn);
MORSE_END_ACCESS_DECLARATION;
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_VALUE, &side, sizeof(MORSE_enum),
STARPU_VALUE, &trans, sizeof(MORSE_enum),
STARPU_VALUE, &M, sizeof(int),
STARPU_VALUE, &N, sizeof(int),
STARPU_VALUE, &K, sizeof(int),
STARPU_VALUE, &L, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
STARPU_VALUE, &ldv, sizeof(int),
STARPU_R, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
STARPU_VALUE, &ldt, sizeof(int),
STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
/* Other options */
STARPU_SCRATCH, options->ws_worker,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, B->get_rankof(B, Bm, Bn),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, (( L == 0 ) ? "ztsmlq" : "ztpmlqt"),
#endif
0);
(void)ib; (void)nb;
}
...@@ -81,7 +81,9 @@ ZCODELETS_HEADER(syssq) ...@@ -81,7 +81,9 @@ ZCODELETS_HEADER(syssq)
ZCODELETS_HEADER(trasm) ZCODELETS_HEADER(trasm)
ZCODELETS_HEADER(trssq) ZCODELETS_HEADER(trssq)
ZCODELETS_HEADER(trtri) ZCODELETS_HEADER(trtri)
ZCODELETS_HEADER(tplqt)
ZCODELETS_HEADER(tpqrt) ZCODELETS_HEADER(tpqrt)
ZCODELETS_HEADER(tpmlqt)
ZCODELETS_HEADER(tpmqrt) ZCODELETS_HEADER(tpmqrt)
ZCODELETS_HEADER(tslqt) ZCODELETS_HEADER(tslqt)
ZCODELETS_HEADER(tsmlq) ZCODELETS_HEADER(tsmlq)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment