Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 22869caf authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Add codelets in all three runtimes

parent eb02ab84
No related branches found
No related tags found
1 merge request!3Add ztpqrt, and ztpgqrt functions
Showing
with 694 additions and 221 deletions
......@@ -98,7 +98,7 @@
*
* @param[out] WORK
* Workspace array of size LDWORK-by-NB.
* LDWORK = N if side =MorseLeft, or M if side = MorseRight.
* LDWORK = N if side = MorseLeft, or M if side = MorseRight.
*
*******************************************************************************
*
......
......@@ -26,6 +26,84 @@
#
###
# List of codelets required by all runtimes
# -----------------------------------------
set(CODELETS_ZSRC
codelets/codelet_ztile_zero.c
codelets/codelet_zasum.c
##################
# BLAS 1
##################
codelets/codelet_zaxpy.c
##################
# BLAS 3
##################
codelets/codelet_zgemm.c
codelets/codelet_zhemm.c
codelets/codelet_zher2k.c
codelets/codelet_zherk.c
codelets/codelet_zsymm.c
codelets/codelet_zsyr2k.c
codelets/codelet_zsyrk.c
codelets/codelet_ztrmm.c
codelets/codelet_ztrsm.c
##################
# LAPACK
##################
codelets/codelet_zgeadd.c
codelets/codelet_zlascal.c
codelets/codelet_zgelqt.c
codelets/codelet_zgeqrt.c
codelets/codelet_zgessm.c
codelets/codelet_zgessq.c
codelets/codelet_zgetrf.c
codelets/codelet_zgetrf_incpiv.c
codelets/codelet_zgetrf_nopiv.c
codelets/codelet_zhe2ge.c
codelets/codelet_zherfb.c
codelets/codelet_zhessq.c
codelets/codelet_zlacpy.c
codelets/codelet_zlange.c
codelets/codelet_zlanhe.c
codelets/codelet_zlansy.c
codelets/codelet_zlantr.c
codelets/codelet_zlaset2.c
codelets/codelet_zlaset.c
codelets/codelet_zlatro.c
codelets/codelet_zlauum.c
codelets/codelet_zplghe.c
codelets/codelet_zplgsy.c
codelets/codelet_zplrnt.c
codelets/codelet_zplssq.c
codelets/codelet_zpotrf.c
codelets/codelet_zssssm.c
codelets/codelet_zsyssq.c
codelets/codelet_zsytrf_nopiv.c
codelets/codelet_ztpqrt.c
codelets/codelet_ztpmqrt.c
codelets/codelet_ztradd.c
codelets/codelet_ztrasm.c
codelets/codelet_ztrssq.c
codelets/codelet_ztrtri.c
codelets/codelet_ztslqt.c
codelets/codelet_ztsmlq.c
codelets/codelet_ztsmqr.c
codelets/codelet_ztsmlq_hetra1.c
codelets/codelet_ztsmqr_hetra1.c
codelets/codelet_ztsqrt.c
codelets/codelet_ztstrf.c
codelets/codelet_zttlqt.c
codelets/codelet_zttmlq.c
codelets/codelet_zttmqr.c
codelets/codelet_zttqrt.c
codelets/codelet_zunmlq.c
codelets/codelet_zunmqr.c
##################
# BUILD
##################
codelets/codelet_zbuild.c
)
# Check for the subdirectories
# ----------------------------
if( CHAMELEON_SCHED_QUARK )
......
......@@ -88,77 +88,7 @@ set(RUNTIME_COMMON
# ------------------------------------------------------
set(RUNTIME_SRCS_GENERATED "")
set(ZSRC
codelets/codelet_ztile_zero.c
codelets/codelet_zasum.c
##################
# BLAS 1
##################
codelets/codelet_zaxpy.c
##################
# BLAS 3
##################
codelets/codelet_zgemm.c
codelets/codelet_zhemm.c
codelets/codelet_zher2k.c
codelets/codelet_zherk.c
codelets/codelet_zsymm.c
codelets/codelet_zsyr2k.c
codelets/codelet_zsyrk.c
codelets/codelet_ztrmm.c
codelets/codelet_ztrsm.c
##################
# LAPACK
##################
codelets/codelet_zgeadd.c
codelets/codelet_zlascal.c
codelets/codelet_zgelqt.c
codelets/codelet_zgeqrt.c
codelets/codelet_zgessm.c
codelets/codelet_zgessq.c
codelets/codelet_zgetrf.c
codelets/codelet_zgetrf_incpiv.c
codelets/codelet_zgetrf_nopiv.c
codelets/codelet_zhe2ge.c
codelets/codelet_zherfb.c
codelets/codelet_zhessq.c
codelets/codelet_zlacpy.c
codelets/codelet_zlange.c
codelets/codelet_zlanhe.c
codelets/codelet_zlansy.c
codelets/codelet_zlantr.c
codelets/codelet_zlaset2.c
codelets/codelet_zlaset.c
codelets/codelet_zlatro.c
codelets/codelet_zlauum.c
codelets/codelet_zplghe.c
codelets/codelet_zplgsy.c
codelets/codelet_zplrnt.c
codelets/codelet_zplssq.c
codelets/codelet_zpotrf.c
codelets/codelet_zssssm.c
codelets/codelet_zsyssq.c
codelets/codelet_zsytrf_nopiv.c
codelets/codelet_ztradd.c
codelets/codelet_ztrasm.c
codelets/codelet_ztrssq.c
codelets/codelet_ztrtri.c
codelets/codelet_ztslqt.c
codelets/codelet_ztsmlq.c
codelets/codelet_ztsmqr.c
codelets/codelet_ztsmlq_hetra1.c
codelets/codelet_ztsmqr_hetra1.c
codelets/codelet_ztsqrt.c
codelets/codelet_ztstrf.c
codelets/codelet_zttlqt.c
codelets/codelet_zttmlq.c
codelets/codelet_zttmqr.c
codelets/codelet_zttqrt.c
codelets/codelet_zunmlq.c
codelets/codelet_zunmqr.c
##################
# BUILD
##################
codelets/codelet_zbuild.c
${CODELETS_ZSRC}
)
precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}"
......
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_ztpqrt.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2016-12-15
* @precisions normal z -> s d c
*
**/
#include "runtime/parsec/include/morse_parsec.h"
static int
CORE_ztpmqrt_parsec(dague_execution_unit_t *context,
dague_execution_context_t *this_task)
{
MORSE_enum *side;
MORSE_enum *trans;
int *M;
int *N;
int *K;
int *L;
int *ib;
const MORSE_Complex64_t *V;
int *ldv;
const MORSE_Complex64_t *T;
int *ldt;
MORSE_Complex64_t *A;
int *lda;
MORSE_Complex64_t *B;
int *ldb;
MORSE_Complex64_t *WORK;
dague_dtd_unpack_args(
this_task,
UNPACK_VALUE, &side,
UNPACK_VALUE, &trans,
UNPACK_VALUE, &M,
UNPACK_VALUE, &N,
UNPACK_VALUE, &K,
UNPACK_VALUE, &L,
UNPACK_VALUE, &ib,
UNPACK_DATA, &V,
UNPACK_VALUE, &ldv,
UNPACK_DATA, &T,
UNPACK_VALUE, &ldt,
UNPACK_DATA, &A,
UNPACK_VALUE, &lda,
UNPACK_DATA, &B,
UNPACK_VALUE, &ldb,
UNPACK_SCRATCH, &WORK );
CORE_ztpmqrt( *side, *trans, *M, *N, *K, *L, *ib,
V, *ldv, T, *ldt, A, *lda, B, *ldb, WORK );
return 0;
}
void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
MORSE_enum side, MORSE_enum trans,
int M, int N, int K, int L, int ib, int nb,
const MORSE_desc_t *V, int Vm, int Vn, int ldv,
const MORSE_desc_t *T, int Tm, int Tn, int ldt,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb )
{
dague_dtd_handle_t* DAGUE_dtd_handle = (dague_dtd_handle_t *)(options->sequence->schedopt);
dague_insert_task(
DAGUE_dtd_handle, CORE_ztpmqrt_parsec, "tpmqrt",
sizeof(MORSE_enum), &side, VALUE,
sizeof(MORSE_enum), &trans, VALUE,
sizeof(int), &M, VALUE,
sizeof(int), &N, VALUE,
sizeof(int), &K, VALUE,
sizeof(int), &L, VALUE,
sizeof(int), &ib, VALUE,
PASSED_BY_REF, RTBLKADDR( V, MORSE_Complex64_t, Vm, Vn ), INPUT | REGION_FULL,
sizeof(int), &ldv, VALUE,
PASSED_BY_REF, RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), INPUT | REGION_FULL,
sizeof(int), &ldt, VALUE,
PASSED_BY_REF, RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT | REGION_FULL,
sizeof(int), &lda, VALUE,
PASSED_BY_REF, RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT | REGION_FULL,
sizeof(int), &ldb, VALUE,
sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH,
0);
}
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_ztpqrt.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2016-12-15
* @precisions normal z -> s d c
*
**/
#include "runtime/parsec/include/morse_parsec.h"
static int
CORE_ztpqrt_parsec(dague_execution_unit_t *context,
dague_execution_context_t *this_task)
{
int *M;
int *N;
int *L;
int *ib;
MORSE_Complex64_t *A;
int *lda;
MORSE_Complex64_t *B;
int *ldb;
MORSE_Complex64_t *T;
int *ldt;
MORSE_Complex64_t *WORK;
dague_dtd_unpack_args(
this_task,
UNPACK_VALUE, &M,
UNPACK_VALUE, &N,
UNPACK_VALUE, &L,
UNPACK_VALUE, &ib,
UNPACK_DATA, &A,
UNPACK_VALUE, &lda,
UNPACK_DATA, &B,
UNPACK_VALUE, &ldb,
UNPACK_DATA, &T,
UNPACK_VALUE, &ldt,
UNPACK_SCRATCH, &WORK );
CORE_ztpqrt( *M, *N, *L, *ib,
A, *lda, B, *ldb, T, *ldt, WORK );
return 0;
}
void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
int M, int N, int L, int ib, int nb,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb,
const MORSE_desc_t *T, int Tm, int Tn, int ldt )
{
dague_dtd_handle_t* DAGUE_dtd_handle = (dague_dtd_handle_t *)(options->sequence->schedopt);
dague_insert_task(
DAGUE_dtd_handle, CORE_ztpqrt_parsec, "tpqrt",
sizeof(int), &M, VALUE,
sizeof(int), &N, VALUE,
sizeof(int), &L, VALUE,
sizeof(int), &ib, VALUE,
PASSED_BY_REF, RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT | REGION_U | REGION_D,
sizeof(int), &lda, VALUE,
PASSED_BY_REF, RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT | REGION_FULL,
sizeof(int), &ldb, VALUE,
PASSED_BY_REF, RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), INOUT | REGION_FULL,
sizeof(int), &ldt, VALUE,
sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH,
0);
}
......@@ -86,77 +86,7 @@ set(RUNTIME_COMMON
# ------------------------------------------------------
set(RUNTIME_SRCS_GENERATED "")
set(ZSRC
codelets/codelet_ztile_zero.c
codelets/codelet_zasum.c
##################
# BLAS 1
##################
codelets/codelet_zaxpy.c
##################
# BLAS 3
##################
codelets/codelet_zgemm.c
codelets/codelet_zhemm.c
codelets/codelet_zher2k.c
codelets/codelet_zherk.c
codelets/codelet_zsymm.c
codelets/codelet_zsyr2k.c
codelets/codelet_zsyrk.c
codelets/codelet_ztrmm.c
codelets/codelet_ztrsm.c
##################
# LAPACK
##################
codelets/codelet_zgeadd.c
codelets/codelet_zlascal.c
codelets/codelet_zgelqt.c
codelets/codelet_zgeqrt.c
codelets/codelet_zgessm.c
codelets/codelet_zgessq.c
codelets/codelet_zgetrf.c
codelets/codelet_zgetrf_incpiv.c
codelets/codelet_zgetrf_nopiv.c
codelets/codelet_zhe2ge.c
codelets/codelet_zherfb.c
codelets/codelet_zhessq.c
codelets/codelet_zlacpy.c
codelets/codelet_zlange.c
codelets/codelet_zlanhe.c
codelets/codelet_zlansy.c
codelets/codelet_zlantr.c
codelets/codelet_zlaset2.c
codelets/codelet_zlaset.c
codelets/codelet_zlatro.c
codelets/codelet_zlauum.c
codelets/codelet_zplghe.c
codelets/codelet_zplgsy.c
codelets/codelet_zplrnt.c
codelets/codelet_zplssq.c
codelets/codelet_zpotrf.c
codelets/codelet_zssssm.c
codelets/codelet_zsyssq.c
codelets/codelet_zsytrf_nopiv.c
codelets/codelet_ztradd.c
codelets/codelet_ztrasm.c
codelets/codelet_ztrssq.c
codelets/codelet_ztrtri.c
codelets/codelet_ztslqt.c
codelets/codelet_ztsmlq.c
codelets/codelet_ztsmqr.c
codelets/codelet_ztsmlq_hetra1.c
codelets/codelet_ztsmqr_hetra1.c
codelets/codelet_ztsqrt.c
codelets/codelet_ztstrf.c
codelets/codelet_zttlqt.c
codelets/codelet_zttmlq.c
codelets/codelet_zttmqr.c
codelets/codelet_zttqrt.c
codelets/codelet_zunmlq.c
codelets/codelet_zunmqr.c
##################
# BUILD
##################
codelets/codelet_zbuild.c
${CODELETS_ZSRC}
)
precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}"
......
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_ztpqrt.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2016-12-15
* @precisions normal z -> s d c
*
**/
#include "runtime/quark/include/morse_quark.h"
static void
CORE_ztpmqrt_quark( Quark *quark )
{
MORSE_enum side;
MORSE_enum trans;
int M;
int N;
int K;
int L;
int ib;
const MORSE_Complex64_t *V;
int ldv;
const MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *A;
int lda;
MORSE_Complex64_t *B;
int ldb;
MORSE_Complex64_t *WORK;
quark_unpack_args_16( quark, side, trans, M, N, K, L, ib,
V, ldv, T, ldt, A, lda, B, ldb, WORK );
CORE_ztpmqrt( side, trans, M, N, K, L, ib,
V, ldv, T, ldt, A, lda, B, ldb, WORK );
}
void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
MORSE_enum side, MORSE_enum trans,
int M, int N, int K, int L, int ib, int nb,
const MORSE_desc_t *V, int Vm, int Vn, int ldv,
const MORSE_desc_t *T, int Tm, int Tn, int ldt,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb )
{
quark_option_t *opt = (quark_option_t*)(options->schedopt);
DAG_CORE_TSMQR;
QUARK_Insert_Task(
opt->quark, CORE_ztpmqrt_quark, (Quark_Task_Flags*)opt,
sizeof(MORSE_enum), &side, VALUE,
sizeof(MORSE_enum), &trans, VALUE,
sizeof(int), &M, VALUE,
sizeof(int), &N, VALUE,
sizeof(int), &K, VALUE,
sizeof(int), &L, VALUE,
sizeof(int), &ib, VALUE,
sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( V, MORSE_Complex64_t, Vm, Vn ), INPUT,
sizeof(int), &ldv, VALUE,
sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), INPUT,
sizeof(int), &ldt, VALUE,
sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT,
sizeof(int), &lda, VALUE,
sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT,
sizeof(int), &ldb, VALUE,
sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH,
0);
}
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_ztpqrt.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2016-12-15
* @precisions normal z -> s d c
*
**/
#include "runtime/quark/include/morse_quark.h"
static void
CORE_ztpqrt_quark( Quark *quark )
{
int M;
int N;
int L;
int ib;
MORSE_Complex64_t *A;
int lda;
MORSE_Complex64_t *B;
int ldb;
MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *WORK;
quark_unpack_args_11( quark, M, N, L, ib,
A, lda, B, ldb, T, ldt, WORK );
CORE_ztpqrt( M, N, L, ib,
A, lda, B, ldb, T, ldt, WORK );
}
void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
int M, int N, int L, int ib, int nb,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb,
const MORSE_desc_t *T, int Tm, int Tn, int ldt )
{
quark_option_t *opt = (quark_option_t*)(options->schedopt);
DAG_CORE_TSQRT;
QUARK_Insert_Task(
opt->quark, CORE_ztpqrt_quark, (Quark_Task_Flags*)opt,
sizeof(int), &M, VALUE,
sizeof(int), &N, VALUE,
sizeof(int), &L, VALUE,
sizeof(int), &ib, VALUE,
sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT | QUARK_REGION_U | QUARK_REGION_D,
sizeof(int), &lda, VALUE,
sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT,
sizeof(int), &ldb, VALUE,
sizeof(MORSE_Complex64_t)*nb*ib, RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), OUTPUT,
sizeof(int), &ldt, VALUE,
sizeof(MORSE_Complex64_t)*(ib+1)*nb, NULL, SCRATCH,
0);
}
......@@ -106,77 +106,7 @@ set_source_files_properties(control/runtime_profiling.c PROPERTIES COMPILE_FLAGS
set(RUNTIME_SRCS_GENERATED "")
set(ZSRC
codelets/codelet_zcallback.c
codelets/codelet_ztile_zero.c
codelets/codelet_zasum.c
##################
# BLAS 1
##################
codelets/codelet_zaxpy.c
##################
# BLAS 3
##################
codelets/codelet_zgemm.c
codelets/codelet_zhemm.c
codelets/codelet_zher2k.c
codelets/codelet_zherk.c
codelets/codelet_zsymm.c
codelets/codelet_zsyr2k.c
codelets/codelet_zsyrk.c
codelets/codelet_ztrmm.c
codelets/codelet_ztrsm.c
##################
# LAPACK
##################
codelets/codelet_zgeadd.c
codelets/codelet_zlascal.c
codelets/codelet_zgelqt.c
codelets/codelet_zgeqrt.c
codelets/codelet_zgessm.c
codelets/codelet_zgessq.c
codelets/codelet_zgetrf.c
codelets/codelet_zgetrf_incpiv.c
codelets/codelet_zgetrf_nopiv.c
codelets/codelet_zhe2ge.c
codelets/codelet_zherfb.c
codelets/codelet_zhessq.c
codelets/codelet_zlacpy.c
codelets/codelet_zlange.c
codelets/codelet_zlanhe.c
codelets/codelet_zlansy.c
codelets/codelet_zlantr.c
codelets/codelet_zlaset2.c
codelets/codelet_zlaset.c
codelets/codelet_zlatro.c
codelets/codelet_zlauum.c
codelets/codelet_zplghe.c
codelets/codelet_zplgsy.c
codelets/codelet_zplrnt.c
codelets/codelet_zplssq.c
codelets/codelet_zpotrf.c
codelets/codelet_zssssm.c
codelets/codelet_zsyssq.c
codelets/codelet_zsytrf_nopiv.c
codelets/codelet_ztradd.c
codelets/codelet_ztrasm.c
codelets/codelet_ztrssq.c
codelets/codelet_ztrtri.c
codelets/codelet_ztslqt.c
codelets/codelet_ztsmlq.c
codelets/codelet_ztsmqr.c
codelets/codelet_ztsmlq_hetra1.c
codelets/codelet_ztsmqr_hetra1.c
codelets/codelet_ztsqrt.c
codelets/codelet_ztstrf.c
codelets/codelet_zttlqt.c
codelets/codelet_zttmlq.c
codelets/codelet_zttmqr.c
codelets/codelet_zttqrt.c
codelets/codelet_zunmlq.c
codelets/codelet_zunmqr.c
##################
# BUILD
##################
codelets/codelet_zbuild.c
${CODELETS_ZSRC}
)
precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}"
......
......@@ -67,7 +67,9 @@ CHAMELEON_CL_CB(zssssm, starpu_matrix_get_nx(task->handles[0]), starpu_ma
CHAMELEON_CL_CB(zsymm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0, 2.*M*M *N);
CHAMELEON_CL_CB(zsyr2k, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+2.*M*N)*M);
CHAMELEON_CL_CB(zsyrk, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+ M)*M*N);
CHAMELEON_CL_CB(ztrasm, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, 0.5*M*(M+1));
CHAMELEON_CL_CB(ztpqrt, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]), 2.*M*N*K);
CHAMELEON_CL_CB(ztpmqrt, starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]), 4.*M*N*K);
CHAMELEON_CL_CB(ztrasm, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, 0.5*M*(M+1));
CHAMELEON_CL_CB(ztrmm, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0, M*M*N);
CHAMELEON_CL_CB(ztrsm, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0, M*M*N);
CHAMELEON_CL_CB(ztrtri, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (1./3.)*M *M*M);
......
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_ztpmqrt.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2016-12-15
* @precisions normal z -> s d c
*
**/
#include "runtime/starpu/include/morse_starpu.h"
#include "runtime/starpu/include/runtime_codelet_z.h"
void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
MORSE_enum side, MORSE_enum trans,
int M, int N, int K, int L, int ib, int nb,
const MORSE_desc_t *V, int Vm, int Vn, int ldv,
const MORSE_desc_t *T, int Tm, int Tn, int ldt,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb )
{
struct starpu_codelet *codelet = &cl_ztpmqrt;
void (*callback)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL;
if ( morse_desc_islocal( A, Am, An ) ||
morse_desc_islocal( B, Bm, Bn ) ||
morse_desc_islocal( V, Vm, Vn ) ||
morse_desc_islocal( T, Tm, Tn ) )
{
starpu_insert_task(
codelet,
STARPU_VALUE, &side, sizeof(MORSE_enum),
STARPU_VALUE, &trans, sizeof(MORSE_enum),
STARPU_VALUE, &M, sizeof(int),
STARPU_VALUE, &N, sizeof(int),
STARPU_VALUE, &K, sizeof(int),
STARPU_VALUE, &L, sizeof(int),
STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
STARPU_VALUE, &ldv, sizeof(int),
STARPU_R, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
STARPU_VALUE, &ldt, sizeof(int),
STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
/* Other options */
STARPU_SCRATCH, options->ws_worker,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, execution_rank,
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "ztpmqrt",
#endif
0);
}
}
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum trans;
int M;
int N;
int K;
int L;
int ib;
const MORSE_Complex64_t *V;
int ldv;
const MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *A;
int lda;
MORSE_Complex64_t *B;
int ldb;
MORSE_Complex64_t *WORK;
V = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
T = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
&ldv, &ldt, &lda, &ldb );
CORE_ztpmqrt( side, trans, M, N, K, L, ib,
V, ldv, T, ldt, A, lda, B, ldb, WORK );
}
#if defined(CHAMELEON_USE_CUDA)
static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum trans;
int M;
int N;
int K;
int L;
int k;
int ib;
const cuDoubleComplex *V;
int ldv;
const cuDoubleComplex *T;
int ldt;
cuDoubleComplex *A;
int lda;
cuDoubleComplex *B;
int ldb;
cuDoubleComplex *W;
CUstream stream;
V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */
starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
&ldv, &ldt, &lda, &ldb );
stream = starpu_cuda_get_local_stream();
cublasSetKernelStream( stream );
CUDA_ztpmqrt(
side, trans, M, N, K, L, ib,
A, lda, B, ldb, V, ldv, T, ldt,
W, stream );
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
}
#endif /* defined(CHAMELEON_USE_CUDA) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC)
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_ztpqrt.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2016-12-15
* @precisions normal z -> s d c
*
**/
#include "runtime/starpu/include/morse_starpu.h"
#include "runtime/starpu/include/runtime_codelet_z.h"
void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
int M, int N, int L, int ib, int nb,
const MORSE_desc_t *A, int Am, int An, int lda,
const MORSE_desc_t *B, int Bm, int Bn, int ldb,
const MORSE_desc_t *T, int Tm, int Tn, int ldt )
{
struct starpu_codelet *codelet = &cl_ztpqrt;
void (*callback)(void*) = options->profiling ? cl_ztpqrt_callback : NULL;
if ( morse_desc_islocal( A, Am, An ) ||
morse_desc_islocal( B, Bm, Bn ) ||
morse_desc_islocal( T, Tm, Tn ) )
{
starpu_insert_task(
codelet,
STARPU_VALUE, &M, sizeof(int),
STARPU_VALUE, &N, sizeof(int),
STARPU_VALUE, &L, sizeof(int),
STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
STARPU_RW, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
STARPU_VALUE, &ldt, sizeof(int),
/* Other options */
STARPU_SCRATCH, options->ws_worker,
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_USE_MPI)
STARPU_EXECUTE_ON_NODE, execution_rank,
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "ztpqrt",
#endif
0);
}
}
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg)
{
int M;
int N;
int L;
int ib;
MORSE_Complex64_t *A;
int lda;
MORSE_Complex64_t *B;
int ldb;
MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *WORK;
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
T = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib,
&lda, &ldb, &ldt );
CORE_ztpqrt( M, N, L, ib,
A, lda, B, ldb, T, ldt, WORK );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func)
......@@ -119,6 +119,6 @@
extern struct starpu_perfmodel cl_##name##_fake; \
void cl_##name##_callback(); \
void profiling_display_##name##_info(void); \
void estimate_##name##_sustained_peak(double *res);
void estimate_##name##_sustained_peak(double *res)
#endif /* __CODELET_PROFILE_H__ */
......@@ -73,6 +73,8 @@ ZCODELETS_HEADER(syssq)
ZCODELETS_HEADER(trasm)
ZCODELETS_HEADER(trssq)
ZCODELETS_HEADER(trtri)
ZCODELETS_HEADER(tpqrt)
ZCODELETS_HEADER(tpmqrt)
ZCODELETS_HEADER(tslqt)
ZCODELETS_HEADER(tsmlq)
ZCODELETS_HEADER(tsmqr)
......
......@@ -87,7 +87,7 @@
#define CODELETS_ALL_HEADER(name) \
CHAMELEON_CL_CB_HEADER(name) \
CHAMELEON_CL_CB_HEADER(name); \
void cl_##name##_load_fake_model(void); \
void cl_##name##_restore_model(void); \
extern struct starpu_codelet cl_##name; \
......
......@@ -26,10 +26,10 @@
#ifndef _MORSE_STARPU_WORKSPACE_H_
#define _MORSE_STARPU_WORKSPACE_H_
/*
* Allocate workspace in host memory: CPU for any worker
/*
* Allocate workspace in host memory: CPU for any worker
* or allocate workspace in worker's memory: main memory for cpu workers,
* and embedded memory for CUDA devices.
* and embedded memory for CUDA devices.
*/
#define MORSE_HOST_MEM 0
#define MORSE_WORKER_MEM 1
......@@ -48,7 +48,7 @@ typedef struct morse_starpu_ws_s MORSE_starpu_ws_t;
* (eg. MORSE_CUDA|MORSE_CPU for all CPU and GPU workers). The
* memory_location argument indicates whether this should be a buffer in host
* memory or in worker's memory (MORSE_HOST_MEM or MORSE_WORKER_MEM). This function
* returns 0 upon successful completion.
* returns 0 upon successful completion.
*/
int RUNTIME_starpu_ws_alloc ( MORSE_starpu_ws_t **workspace, size_t size, int which_workers, int memory_location);
int RUNTIME_starpu_ws_free ( MORSE_starpu_ws_t *workspace);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment