Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 14482041 authored by Mathieu Faverge
Browse files

Remove all TS/TT codelets for QR/LQ kernels

parent 5715a0a8
No related branches found
No related tags found
1 merge request: !129 "Fix #56 - Issue in CUDA_zparfb kernels for TT case"
Showing
with 402 additions and 2730 deletions
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-07
* @precisions normal z -> c d s
*
*/
......@@ -31,435 +31,499 @@
* Declarations of QUARK wrappers (called by CHAMELEON) - alphabetical order
*/
void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
cham_store_t storev, cham_uplo_t uplo, int M, int N,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn );
cham_store_t storev, cham_uplo_t uplo, int M, int N,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn );
void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
cham_trans_t trans, int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb );
cham_trans_t trans, int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb );
void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
cham_uplo_t uplo,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha,
const CHAM_desc_t *A, int Am, int An, int lda );
cham_uplo_t uplo,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha,
const CHAM_desc_t *A, int Am, int An, int lda );
void INSERT_TASK_zbrdalg( const RUNTIME_option_t *options,
cham_uplo_t uplo,
int N, int NB,
const CHAM_desc_t *A,
const CHAM_desc_t *C, int Cm, int Cn,
const CHAM_desc_t *S, int Sm, int Sn,
int i, int j, int m, int grsiz, int BAND,
int *PCOL, int *ACOL, int *MCOL );
cham_uplo_t uplo,
int N, int NB,
const CHAM_desc_t *A,
const CHAM_desc_t *C, int Cm, int Cn,
const CHAM_desc_t *S, int Sm, int Sn,
int i, int j, int m, int grsiz, int BAND,
int *PCOL, int *ACOL, int *MCOL );
void INSERT_TASK_zgelqt( const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zgemm2( const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zgemm2( const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zgemm_f2( const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc,
const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc,
const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
void INSERT_TASK_zgemm_p2( const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAMELEON_Complex64_t **B, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAMELEON_Complex64_t **B, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zgemm_p2f1( const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAMELEON_Complex64_t **B, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc,
const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1 );
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAMELEON_Complex64_t **B, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc,
const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1 );
void INSERT_TASK_zgemm_p3( const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t **C, int ldc );
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t **C, int ldc );
void INSERT_TASK_zgeqrt( const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
int m, int n, int k, int ib, int nb,
int *IPIV,
const CHAM_desc_t *L, int Lm, int Ln, int ldl,
const CHAM_desc_t *D, int Dm, int Dn, int ldd,
const CHAM_desc_t *A, int Am, int An, int lda );
int m, int n, int k, int ib, int nb,
int *IPIV,
const CHAM_desc_t *L, int Lm, int Ln, int ldl,
const CHAM_desc_t *D, int Dm, int Dn, int ldd,
const CHAM_desc_t *A, int Am, int An, int lda );
void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
int m, int n,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
int m, int n,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
int m, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
int *IPIV,
cham_bool_t check_info, int iinfo );
int m, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
int *IPIV,
cham_bool_t check_info, int iinfo );
void INSERT_TASK_zgetrf_incpiv( const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *L, int Lm, int Ln, int ldl,
int *IPIV,
cham_bool_t check_info, int iinfo );
int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *L, int Lm, int Ln, int ldl,
int *IPIV,
cham_bool_t check_info, int iinfo );
void INSERT_TASK_zgetrf_nopiv( const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda, int iinfo );
int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda, int iinfo );
void INSERT_TASK_zgetrf_reclap( const RUNTIME_option_t *options,
int m, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
int *IPIV,
int m, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
int *IPIV,
cham_bool_t check_info, int iinfo,
int nbthread );
cham_bool_t check_info, int iinfo,
int nbthread );
void INSERT_TASK_zgetrf_rectil( const RUNTIME_option_t *options,
const CHAM_desc_t A, const CHAM_desc_t *Amn, int Amnm, int Amnn, int size,
int *IPIV,
const CHAM_desc_t A, const CHAM_desc_t *Amn, int Amnm, int Amnn, int size,
int *IPIV,
cham_bool_t check_info, int iinfo,
int nbthread );
cham_bool_t check_info, int iinfo,
int nbthread );
void INSERT_TASK_zgetrip( const RUNTIME_option_t *options,
int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA );
int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA );
void INSERT_TASK_zgetrip_f1( const RUNTIME_option_t *options,
int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA,
const CHAM_desc_t *fake, int fakem, int faken, int szeF, int paramF );
int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA,
const CHAM_desc_t *fake, int fakem, int faken, int szeF, int paramF );
void INSERT_TASK_zgetrip_f2( const RUNTIME_option_t *options,
int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA,
const CHAM_desc_t *fake1, int fake1m, int fake1n, int szeF1, int paramF1,
const CHAM_desc_t *fake2, int fake2m, int fake2n, int szeF2, int paramF2 );
int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA,
const CHAM_desc_t *fake1, int fake1m, int fake1n, int szeF1, int paramF1,
const CHAM_desc_t *fake2, int fake2m, int fake2n, int szeF2, int paramF2 );
void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options,
cham_uplo_t uplo,
int m, int n, int mb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
cham_uplo_t uplo,
int m, int n, int mb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
void INSERT_TASK_zhemm( const RUNTIME_option_t *options,
cham_side_t side, cham_uplo_t uplo,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
cham_side_t side, cham_uplo_t uplo,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zhegst( const RUNTIME_option_t *options,
int itype, cham_uplo_t uplo, int N,
const CHAM_desc_t *A, int Am, int An, int LDA,
const CHAM_desc_t *B, int Bm, int Bn, int LDB,
int iinfo );
int itype, cham_uplo_t uplo, int N,
const CHAM_desc_t *A, int Am, int An, int LDA,
const CHAM_desc_t *B, int Bm, int Bn, int LDB,
int iinfo );
void INSERT_TASK_zherk( const RUNTIME_option_t *options,
cham_uplo_t uplo, cham_trans_t trans,
int n, int k, int nb,
double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zher2k( const RUNTIME_option_t *options,
cham_uplo_t uplo, cham_trans_t trans,
int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int LDB,
double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zher2k( const RUNTIME_option_t *options,
cham_uplo_t uplo, cham_trans_t trans,
int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int LDB,
double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zherfb( const RUNTIME_option_t *options,
cham_uplo_t uplo,
int n, int k, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt,
const CHAM_desc_t *C, int Cm, int Cn, int ldc );
cham_uplo_t uplo,
int n, int k, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt,
const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
cham_uplo_t uplo, int m, int n, int mb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
cham_uplo_t uplo, int m, int n, int mb,
int displA, const CHAM_desc_t *A, int Am, int An, int lda,
int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb );
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
cham_uplo_t uplo, int m, int n, int mb,
int displA, const CHAM_desc_t *A, int Am, int An, int lda,
int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb );
void INSERT_TASK_zlange( const RUNTIME_option_t *options,
cham_normtype_t norm, int M, int N, int NB,
const CHAM_desc_t *A, int Am, int An, int LDA,
const CHAM_desc_t *B, int Bm, int Bn );
cham_normtype_t norm, int M, int N, int NB,
const CHAM_desc_t *A, int Am, int An, int LDA,
const CHAM_desc_t *B, int Bm, int Bn );
void INSERT_TASK_zlange_max( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *B, int Bm, int Bn );
const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *B, int Bm, int Bn );
void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
cham_uplo_t uplo, int n,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
cham_uplo_t uplo, int n,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
void INSERT_TASK_zlanhe( const RUNTIME_option_t *options,
cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
const CHAM_desc_t *A, int Am, int An, int LDA,
const CHAM_desc_t *B, int Bm, int Bn );
cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
const CHAM_desc_t *A, int Am, int An, int LDA,
const CHAM_desc_t *B, int Bm, int Bn );
void INSERT_TASK_zlansy( const RUNTIME_option_t *options,
cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
const CHAM_desc_t *A, int Am, int An, int LDA,
const CHAM_desc_t *B, int Bm, int Bn );
cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
const CHAM_desc_t *A, int Am, int An, int LDA,
const CHAM_desc_t *B, int Bm, int Bn );
void INSERT_TASK_zlantr( const RUNTIME_option_t *options,
cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
int M, int N, int NB,
const CHAM_desc_t *A, int Am, int An, int LDA,
const CHAM_desc_t *B, int Bm, int Bn );
cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
int M, int N, int NB,
const CHAM_desc_t *A, int Am, int An, int LDA,
const CHAM_desc_t *B, int Bm, int Bn );
void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
void INSERT_TASK_zlaset2( const RUNTIME_option_t *options,
cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha,
const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
CHAMELEON_Complex64_t beta, const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
void INSERT_TASK_zlaset2( const RUNTIME_option_t *options,
cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha,
const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
void INSERT_TASK_zlaswp( const RUNTIME_option_t *options,
int n, const CHAM_desc_t *A, int Am, int An, int lda,
int i1, int i2, int *ipiv, int inc );
int n, const CHAM_desc_t *A, int Am, int An, int lda,
int i1, int i2, int *ipiv, int inc );
void INSERT_TASK_zlaswp_f2( const RUNTIME_option_t *options,
int n, const CHAM_desc_t *A, int Am, int An, int lda,
int i1, int i2, int *ipiv, int inc,
const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
int n, const CHAM_desc_t *A, int Am, int An, int lda,
int i1, int i2, int *ipiv, int inc,
const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
void INSERT_TASK_zlaswp_ontile( const RUNTIME_option_t *options,
const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An,
int i1, int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel );
void INSERT_TASK_zlaswp_ontile_f2( const RUNTIME_option_t *options,
const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An,
int i1, int i2, int *ipiv, int inc,
const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
void INSERT_TASK_zlaswpc_ontile( const RUNTIME_option_t *options,
const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An,
int i1, int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel );
void INSERT_TASK_zlaswp_ontile_f2( const RUNTIME_option_t *options,
const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An,
int i1, int i2, int *ipiv, int inc,
const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
void INSERT_TASK_zlaswpc_ontile( const RUNTIME_option_t *options,
const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An,
int i1, int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel );
void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
cham_uplo_t uplo, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda );
cham_uplo_t uplo, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda );
void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
int bigM, int m0, int n0, unsigned long long int seed );
double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
int bigM, int m0, int n0, unsigned long long int seed );
void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
int bigM, int m0, int n0, unsigned long long int seed );
CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
int bigM, int m0, int n0, unsigned long long int seed );
void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
int bigM, int m0, int n0, unsigned long long int seed );
int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
int bigM, int m0, int n0, unsigned long long int seed );
void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
cham_uplo_t uplo, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
cham_uplo_t uplo, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
int iinfo );
int iinfo );
void INSERT_TASK_zshift( const RUNTIME_option_t *options,
int s, int m, int n, int L,
CHAMELEON_Complex64_t *A );
int s, int m, int n, int L,
CHAMELEON_Complex64_t *A );
void INSERT_TASK_zshiftw( const RUNTIME_option_t *options,
int s, int cl, int m, int n, int L,
const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t *W );
int s, int cl, int m, int n, int L,
const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t *W );
void INSERT_TASK_zssssm( const RUNTIME_option_t *options,
int m1, int n1, int m2, int n2, int k, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *L1, int L1m, int L1n, int ldl1,
const CHAM_desc_t *L2, int L2m, int L2n, int ldl2,
const int *IPIV );
int m1, int n1, int m2, int n2, int k, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *L1, int L1m, int L1n, int ldl1,
const CHAM_desc_t *L2, int L2m, int L2n, int ldl2,
const int *IPIV );
void INSERT_TASK_zsymm( const RUNTIME_option_t *options,
cham_side_t side, cham_uplo_t uplo,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
cham_side_t side, cham_uplo_t uplo,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
cham_uplo_t uplo, cham_trans_t trans,
int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zsyr2k( const RUNTIME_option_t *options,
cham_uplo_t uplo, cham_trans_t trans,
int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int LDB,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zsyr2k( const RUNTIME_option_t *options,
cham_uplo_t uplo, cham_trans_t trans,
int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int LDB,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
cham_uplo_t uplo, int n,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
cham_uplo_t uplo, int n,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options,
cham_uplo_t uplo, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
int iinfo );
cham_uplo_t uplo, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
int iinfo );
void INSERT_TASK_zswpab( const RUNTIME_option_t *options,
int i, int n1, int n2,
const CHAM_desc_t *A, int Am, int An, int szeA );
int i, int n1, int n2,
const CHAM_desc_t *A, int Am, int An, int szeA );
void INSERT_TASK_zswptr_ontile( const RUNTIME_option_t *options,
const CHAM_desc_t descA, const CHAM_desc_t *Aij, int Aijm, int Aijn,
int i1, int i2, int *ipiv, int inc,
const CHAM_desc_t *Akk, int Akkm, int Akkn, int ldak );
const CHAM_desc_t descA, const CHAM_desc_t *Aij, int Aijm, int Aijn,
int i1, int i2, int *ipiv, int inc,
const CHAM_desc_t *Akk, int Akkm, int Akkn, int ldak );
void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
int m, int n, int l, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
cham_side_t side, cham_trans_t trans,
int M, int N, int K, int L, int ib, int nb,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt,
int m, int n, int l, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
cham_side_t side, cham_trans_t trans,
int M, int N, int K, int L, int ib, int nb,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
cham_side_t side, cham_trans_t trans,
int m, int n, int k, int l, int ib, int nb,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
cham_side_t side, cham_trans_t trans,
int m, int n, int k, int l, int ib, int nb,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
int m, int n, int l, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
int m, int n, int l, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_ztrdalg( const RUNTIME_option_t *options,
cham_uplo_t uplo,
int N, int NB,
const CHAM_desc_t *A,
const CHAM_desc_t *C, int Cm, int Cn,
const CHAM_desc_t *S, int Sm, int Sn,
int i, int j, int m, int grsiz, int BAND,
int *PCOL, int *ACOL, int *MCOL );
cham_uplo_t uplo,
int N, int NB,
const CHAM_desc_t *A,
const CHAM_desc_t *C, int Cm, int Cn,
const CHAM_desc_t *S, int Sm, int Sn,
int i, int j, int m, int grsiz, int BAND,
int *PCOL, int *ACOL, int *MCOL );
void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb );
cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb );
void INSERT_TASK_ztrasm( const RUNTIME_option_t *options,
cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn );
cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn );
void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
void INSERT_TASK_ztrmm_p2( const RUNTIME_option_t *options,
cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
CHAMELEON_Complex64_t **B, int ldb );
cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
CHAMELEON_Complex64_t **B, int ldb );
void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb );
void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
cham_uplo_t uplo, cham_diag_t diag,
int m, int n,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
cham_uplo_t uplo, cham_diag_t diag,
int m, int n,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
cham_uplo_t uplo, cham_diag_t diag, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
cham_uplo_t uplo, cham_diag_t diag, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
int iinfo );
void INSERT_TASK_ztslqt( const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_ztsmlq( const RUNTIME_option_t *options,
cham_side_t side, cham_trans_t trans,
int m1, int n1, int m2, int n2, int k, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
int iinfo );
void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options,
cham_side_t side, cham_trans_t trans,
int m1, int n1, int m2, int n2, int k, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_ztsmqr( const RUNTIME_option_t *options,
cham_side_t side, cham_trans_t trans,
int m1, int n1, int m2, int n2, int k, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
cham_side_t side, cham_trans_t trans,
int m1, int n1, int m2, int n2, int k, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options,
cham_side_t side, cham_trans_t trans,
int m1, int n1, int m2, int n2, int k, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_ztsqrt( const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
cham_side_t side, cham_trans_t trans,
int m1, int n1, int m2, int n2, int k, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_ztstrf( const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *U, int Um, int Un, int ldu,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *L, int Lm, int Ln, int ldl,
int *IPIV,
cham_bool_t check_info, int iinfo );
void INSERT_TASK_zttmqr( const RUNTIME_option_t *options,
cham_side_t side, cham_trans_t trans,
int m1, int n1, int m2, int n2, int k, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_zttqrt( const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_zttmlq( const RUNTIME_option_t *options,
cham_side_t side, cham_trans_t trans,
int m1, int n1, int m2, int n2, int k, int ib, int nb,
int m, int n, int ib, int nb,
const CHAM_desc_t *U, int Um, int Un, int ldu,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *L, int Lm, int Ln, int ldl,
int *IPIV,
cham_bool_t check_info, int iinfo );
void INSERT_TASK_zpamm( const RUNTIME_option_t *options,
int op, cham_side_t side, cham_store_t storev,
int m, int n, int k, int l,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_zttlqt( const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *T, int Tm, int Tn, int ldt );
void INSERT_TASK_zpamm( const RUNTIME_option_t *options,
int op, cham_side_t side, cham_store_t storev,
int m, int n, int k, int l,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *V, int Vm, int Vn, int ldv,
const CHAM_desc_t *W, int Wm, int Wn, int ldw );
const CHAM_desc_t *W, int Wm, int Wn, int ldw );
void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn );
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn );
void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
const CHAM_desc_t *RESULT, int RESULTm, int RESULTn );
const CHAM_desc_t *RESULT, int RESULTm, int RESULTn );
void INSERT_TASK_zunmlq( const RUNTIME_option_t *options,
cham_side_t side, cham_trans_t trans,
int m, int n, int ib, int nb, int k,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt,
const CHAM_desc_t *C, int Cm, int Cn, int ldc );
cham_side_t side, cham_trans_t trans,
int m, int n, int ib, int nb, int k,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt,
const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
cham_side_t side, cham_trans_t trans,
int m, int n, int k, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt,
const CHAM_desc_t *C, int Cm, int Cn, int ldc );
cham_side_t side, cham_trans_t trans,
int m, int n, int k, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt,
const CHAM_desc_t *C, int Cm, int Cn, int ldc );
void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An, int lda,
void *user_data, void* user_build_callback );
const CHAM_desc_t *A, int Am, int An, int lda,
void *user_data, void* user_build_callback );
/**
 * Keep these INSERT_TASK wrappers for backward compatibility
 */
/**
 * Backward-compatibility wrapper around INSERT_TASK_ztplqt().
 *
 * Every argument is forwarded unchanged; a constant 0 is inserted as the
 * fourth argument of INSERT_TASK_ztplqt() (presumably the size L of the
 * trapezoidal part expected by the TP kernel - to be confirmed against its
 * documentation).
 *
 * @param[in] options  Runtime options forwarded to the callee.
 * @param[in] m, n     Tile dimensions forwarded to the callee.
 * @param[in] ib, nb   Blocking sizes forwarded to the callee.
 * @param[in] A1, A2, T  Descriptors, tile coordinates (Xm, Xn) and leading
 *                       dimensions, all forwarded as is.
 */
static inline void
INSERT_TASK_ztslqt( const RUNTIME_option_t *options,
                    int m, int n, int ib, int nb,
                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    /* Plain call: ISO C does not allow `return <expression>;` in a function
     * returning void. */
    INSERT_TASK_ztplqt( options, m, n, 0, ib, nb,
                        A1, A1m, A1n, lda1,
                        A2, A2m, A2n, lda2,
                        T,  Tm,  Tn,  ldt );
}
/**
 * Backward-compatibility wrapper around INSERT_TASK_ztpqrt().
 *
 * Every argument is forwarded unchanged; a constant 0 is inserted as the
 * fourth argument of INSERT_TASK_ztpqrt() (presumably the size L of the
 * trapezoidal part expected by the TP kernel - to be confirmed against its
 * documentation).
 *
 * @param[in] options  Runtime options forwarded to the callee.
 * @param[in] m, n     Tile dimensions forwarded to the callee.
 * @param[in] ib, nb   Blocking sizes forwarded to the callee.
 * @param[in] A1, A2, T  Descriptors, tile coordinates (Xm, Xn) and leading
 *                       dimensions, all forwarded as is.
 */
static inline void
INSERT_TASK_ztsqrt( const RUNTIME_option_t *options,
                    int m, int n, int ib, int nb,
                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    /* Plain call: ISO C does not allow `return <expression>;` in a function
     * returning void. */
    INSERT_TASK_ztpqrt( options, m, n, 0, ib, nb,
                        A1, A1m, A1n, lda1,
                        A2, A2m, A2n, lda2,
                        T,  Tm,  Tn,  ldt );
}
/**
 * Backward-compatibility wrapper around INSERT_TASK_ztplqt().
 *
 * Every argument is forwarded unchanged; @p n is additionally passed as the
 * fourth argument of INSERT_TASK_ztplqt() (presumably the size L of the
 * trapezoidal part expected by the TP kernel - to be confirmed against its
 * documentation).
 *
 * @param[in] options  Runtime options forwarded to the callee.
 * @param[in] m, n     Tile dimensions forwarded to the callee.
 * @param[in] ib, nb   Blocking sizes forwarded to the callee.
 * @param[in] A1, A2, T  Descriptors, tile coordinates (Xm, Xn) and leading
 *                       dimensions, all forwarded as is.
 */
static inline void
INSERT_TASK_zttlqt( const RUNTIME_option_t *options,
                    int m, int n, int ib, int nb,
                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    /* Plain call: ISO C does not allow `return <expression>;` in a function
     * returning void. */
    INSERT_TASK_ztplqt( options, m, n, n, ib, nb,
                        A1, A1m, A1n, lda1,
                        A2, A2m, A2n, lda2,
                        T,  Tm,  Tn,  ldt );
}
/**
 * Backward-compatibility wrapper around INSERT_TASK_ztpqrt().
 *
 * Every argument is forwarded unchanged; @p m is additionally passed as the
 * fourth argument of INSERT_TASK_ztpqrt() (presumably the size L of the
 * trapezoidal part expected by the TP kernel - to be confirmed against its
 * documentation).
 *
 * @param[in] options  Runtime options forwarded to the callee.
 * @param[in] m, n     Tile dimensions forwarded to the callee.
 * @param[in] ib, nb   Blocking sizes forwarded to the callee.
 * @param[in] A1, A2, T  Descriptors, tile coordinates (Xm, Xn) and leading
 *                       dimensions, all forwarded as is.
 */
static inline void
INSERT_TASK_zttqrt( const RUNTIME_option_t *options,
                    int m, int n, int ib, int nb,
                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    /* Plain call: ISO C does not allow `return <expression>;` in a function
     * returning void. */
    INSERT_TASK_ztpqrt( options, m, n, m, ib, nb,
                        A1, A1m, A1n, lda1,
                        A2, A2m, A2n, lda2,
                        T,  Tm,  Tn,  ldt );
}
/**
 * Backward-compatibility wrapper around INSERT_TASK_ztpmlqt().
 *
 * The callee receives (side, trans, m2, n2, k, 0, ib, nb) followed by the
 * V, T, A1 and A2 descriptors in that order. The constant 0 is presumably the
 * size L of the trapezoidal part expected by the TP kernel (to be confirmed
 * against the callee's documentation).
 *
 * @param[in] options      Runtime options forwarded to the callee.
 * @param[in] side, trans  Forwarded to the callee.
 * @param[in] m1, n1       Kept for signature compatibility; not forwarded.
 * @param[in] m2, n2, k    Forwarded to the callee.
 * @param[in] ib, nb       Blocking sizes forwarded to the callee.
 * @param[in] A1, A2, V, T Descriptors, tile coordinates (Xm, Xn) and leading
 *                         dimensions, all forwarded as is.
 */
static inline void
INSERT_TASK_ztsmlq( const RUNTIME_option_t *options,
                    cham_side_t side, cham_trans_t trans,
                    int m1, int n1, int m2, int n2, int k, int ib, int nb,
                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    /* Not needed by the callee; silence unused-parameter diagnostics */
    (void)m1;
    (void)n1;

    /* Plain call: ISO C does not allow `return <expression>;` in a function
     * returning void. */
    INSERT_TASK_ztpmlqt( options, side, trans, m2, n2, k, 0, ib, nb,
                         V,  Vm,  Vn,  ldv,  T,  Tm,  Tn,  ldt,
                         A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
}
/**
 * Backward-compatibility wrapper around INSERT_TASK_ztpmqrt().
 *
 * The callee receives (side, trans, m2, n2, k, 0, ib, nb) followed by the
 * V, T, A1 and A2 descriptors in that order. The constant 0 is presumably the
 * size L of the trapezoidal part expected by the TP kernel (to be confirmed
 * against the callee's documentation).
 *
 * @param[in] options      Runtime options forwarded to the callee.
 * @param[in] side, trans  Forwarded to the callee.
 * @param[in] m1, n1       Kept for signature compatibility; not forwarded.
 * @param[in] m2, n2, k    Forwarded to the callee.
 * @param[in] ib, nb       Blocking sizes forwarded to the callee.
 * @param[in] A1, A2, V, T Descriptors, tile coordinates (Xm, Xn) and leading
 *                         dimensions, all forwarded as is.
 */
static inline void
INSERT_TASK_ztsmqr( const RUNTIME_option_t *options,
                    cham_side_t side, cham_trans_t trans,
                    int m1, int n1, int m2, int n2, int k, int ib, int nb,
                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    /* Not needed by the callee; silence unused-parameter diagnostics */
    (void)m1;
    (void)n1;

    /* Plain call: ISO C does not allow `return <expression>;` in a function
     * returning void. */
    INSERT_TASK_ztpmqrt( options, side, trans, m2, n2, k, 0, ib, nb,
                         V,  Vm,  Vn,  ldv,  T,  Tm,  Tn,  ldt,
                         A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
}
/**
 * Backward-compatibility wrapper around INSERT_TASK_ztpmlqt().
 *
 * The callee receives (side, trans, m2, n2, k, n2, ib, nb) followed by the
 * V, T, A1 and A2 descriptors in that order. The second n2 is presumably the
 * size L of the trapezoidal part expected by the TP kernel (to be confirmed
 * against the callee's documentation).
 *
 * @param[in] options      Runtime options forwarded to the callee.
 * @param[in] side, trans  Forwarded to the callee.
 * @param[in] m1, n1       Kept for signature compatibility; not forwarded.
 * @param[in] m2, n2, k    Forwarded to the callee.
 * @param[in] ib, nb       Blocking sizes forwarded to the callee.
 * @param[in] A1, A2, V, T Descriptors, tile coordinates (Xm, Xn) and leading
 *                         dimensions, all forwarded as is.
 */
static inline void
INSERT_TASK_zttmlq( const RUNTIME_option_t *options,
                    cham_side_t side, cham_trans_t trans,
                    int m1, int n1, int m2, int n2, int k, int ib, int nb,
                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    /* Not needed by the callee; silence unused-parameter diagnostics */
    (void)m1;
    (void)n1;

    /* Plain call: ISO C does not allow `return <expression>;` in a function
     * returning void. */
    INSERT_TASK_ztpmlqt( options, side, trans, m2, n2, k, n2, ib, nb,
                         V,  Vm,  Vn,  ldv,  T,  Tm,  Tn,  ldt,
                         A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
}
/**
 * Backward-compatibility wrapper around INSERT_TASK_ztpmqrt().
 *
 * The callee receives (side, trans, m2, n2, k, m2, ib, nb) followed by the
 * V, T, A1 and A2 descriptors in that order. The second m2 is presumably the
 * size L of the trapezoidal part expected by the TP kernel (to be confirmed
 * against the callee's documentation).
 *
 * @param[in] options      Runtime options forwarded to the callee.
 * @param[in] side, trans  Forwarded to the callee.
 * @param[in] m1, n1       Kept for signature compatibility; not forwarded.
 * @param[in] m2, n2, k    Forwarded to the callee.
 * @param[in] ib, nb       Blocking sizes forwarded to the callee.
 * @param[in] A1, A2, V, T Descriptors, tile coordinates (Xm, Xn) and leading
 *                         dimensions, all forwarded as is.
 */
static inline void
INSERT_TASK_zttmqr( const RUNTIME_option_t *options,
                    cham_side_t side, cham_trans_t trans,
                    int m1, int n1, int m2, int n2, int k, int ib, int nb,
                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    /* Not needed by the callee; silence unused-parameter diagnostics */
    (void)m1;
    (void)n1;

    /* Plain call: ISO C does not allow `return <expression>;` in a function
     * returning void. */
    INSERT_TASK_ztpmqrt( options, side, trans, m2, n2, k, m2, ib, nb,
                         V,  Vm,  Vn,  ldv,  T,  Tm,  Tn,  ldt,
                         A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
}
#endif /* _chameleon_tasks_z_h_ */
......@@ -21,7 +21,7 @@
# @author Cedric Castagnede
# @author Emmanuel Agullo
# @author Mathieu Faverge
# @date 2012-07-13
# @date 2018-11-07
#
###
......@@ -86,17 +86,9 @@ set(CODELETS_ZSRC
codelets/codelet_ztrasm.c
codelets/codelet_ztrssq.c
codelets/codelet_ztrtri.c
codelets/codelet_ztslqt.c
codelets/codelet_ztsmlq.c
codelets/codelet_ztsmqr.c
codelets/codelet_ztsmlq_hetra1.c
codelets/codelet_ztsmqr_hetra1.c
codelets/codelet_ztsqrt.c
codelets/codelet_ztstrf.c
codelets/codelet_zttlqt.c
codelets/codelet_zttmlq.c
codelets/codelet_zttmqr.c
codelets/codelet_zttqrt.c
codelets/codelet_zunmlq.c
codelets/codelet_zunmqr.c
##################
......
/**
*
* @file parsec/codelet_ztslqt.c
*
* @copyright 2009-2015 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon ztslqt PaRSEC codelet
*
* @version 1.0.0
* @author Reazul Hoque
* @precisions normal z -> c d s
*
*/
#include "chameleon_parsec.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * PaRSEC DTD task body for ztslqt: unpacks the task arguments and runs
 * CORE_ztslqt() on them. Always reports PARSEC_HOOK_RETURN_DONE.
 */
static inline int
CORE_ztslqt_parsec( parsec_execution_stream_t *context,
                    parsec_task_t             *this_task )
{
    int m, n, ib;
    int lda1, lda2, ldt;
    CHAMELEON_Complex64_t *A1, *A2, *T;
    CHAMELEON_Complex64_t *TAU, *WORK;

    /* The execution stream is not used by this task body */
    (void)context;

    /* Unpack order must match the argument list built in INSERT_TASK_ztslqt() */
    parsec_dtd_unpack_args( this_task,
                            &m, &n, &ib,
                            &A1, &lda1,
                            &A2, &lda2,
                            &T, &ldt,
                            &TAU, &WORK );

    CORE_ztslqt( m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK );

    return PARSEC_HOOK_RETURN_DONE;
}
/**
 * Submits a ztslqt task to the PaRSEC DTD taskpool stored in
 * options->sequence->schedopt, with CORE_ztslqt_parsec() as task body and
 * options->priority as priority.
 *
 * A1 and A2 are passed INOUT (A2 also carries AFFINITY), T is passed OUTPUT,
 * and two scratch buffers of nb and ib*nb elements are requested.
 */
void INSERT_TASK_ztslqt( const RUNTIME_option_t *options,
                         int m, int n, int ib, int nb,
                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    parsec_taskpool_t *PARSEC_dtd_taskpool =
        (parsec_taskpool_t *)(options->sequence->schedopt);

    parsec_dtd_taskpool_insert_task(
        PARSEC_dtd_taskpool, CORE_ztslqt_parsec, options->priority, "tslqt",
        /* Scalar sizes */
        sizeof(int), &m,  VALUE,
        sizeof(int), &n,  VALUE,
        sizeof(int), &ib, VALUE,
        /* Tiles, each followed by its leading dimension */
        PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ),
                       chameleon_parsec_get_arena_index( A1 ) | INOUT,
        sizeof(int), &lda1, VALUE,
        PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ),
                       chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY,
        sizeof(int), &lda2, VALUE,
        PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ),
                       chameleon_parsec_get_arena_index( T ) | OUTPUT,
        sizeof(int), &ldt, VALUE,
        /* Scratch buffers, unpacked as TAU and WORK by the task body */
        sizeof(CHAMELEON_Complex64_t)*nb,    NULL, SCRATCH,
        sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        PARSEC_DTD_ARG_END );
}
/**
*
* @file parsec/codelet_ztsmlq.c
*
* @copyright 2009-2015 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon ztsmlq PaRSEC codelet
*
* @version 1.0.0
* @author Reazul Hoque
* @precisions normal z -> c d s
*
*/
#include "chameleon_parsec.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * PaRSEC DTD task body for ztsmlq: unpacks the task arguments and runs
 * CORE_ztsmlq() on them. Always reports PARSEC_HOOK_RETURN_DONE.
 */
static inline int
CORE_ztsmlq_parsec( parsec_execution_stream_t *context,
                    parsec_task_t             *this_task )
{
    cham_side_t  side;
    cham_trans_t trans;
    int m1, n1, m2, n2, k, ib;
    int lda1, lda2, ldv, ldt, ldwork;
    CHAMELEON_Complex64_t *A1, *A2, *V, *T;
    CHAMELEON_Complex64_t *WORK;

    /* The execution stream is not used by this task body */
    (void)context;

    /* Unpack order must match the argument list built in INSERT_TASK_ztsmlq() */
    parsec_dtd_unpack_args( this_task,
                            &side, &trans,
                            &m1, &n1, &m2, &n2, &k, &ib,
                            &A1, &lda1,
                            &A2, &lda2,
                            &V, &ldv,
                            &T, &ldt,
                            &WORK, &ldwork );

    CORE_ztsmlq( side, trans, m1, n1, m2, n2, k, ib,
                 A1, lda1, A2, lda2,
                 V, ldv, T, ldt,
                 WORK, ldwork );

    return PARSEC_HOOK_RETURN_DONE;
}
/**
 * Submits a ztsmlq task to the PaRSEC DTD taskpool stored in
 * options->sequence->schedopt, with CORE_ztsmlq_parsec() as task body and
 * options->priority as priority.
 *
 * A1 and A2 are passed INOUT (A2 also carries AFFINITY), V and T are passed
 * INPUT, and one scratch buffer of ib*nb elements is requested. The leading
 * dimension given for that scratch buffer is ib when side is ChamLeft and nb
 * otherwise.
 */
void INSERT_TASK_ztsmlq( const RUNTIME_option_t *options,
                         cham_side_t side, cham_trans_t trans,
                         int m1, int n1, int m2, int n2, int k, int ib, int nb,
                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                         const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    parsec_taskpool_t *PARSEC_dtd_taskpool =
        (parsec_taskpool_t *)(options->sequence->schedopt);
    int ldwork;

    /* Leading dimension of the workspace depends on the application side */
    if ( side == ChamLeft ) {
        ldwork = ib;
    }
    else {
        ldwork = nb;
    }

    parsec_dtd_taskpool_insert_task(
        PARSEC_dtd_taskpool, CORE_ztsmlq_parsec, options->priority, "tsmlq",
        /* Scalar parameters */
        sizeof(int), &side,  VALUE,
        sizeof(int), &trans, VALUE,
        sizeof(int), &m1,    VALUE,
        sizeof(int), &n1,    VALUE,
        sizeof(int), &m2,    VALUE,
        sizeof(int), &n2,    VALUE,
        sizeof(int), &k,     VALUE,
        sizeof(int), &ib,    VALUE,
        /* Tiles, each followed by its leading dimension */
        PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ),
                       chameleon_parsec_get_arena_index( A1 ) | INOUT,
        sizeof(int), &lda1, VALUE,
        PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ),
                       chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY,
        sizeof(int), &lda2, VALUE,
        PASSED_BY_REF, RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ),
                       chameleon_parsec_get_arena_index( V ) | INPUT,
        sizeof(int), &ldv, VALUE,
        PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ),
                       chameleon_parsec_get_arena_index( T ) | INPUT,
        sizeof(int), &ldt, VALUE,
        /* Scratch buffer and its leading dimension */
        sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        sizeof(int), &ldwork, VALUE,
        PARSEC_DTD_ARG_END );
}
/**
*
* @file parsec/codelet_ztsmqr.c
*
* @copyright 2009-2015 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon ztsmqr PaRSEC codelet
*
* @version 1.0.0
* @author Reazul Hoque
* @precisions normal z -> c d s
*
*/
#include "chameleon_parsec.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * PaRSEC DTD task body for ztsmqr: unpacks the task arguments and runs
 * CORE_ztsmqr() on them. Always reports PARSEC_HOOK_RETURN_DONE.
 */
static inline int
CORE_ztsmqr_parsec( parsec_execution_stream_t *context,
                    parsec_task_t             *this_task )
{
    cham_side_t  side;
    cham_trans_t trans;
    int m1, n1, m2, n2, k, ib;
    int lda1, lda2, ldv, ldt, ldwork;
    CHAMELEON_Complex64_t *A1, *A2, *V, *T;
    CHAMELEON_Complex64_t *WORK;

    /* The execution stream is not used by this task body */
    (void)context;

    /* Unpack order must match the argument list built in INSERT_TASK_ztsmqr() */
    parsec_dtd_unpack_args( this_task,
                            &side, &trans,
                            &m1, &n1, &m2, &n2, &k, &ib,
                            &A1, &lda1,
                            &A2, &lda2,
                            &V, &ldv,
                            &T, &ldt,
                            &WORK, &ldwork );

    CORE_ztsmqr( side, trans, m1, n1, m2, n2, k, ib,
                 A1, lda1, A2, lda2,
                 V, ldv, T, ldt,
                 WORK, ldwork );

    return PARSEC_HOOK_RETURN_DONE;
}
/**
 * Submits a ztsmqr task to the PaRSEC DTD taskpool stored in
 * options->sequence->schedopt, with CORE_ztsmqr_parsec() as task body and
 * options->priority as priority.
 *
 * A1 and A2 are passed INOUT (A2 also carries AFFINITY), V and T are passed
 * INPUT, and one scratch buffer of ib*nb elements is requested. The leading
 * dimension given for that scratch buffer is ib when side is ChamLeft and nb
 * otherwise.
 */
void INSERT_TASK_ztsmqr( const RUNTIME_option_t *options,
                         cham_side_t side, cham_trans_t trans,
                         int m1, int n1, int m2, int n2, int k, int ib, int nb,
                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                         const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    parsec_taskpool_t *PARSEC_dtd_taskpool =
        (parsec_taskpool_t *)(options->sequence->schedopt);
    int ldwork;

    /* Leading dimension of the workspace depends on the application side */
    if ( side == ChamLeft ) {
        ldwork = ib;
    }
    else {
        ldwork = nb;
    }

    parsec_dtd_taskpool_insert_task(
        PARSEC_dtd_taskpool, CORE_ztsmqr_parsec, options->priority, "tsmqr",
        /* Scalar parameters */
        sizeof(int), &side,  VALUE,
        sizeof(int), &trans, VALUE,
        sizeof(int), &m1,    VALUE,
        sizeof(int), &n1,    VALUE,
        sizeof(int), &m2,    VALUE,
        sizeof(int), &n2,    VALUE,
        sizeof(int), &k,     VALUE,
        sizeof(int), &ib,    VALUE,
        /* Tiles, each followed by its leading dimension */
        PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ),
                       chameleon_parsec_get_arena_index( A1 ) | INOUT,
        sizeof(int), &lda1, VALUE,
        PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ),
                       chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY,
        sizeof(int), &lda2, VALUE,
        PASSED_BY_REF, RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ),
                       chameleon_parsec_get_arena_index( V ) | INPUT,
        sizeof(int), &ldv, VALUE,
        PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ),
                       chameleon_parsec_get_arena_index( T ) | INPUT,
        sizeof(int), &ldt, VALUE,
        /* Scratch buffer and its leading dimension */
        sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        sizeof(int), &ldwork, VALUE,
        PARSEC_DTD_ARG_END );
}
/**
*
* @file parsec/codelet_ztsqrt.c
*
* @copyright 2009-2015 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon ztsqrt PaRSEC codelet
*
* @version 1.0.0
* @author Reazul Hoque
* @precisions normal z -> c d s
*
*/
#include "chameleon_parsec.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * PaRSEC DTD task body for ztsqrt: unpacks the task arguments and runs
 * CORE_ztsqrt() on them. Always reports PARSEC_HOOK_RETURN_DONE.
 */
static inline int
CORE_ztsqrt_parsec( parsec_execution_stream_t *context,
                    parsec_task_t             *this_task )
{
    int m, n, ib;
    int lda1, lda2, ldt;
    CHAMELEON_Complex64_t *A1, *A2, *T;
    CHAMELEON_Complex64_t *TAU, *WORK;

    /* The execution stream is not used by this task body */
    (void)context;

    /* Unpack order must match the argument list built in INSERT_TASK_ztsqrt() */
    parsec_dtd_unpack_args( this_task,
                            &m, &n, &ib,
                            &A1, &lda1,
                            &A2, &lda2,
                            &T, &ldt,
                            &TAU, &WORK );

    CORE_ztsqrt( m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK );

    return PARSEC_HOOK_RETURN_DONE;
}
/**
 * Submits a ztsqrt task to the PaRSEC DTD taskpool stored in
 * options->sequence->schedopt, with CORE_ztsqrt_parsec() as task body and
 * options->priority as priority.
 *
 * A1 and A2 are passed INOUT (A2 also carries AFFINITY), T is passed OUTPUT,
 * and two scratch buffers of nb and ib*nb elements are requested.
 */
void INSERT_TASK_ztsqrt( const RUNTIME_option_t *options,
                         int m, int n, int ib, int nb,
                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    parsec_taskpool_t *PARSEC_dtd_taskpool =
        (parsec_taskpool_t *)(options->sequence->schedopt);

    parsec_dtd_taskpool_insert_task(
        PARSEC_dtd_taskpool, CORE_ztsqrt_parsec, options->priority, "tsqrt",
        /* Scalar sizes */
        sizeof(int), &m,  VALUE,
        sizeof(int), &n,  VALUE,
        sizeof(int), &ib, VALUE,
        /* Tiles, each followed by its leading dimension */
        PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ),
                       chameleon_parsec_get_arena_index( A1 ) | INOUT,
        sizeof(int), &lda1, VALUE,
        PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ),
                       chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY,
        sizeof(int), &lda2, VALUE,
        PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ),
                       chameleon_parsec_get_arena_index( T ) | OUTPUT,
        sizeof(int), &ldt, VALUE,
        /* Scratch buffers, unpacked as TAU and WORK by the task body */
        sizeof(CHAMELEON_Complex64_t)*nb,    NULL, SCRATCH,
        sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        PARSEC_DTD_ARG_END );
}
/**
*
* @file parsec/codelet_zttlqt.c
*
* @copyright 2009-2015 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zttlqt PaRSEC codelet
*
* @version 1.0.0
* @author Reazul Hoque
* @precisions normal z -> c d s
*
*/
#include "chameleon_parsec.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * PaRSEC DTD task body for zttlqt: unpacks the task arguments and runs
 * CORE_zttlqt() on them. Always reports PARSEC_HOOK_RETURN_DONE.
 */
static inline int
CORE_zttlqt_parsec( parsec_execution_stream_t *context,
                    parsec_task_t             *this_task )
{
    int m, n, ib;
    int lda1, lda2, ldt;
    CHAMELEON_Complex64_t *A1, *A2, *T;
    CHAMELEON_Complex64_t *TAU, *WORK;

    /* The execution stream is not used by this task body */
    (void)context;

    /* Unpack order must match the argument list built in INSERT_TASK_zttlqt() */
    parsec_dtd_unpack_args( this_task,
                            &m, &n, &ib,
                            &A1, &lda1,
                            &A2, &lda2,
                            &T, &ldt,
                            &TAU, &WORK );

    CORE_zttlqt( m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK );

    return PARSEC_HOOK_RETURN_DONE;
}
/**
 * Submits a zttlqt task to the PaRSEC DTD taskpool stored in
 * options->sequence->schedopt, with CORE_zttlqt_parsec() as task body and
 * options->priority as priority.
 *
 * A1 and A2 are passed INOUT (A2 also carries AFFINITY), T is passed OUTPUT,
 * and two scratch buffers of nb and ib*nb elements are requested.
 */
void INSERT_TASK_zttlqt( const RUNTIME_option_t *options,
                         int m, int n, int ib, int nb,
                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    parsec_taskpool_t *PARSEC_dtd_taskpool =
        (parsec_taskpool_t *)(options->sequence->schedopt);

    parsec_dtd_taskpool_insert_task(
        PARSEC_dtd_taskpool, CORE_zttlqt_parsec, options->priority, "ttlqt",
        /* Scalar sizes */
        sizeof(int), &m,  VALUE,
        sizeof(int), &n,  VALUE,
        sizeof(int), &ib, VALUE,
        /* Tiles, each followed by its leading dimension */
        PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ),
                       chameleon_parsec_get_arena_index( A1 ) | INOUT,
        sizeof(int), &lda1, VALUE,
        PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ),
                       chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY,
        sizeof(int), &lda2, VALUE,
        PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ),
                       chameleon_parsec_get_arena_index( T ) | OUTPUT,
        sizeof(int), &ldt, VALUE,
        /* Scratch buffers, unpacked as TAU and WORK by the task body */
        sizeof(CHAMELEON_Complex64_t)*nb,    NULL, SCRATCH,
        sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        PARSEC_DTD_ARG_END );
}
/**
*
* @file parsec/codelet_zttmlq.c
*
* @copyright 2009-2015 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zttmlq PaRSEC codelet
*
* @version 1.0.0
* @author Reazul Hoque
* @precisions normal z -> c d s
*
*/
#include "chameleon_parsec.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * PaRSEC DTD task body for zttmlq: unpacks the task arguments and runs
 * CORE_zttmlq() on them. Always reports PARSEC_HOOK_RETURN_DONE.
 */
static inline int
CORE_zttmlq_parsec( parsec_execution_stream_t *context,
                    parsec_task_t             *this_task )
{
    cham_side_t  side;
    cham_trans_t trans;
    int m1, n1, m2, n2, k, ib;
    int lda1, lda2, ldv, ldt, ldwork;
    CHAMELEON_Complex64_t *A1, *A2, *V, *T;
    CHAMELEON_Complex64_t *WORK;

    /* The execution stream is not used by this task body */
    (void)context;

    /* Unpack order must match the argument list built in INSERT_TASK_zttmlq() */
    parsec_dtd_unpack_args( this_task,
                            &side, &trans,
                            &m1, &n1, &m2, &n2, &k, &ib,
                            &A1, &lda1,
                            &A2, &lda2,
                            &V, &ldv,
                            &T, &ldt,
                            &WORK, &ldwork );

    CORE_zttmlq( side, trans, m1, n1, m2, n2, k, ib,
                 A1, lda1, A2, lda2,
                 V, ldv, T, ldt,
                 WORK, ldwork );

    return PARSEC_HOOK_RETURN_DONE;
}
/**
 * Submits a zttmlq task to the PaRSEC DTD taskpool stored in
 * options->sequence->schedopt, with CORE_zttmlq_parsec() as task body and
 * options->priority as priority.
 *
 * A1 and A2 are passed INOUT (A2 also carries AFFINITY), V and T are passed
 * INPUT, and one scratch buffer of ib*nb elements is requested. The leading
 * dimension given for that scratch buffer is ib when side is ChamLeft and nb
 * otherwise.
 */
void INSERT_TASK_zttmlq( const RUNTIME_option_t *options,
                         cham_side_t side, cham_trans_t trans,
                         int m1, int n1, int m2, int n2, int k, int ib, int nb,
                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                         const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    parsec_taskpool_t *PARSEC_dtd_taskpool =
        (parsec_taskpool_t *)(options->sequence->schedopt);
    int ldwork;

    /* Leading dimension of the workspace depends on the application side */
    if ( side == ChamLeft ) {
        ldwork = ib;
    }
    else {
        ldwork = nb;
    }

    parsec_dtd_taskpool_insert_task(
        PARSEC_dtd_taskpool, CORE_zttmlq_parsec, options->priority, "ttmlq",
        /* Scalar parameters */
        sizeof(int), &side,  VALUE,
        sizeof(int), &trans, VALUE,
        sizeof(int), &m1,    VALUE,
        sizeof(int), &n1,    VALUE,
        sizeof(int), &m2,    VALUE,
        sizeof(int), &n2,    VALUE,
        sizeof(int), &k,     VALUE,
        sizeof(int), &ib,    VALUE,
        /* Tiles, each followed by its leading dimension */
        PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ),
                       chameleon_parsec_get_arena_index( A1 ) | INOUT,
        sizeof(int), &lda1, VALUE,
        PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ),
                       chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY,
        sizeof(int), &lda2, VALUE,
        PASSED_BY_REF, RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ),
                       chameleon_parsec_get_arena_index( V ) | INPUT,
        sizeof(int), &ldv, VALUE,
        PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ),
                       chameleon_parsec_get_arena_index( T ) | INPUT,
        sizeof(int), &ldt, VALUE,
        /* Scratch buffer and its leading dimension */
        sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        sizeof(int), &ldwork, VALUE,
        PARSEC_DTD_ARG_END );
}
/**
*
* @file parsec/codelet_zttmqr.c
*
* @copyright 2009-2015 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zttmqr PaRSEC codelet
*
* @version 1.0.0
* @author Reazul Hoque
* @precisions normal z -> c d s
*
*/
#include "chameleon_parsec.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * PaRSEC DTD task body for zttmqr: unpacks the task arguments and runs
 * CORE_zttmqr() on them. Always reports PARSEC_HOOK_RETURN_DONE.
 */
static inline int
CORE_zttmqr_parsec( parsec_execution_stream_t *context,
                    parsec_task_t             *this_task )
{
    cham_side_t  side;
    cham_trans_t trans;
    int m1, n1, m2, n2, k, ib;
    int lda1, lda2, ldv, ldt, ldwork;
    CHAMELEON_Complex64_t *A1, *A2, *V, *T;
    CHAMELEON_Complex64_t *WORK;

    /* The execution stream is not used by this task body */
    (void)context;

    /* Unpack order must match the argument list built in INSERT_TASK_zttmqr() */
    parsec_dtd_unpack_args( this_task,
                            &side, &trans,
                            &m1, &n1, &m2, &n2, &k, &ib,
                            &A1, &lda1,
                            &A2, &lda2,
                            &V, &ldv,
                            &T, &ldt,
                            &WORK, &ldwork );

    CORE_zttmqr( side, trans, m1, n1, m2, n2, k, ib,
                 A1, lda1, A2, lda2,
                 V, ldv, T, ldt,
                 WORK, ldwork );

    return PARSEC_HOOK_RETURN_DONE;
}
/**
 * Submits a zttmqr task to the PaRSEC DTD taskpool stored in
 * options->sequence->schedopt, with CORE_zttmqr_parsec() as task body and
 * options->priority as priority.
 *
 * A1 and A2 are passed INOUT (A2 also carries AFFINITY), V and T are passed
 * INPUT, and one scratch buffer of ib*nb elements is requested. The leading
 * dimension given for that scratch buffer is ib when side is ChamLeft and nb
 * otherwise.
 */
void INSERT_TASK_zttmqr( const RUNTIME_option_t *options,
                         cham_side_t side, cham_trans_t trans,
                         int m1, int n1, int m2, int n2, int k, int ib, int nb,
                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                         const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    parsec_taskpool_t *PARSEC_dtd_taskpool =
        (parsec_taskpool_t *)(options->sequence->schedopt);
    int ldwork;

    /* Leading dimension of the workspace depends on the application side */
    if ( side == ChamLeft ) {
        ldwork = ib;
    }
    else {
        ldwork = nb;
    }

    parsec_dtd_taskpool_insert_task(
        PARSEC_dtd_taskpool, CORE_zttmqr_parsec, options->priority, "ttmqr",
        /* Scalar parameters */
        sizeof(int), &side,  VALUE,
        sizeof(int), &trans, VALUE,
        sizeof(int), &m1,    VALUE,
        sizeof(int), &n1,    VALUE,
        sizeof(int), &m2,    VALUE,
        sizeof(int), &n2,    VALUE,
        sizeof(int), &k,     VALUE,
        sizeof(int), &ib,    VALUE,
        /* Tiles, each followed by its leading dimension */
        PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ),
                       chameleon_parsec_get_arena_index( A1 ) | INOUT,
        sizeof(int), &lda1, VALUE,
        PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ),
                       chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY,
        sizeof(int), &lda2, VALUE,
        PASSED_BY_REF, RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ),
                       chameleon_parsec_get_arena_index( V ) | INPUT,
        sizeof(int), &ldv, VALUE,
        PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ),
                       chameleon_parsec_get_arena_index( T ) | INPUT,
        sizeof(int), &ldt, VALUE,
        /* Scratch buffer and its leading dimension */
        sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        sizeof(int), &ldwork, VALUE,
        PARSEC_DTD_ARG_END );
}
/**
*
* @file parsec/codelet_zttqrt.c
*
* @copyright 2009-2015 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zttqrt PaRSEC codelet
*
* @version 1.0.0
* @author Reazul Hoque
* @precisions normal z -> c d s
*
*/
#include "chameleon_parsec.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * PaRSEC DTD task body for zttqrt: unpacks the task arguments and runs
 * CORE_zttqrt() on them. Always reports PARSEC_HOOK_RETURN_DONE.
 */
static inline int
CORE_zttqrt_parsec( parsec_execution_stream_t *context,
                    parsec_task_t             *this_task )
{
    int m, n, ib;
    int lda1, lda2, ldt;
    CHAMELEON_Complex64_t *A1, *A2, *T;
    CHAMELEON_Complex64_t *TAU, *WORK;

    /* The execution stream is not used by this task body */
    (void)context;

    /* Unpack order must match the argument list built in INSERT_TASK_zttqrt() */
    parsec_dtd_unpack_args( this_task,
                            &m, &n, &ib,
                            &A1, &lda1,
                            &A2, &lda2,
                            &T, &ldt,
                            &TAU, &WORK );

    CORE_zttqrt( m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK );

    return PARSEC_HOOK_RETURN_DONE;
}
/**
 * Submits a zttqrt task to the PaRSEC DTD taskpool stored in
 * options->sequence->schedopt, with CORE_zttqrt_parsec() as task body and
 * options->priority as priority.
 *
 * A1 and A2 are passed INOUT (A2 also carries AFFINITY), T is passed OUTPUT,
 * and two scratch buffers of nb and ib*nb elements are requested.
 */
void INSERT_TASK_zttqrt( const RUNTIME_option_t *options,
                         int m, int n, int ib, int nb,
                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    parsec_taskpool_t *PARSEC_dtd_taskpool =
        (parsec_taskpool_t *)(options->sequence->schedopt);

    parsec_dtd_taskpool_insert_task(
        PARSEC_dtd_taskpool, CORE_zttqrt_parsec, options->priority, "ttqrt",
        /* Scalar sizes */
        sizeof(int), &m,  VALUE,
        sizeof(int), &n,  VALUE,
        sizeof(int), &ib, VALUE,
        /* Tiles, each followed by its leading dimension */
        PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ),
                       chameleon_parsec_get_arena_index( A1 ) | INOUT,
        sizeof(int), &lda1, VALUE,
        PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ),
                       chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY,
        sizeof(int), &lda2, VALUE,
        PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ),
                       chameleon_parsec_get_arena_index( T ) | OUTPUT,
        sizeof(int), &ldt, VALUE,
        /* Scratch buffers, unpacked as TAU and WORK by the task body */
        sizeof(CHAMELEON_Complex64_t)*nb,    NULL, SCRATCH,
        sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        PARSEC_DTD_ARG_END );
}
/**
*
* @file quark/codelet_ztslqt.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon ztslqt Quark codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c d s
*
*/
#include "chameleon_quark.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * QUARK task body for ztslqt: unpacks the eleven task arguments and runs
 * CORE_ztslqt() on them.
 */
void CORE_ztslqt_quark( Quark *quark )
{
    int m, n, ib;
    int lda1, lda2, ldt;
    CHAMELEON_Complex64_t *A1, *A2, *T;
    CHAMELEON_Complex64_t *TAU, *WORK;

    /* Unpack order must match the argument list built in INSERT_TASK_ztslqt() */
    quark_unpack_args_11( quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK );

    CORE_ztslqt( m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK );
}
/**
*
* @ingroup INSERT_TASK_Complex64_t
*
* CORE_ztslqt computes a LQ factorization of a rectangular matrix
* formed by coupling side-by-side a complex M-by-M
* lower triangular tile A1 and a complex M-by-N tile A2:
*
* | A1 A2 | = L * Q
*
* The tile Q is represented as a product of elementary reflectors
*
* Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).
*
* Each H(i) has the form
*
* H(i) = I - tau * v * v'
*
* where tau is a complex scalar, and v is a complex vector with
* v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
* A2(i,1:n), and tau in TAU(i).
*
*******************************************************************************
*
* @param[in] M
* The number of rows of the tile A1 and A2. M >= 0.
* The number of columns of the tile A1.
*
* @param[in] N
* The number of columns of the tile A2. N >= 0.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A1
* On entry, the M-by-M tile A1.
* On exit, the elements on and below the diagonal of the array
* contain the M-by-M lower trapezoidal tile L;
* the elements above the diagonal are not referenced.
*
* @param[in] LDA1
* The leading dimension of the array A1. LDA1 >= max(1,M).
*
* @param[in,out] A2
* On entry, the M-by-N tile A2.
* On exit, all the elements with the array TAU, represent
* the unitary tile Q as a product of elementary reflectors
* (see Further Details).
*
* @param[in] LDA2
* The leading dimension of the tile A2. LDA2 >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
* Details).
*
* @param[out] WORK
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
/*
 * Submits a ztslqt task to the QUARK instance found in options->schedopt,
 * with CORE_ztslqt_quark() as task body.
 *
 * A1 is passed INOUT restricted to the QUARK_REGION_L and QUARK_REGION_D
 * regions, A2 is passed INOUT with LOCALITY, T is passed OUTPUT, and two
 * scratch buffers of nb and ib*nb elements are requested.
 */
void INSERT_TASK_ztslqt( const RUNTIME_option_t *options,
                         int m, int n, int ib, int nb,
                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
{
    quark_option_t *opt = (quark_option_t*)(options->schedopt);

    DAG_CORE_TSLQT;

    QUARK_Insert_Task(
        opt->quark, CORE_ztslqt_quark, (Quark_Task_Flags*)opt,
        /* Scalar sizes */
        sizeof(int), &m,  VALUE,
        sizeof(int), &n,  VALUE,
        sizeof(int), &ib, VALUE,
        /* Tiles, each followed by its leading dimension */
        sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),
                                             INOUT | QUARK_REGION_L | QUARK_REGION_D,
        sizeof(int), &lda1, VALUE,
        sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),
                                             INOUT | LOCALITY,
        sizeof(int), &lda2, VALUE,
        sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
                                             OUTPUT,
        sizeof(int), &ldt, VALUE,
        /* Scratch buffers, unpacked as TAU and WORK by the task body */
        sizeof(CHAMELEON_Complex64_t)*nb,    NULL, SCRATCH,
        sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        0 );
}
/**
*
* @file quark/codelet_ztsmlq.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon ztsmlq Quark codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Azzam Haidar
* @author Dulceneia Becker
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c d s
*
*/
#include "chameleon_quark.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * QUARK task body for ztsmlq: unpacks the eighteen task arguments and runs
 * CORE_ztsmlq() on them.
 */
void CORE_ztsmlq_quark( Quark *quark )
{
    cham_side_t  side;
    cham_trans_t trans;
    int m1, n1, m2, n2, k, ib;
    int lda1, lda2, ldv, ldt, ldwork;
    CHAMELEON_Complex64_t *A1, *A2, *V, *T;
    CHAMELEON_Complex64_t *WORK;

    /* Arguments are unpacked in the same order as CORE_ztsmlq() takes them */
    quark_unpack_args_18( quark,
                          side, trans, m1, n1, m2, n2, k, ib,
                          A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork );

    CORE_ztsmlq( side, trans, m1, n1, m2, n2, k, ib,
                 A1, lda1, A2, lda2,
                 V, ldv, T, ldt,
                 WORK, ldwork );
}
/**
*
* @ingroup INSERT_TASK_Complex64_t
*
* CORE_ztsmlq overwrites the general complex M1-by-N1 tile A1 and
* M2-by-N2 tile A2 with
*
* SIDE = 'L' SIDE = 'R'
* TRANS = 'N': Q * | A1 | | A1 A2 | * Q
* | A2 |
*
* TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H
* | A2 |
*
* where Q is a complex unitary matrix defined as the product of k
* elementary reflectors
*
* Q = H(k)' . . . H(2)' H(1)'
*
* as returned by CORE_ZTSLQT.
*
*******************************************************************************
*
* @param[in] side
* @arg ChamLeft : apply Q or Q**H from the Left;
* @arg ChamRight : apply Q or Q**H from the Right.
*
* @param[in] trans
* @arg ChamNoTrans : No transpose, apply Q;
* @arg ChamConjTrans : ConjTranspose, apply Q**H.
*
* @param[in] M1
* The number of rows of the tile A1. M1 >= 0.
*
* @param[in] N1
* The number of columns of the tile A1. N1 >= 0.
*
* @param[in] M2
* The number of rows of the tile A2. M2 >= 0.
* M2 = M1 if side == ChamRight.
*
* @param[in] N2
* The number of columns of the tile A2. N2 >= 0.
* N2 = N1 if side == ChamLeft.
*
* @param[in] K
* The number of elementary reflectors whose product defines
* the matrix Q.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A1
* On entry, the M1-by-N1 tile A1.
* On exit, A1 is overwritten by the application of Q.
*
* @param[in] LDA1
* The leading dimension of the array A1. LDA1 >= max(1,M1).
*
* @param[in,out] A2
* On entry, the M2-by-N2 tile A2.
* On exit, A2 is overwritten by the application of Q.
*
* @param[in] LDA2
* The leading dimension of the tile A2. LDA2 >= max(1,M2).
*
* @param[in] V
* The i-th row must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* CORE_ZTSLQT in the first k rows of its array argument V.
*
* @param[in] LDV
* The leading dimension of the array V. LDV >= max(1,K).
*
* @param[in] T
* The IB-by-N1 triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] WORK
* Workspace array of size
* LDWORK-by-M1 if side == ChamLeft
* LDWORK-by-IB if side == ChamRight
*
* @param[in] LDWORK
* The leading dimension of the array WORK.
* LDWORK >= max(1,IB) if side == ChamLeft
* LDWORK >= max(1,N1) if side == ChamRight
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void INSERT_TASK_ztsmlq(const RUNTIME_option_t *options,
                        cham_side_t side, cham_trans_t trans,
                        int m1, int n1, int m2, int n2, int k, int ib, int nb,
                        const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                        const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                        const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                        const CHAM_desc_t *T, int Tm, int Tn, int ldt)
{
    /*
     * Submit one ztsmlq task to QUARK.
     * NOTE(review): `opt` keeps this exact name because DAG_CORE_TSMLQ is a
     * macro defined elsewhere and may refer to it -- confirm before renaming.
     */
    quark_option_t *opt = (quark_option_t*)(options->schedopt);
    CHAMELEON_Complex64_t *tileA1;
    CHAMELEON_Complex64_t *tileA2;
    CHAMELEON_Complex64_t *tileV;
    CHAMELEON_Complex64_t *tileT;
    int ldwork;

    /* Leading dimension of the scratch workspace: ib on the left, nb otherwise. */
    if (side == ChamLeft) {
        ldwork = ib;
    }
    else {
        ldwork = nb;
    }

    DAG_CORE_TSMLQ;

    /* Addresses of the tiles the task works on. */
    tileA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n);
    tileA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
    tileV  = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
    tileT  = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);

    /* Arguments are (size, address, flags) triplets; the list ends with 0. */
    QUARK_Insert_Task(
        opt->quark, CORE_ztsmlq_quark, (Quark_Task_Flags*)opt,
        /* side   */ sizeof(int), &side, VALUE,
        /* trans  */ sizeof(int), &trans, VALUE,
        /* m1     */ sizeof(int), &m1, VALUE,
        /* n1     */ sizeof(int), &n1, VALUE,
        /* m2     */ sizeof(int), &m2, VALUE,
        /* n2     */ sizeof(int), &n2, VALUE,
        /* k      */ sizeof(int), &k, VALUE,
        /* ib     */ sizeof(int), &ib, VALUE,
        /* A1     */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileA1, INOUT,
        /* lda1   */ sizeof(int), &lda1, VALUE,
        /* A2     */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileA2, INOUT | LOCALITY,
        /* lda2   */ sizeof(int), &lda2, VALUE,
        /* V      */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileV, INPUT,
        /* ldv    */ sizeof(int), &ldv, VALUE,
        /* T      */ sizeof(CHAMELEON_Complex64_t)*ib*nb, tileT, INPUT,
        /* ldt    */ sizeof(int), &ldt, VALUE,
        /* WORK   */ sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        /* ldwork */ sizeof(int), &ldwork, VALUE,
        0);
}
/**
*
* @file quark/codelet_ztsmqr.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon ztsmqr Quark codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Azzam Haidar
* @author Dulceneia Becker
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c d s
*
*/
#include "chameleon_quark.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
void CORE_ztsmqr_quark(Quark *quark)
{
    /*
     * QUARK task body for ztsmqr: pull the 18 packed arguments out of the
     * task and forward them, unchanged, to the CORE_ztsmqr kernel.
     */
    cham_side_t  side;
    cham_trans_t trans;
    int m1, n1, m2, n2;
    int k, ib;
    int lda1, lda2, ldv, ldt, ldwork;
    CHAMELEON_Complex64_t *A1, *A2;
    CHAMELEON_Complex64_t *V, *T;
    CHAMELEON_Complex64_t *WORK;

    /* Same order as the triplets submitted by INSERT_TASK_ztsmqr. */
    quark_unpack_args_18(quark,
                         side, trans,
                         m1, n1, m2, n2, k, ib,
                         A1, lda1,
                         A2, lda2,
                         V, ldv,
                         T, ldt,
                         WORK, ldwork);

    CORE_ztsmqr(side, trans,
                m1, n1, m2, n2, k, ib,
                A1, lda1,
                A2, lda2,
                V, ldv,
                T, ldt,
                WORK, ldwork);
}
/**
*
* @ingroup INSERT_TASK_Complex64_t
*
* CORE_ztsmqr overwrites the general complex M1-by-N1 tile A1 and
* M2-by-N2 tile A2 with
*
* SIDE = 'L' SIDE = 'R'
* TRANS = 'N': Q * | A1 | | A1 A2 | * Q
* | A2 |
*
* TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H
* | A2 |
*
* where Q is a complex unitary matrix defined as the product of k
* elementary reflectors
*
* Q = H(1) H(2) . . . H(k)
*
* as returned by CORE_ZTSQRT.
*
*******************************************************************************
*
* @param[in] side
* @arg ChamLeft : apply Q or Q**H from the Left;
* @arg ChamRight : apply Q or Q**H from the Right.
*
* @param[in] trans
* @arg ChamNoTrans : No transpose, apply Q;
* @arg ChamConjTrans : ConjTranspose, apply Q**H.
*
* @param[in] M1
* The number of rows of the tile A1. M1 >= 0.
*
* @param[in] N1
* The number of columns of the tile A1. N1 >= 0.
*
* @param[in] M2
* The number of rows of the tile A2. M2 >= 0.
* M2 = M1 if side == ChamRight.
*
* @param[in] N2
* The number of columns of the tile A2. N2 >= 0.
* N2 = N1 if side == ChamLeft.
*
* @param[in] K
* The number of elementary reflectors whose product defines
* the matrix Q.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A1
* On entry, the M1-by-N1 tile A1.
* On exit, A1 is overwritten by the application of Q.
*
* @param[in] LDA1
* The leading dimension of the array A1. LDA1 >= max(1,M1).
*
* @param[in,out] A2
* On entry, the M2-by-N2 tile A2.
* On exit, A2 is overwritten by the application of Q.
*
* @param[in] LDA2
* The leading dimension of the tile A2. LDA2 >= max(1,M2).
*
* @param[in] V
* The i-th column must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* CORE_ZTSQRT in the first k columns of its array argument V.
*
* @param[in] LDV
* The leading dimension of the array V. LDV >= max(1,K).
*
* @param[in] T
* The IB-by-N1 triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] WORK
* Workspace array of size
* LDWORK-by-N1 if side == ChamLeft
* LDWORK-by-IB if side == ChamRight
*
* @param[in] LDWORK
* The leading dimension of the array WORK.
* LDWORK >= max(1,IB) if side == ChamLeft
* LDWORK >= max(1,M1) if side == ChamRight
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void INSERT_TASK_ztsmqr(const RUNTIME_option_t *options,
                        cham_side_t side, cham_trans_t trans,
                        int m1, int n1, int m2, int n2, int k, int ib, int nb,
                        const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                        const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                        const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                        const CHAM_desc_t *T, int Tm, int Tn, int ldt)
{
    /*
     * Submit one ztsmqr task to QUARK.
     * NOTE(review): `opt` keeps this exact name because DAG_CORE_TSMQR is a
     * macro defined elsewhere and may refer to it -- confirm before renaming.
     */
    quark_option_t *opt = (quark_option_t*)(options->schedopt);
    CHAMELEON_Complex64_t *tileA1;
    CHAMELEON_Complex64_t *tileA2;
    CHAMELEON_Complex64_t *tileV;
    CHAMELEON_Complex64_t *tileT;
    int ldwork;

    /* Leading dimension of the scratch workspace: ib on the left, nb otherwise. */
    if (side == ChamLeft) {
        ldwork = ib;
    }
    else {
        ldwork = nb;
    }

    DAG_CORE_TSMQR;

    /* Addresses of the tiles the task works on. */
    tileA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n);
    tileA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
    tileV  = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
    tileT  = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);

    /* Arguments are (size, address, flags) triplets; the list ends with 0. */
    QUARK_Insert_Task(
        opt->quark, CORE_ztsmqr_quark, (Quark_Task_Flags*)opt,
        /* side   */ sizeof(int), &side, VALUE,
        /* trans  */ sizeof(int), &trans, VALUE,
        /* m1     */ sizeof(int), &m1, VALUE,
        /* n1     */ sizeof(int), &n1, VALUE,
        /* m2     */ sizeof(int), &m2, VALUE,
        /* n2     */ sizeof(int), &n2, VALUE,
        /* k      */ sizeof(int), &k, VALUE,
        /* ib     */ sizeof(int), &ib, VALUE,
        /* A1     */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileA1, INOUT,
        /* lda1   */ sizeof(int), &lda1, VALUE,
        /* A2     */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileA2, INOUT | LOCALITY,
        /* lda2   */ sizeof(int), &lda2, VALUE,
        /* V      */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileV, INPUT,
        /* ldv    */ sizeof(int), &ldv, VALUE,
        /* T      */ sizeof(CHAMELEON_Complex64_t)*ib*nb, tileT, INPUT,
        /* ldt    */ sizeof(int), &ldt, VALUE,
        /* WORK   */ sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        /* ldwork */ sizeof(int), &ldwork, VALUE,
        0);
}
/**
*
* @file quark/codelet_ztsqrt.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon ztsqrt Quark codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c d s
*
*/
#include "chameleon_quark.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
void CORE_ztsqrt_quark(Quark *quark)
{
    /*
     * QUARK task body for ztsqrt: pull the 11 packed arguments out of the
     * task and forward them, unchanged, to the CORE_ztsqrt kernel.
     */
    int m, n, ib;
    int lda1, lda2, ldt;
    CHAMELEON_Complex64_t *A1, *A2;
    CHAMELEON_Complex64_t *T;
    CHAMELEON_Complex64_t *TAU, *WORK;

    /* Same order as the triplets submitted by INSERT_TASK_ztsqrt. */
    quark_unpack_args_11(quark,
                         m, n, ib,
                         A1, lda1,
                         A2, lda2,
                         T, ldt,
                         TAU, WORK);

    CORE_ztsqrt(m, n, ib,
                A1, lda1,
                A2, lda2,
                T, ldt,
                TAU, WORK);
}
/**
*
* @ingroup INSERT_TASK_Complex64_t
*
* CORE_ztsqrt computes a QR factorization of a rectangular matrix
* formed by coupling a complex N-by-N upper triangular tile A1
* on top of a complex M-by-N tile A2:
*
* | A1 | = Q * R
* | A2 |
*
*******************************************************************************
*
* @param[in] M
* The number of rows of the tile A2. M >= 0.
*
* @param[in] N
* The number of rows of the tile A1.
* The number of columns of the tiles A1 and A2. N >= 0.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A1
* On entry, the N-by-N tile A1.
* On exit, the elements on and above the diagonal of the array
* contain the N-by-N upper trapezoidal tile R;
* the elements below the diagonal are not referenced.
*
* @param[in] LDA1
* The leading dimension of the array A1. LDA1 >= max(1,N).
*
* @param[in,out] A2
* On entry, the M-by-N tile A2.
* On exit, all the elements with the array TAU, represent
* the unitary tile Q as a product of elementary reflectors
* (see Further Details).
*
* @param[in] LDA2
* The leading dimension of the tile A2. LDA2 >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
* Details).
*
* @param[out] WORK
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void INSERT_TASK_ztsqrt(const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *T, int Tm, int Tn, int ldt)
{
quark_option_t *opt = (quark_option_t*)(options->schedopt);
DAG_CORE_TSQRT;
QUARK_Insert_Task(opt->quark, CORE_ztsqrt_quark, (Quark_Task_Flags*)opt,
sizeof(int), &m, VALUE,
sizeof(int), &n, VALUE,
sizeof(int), &ib, VALUE,
sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_U | QUARK_REGION_D,
sizeof(int), &lda1, VALUE,
sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | LOCALITY,
sizeof(int), &lda2, VALUE,
sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), OUTPUT,
sizeof(int), &ldt, VALUE,
sizeof(CHAMELEON_Complex64_t)*nb, NULL, SCRATCH,
sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
0);
}
/**
*
* @file quark/codelet_zttlqt.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zttlqt Quark codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Dulceneia Becker
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c d s
*
*/
#include "chameleon_quark.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
void CORE_zttlqt_quark(Quark *quark)
{
    /*
     * QUARK task body for zttlqt: pull the 11 packed arguments out of the
     * task and forward them, unchanged, to the CORE_zttlqt kernel.
     */
    int m, n, ib;
    int lda1, lda2, ldt;
    CHAMELEON_Complex64_t *A1, *A2;
    CHAMELEON_Complex64_t *T;
    CHAMELEON_Complex64_t *TAU, *WORK;

    /* Same order as the triplets submitted by INSERT_TASK_zttlqt. */
    quark_unpack_args_11(quark,
                         m, n, ib,
                         A1, lda1,
                         A2, lda2,
                         T, ldt,
                         TAU, WORK);

    CORE_zttlqt(m, n, ib,
                A1, lda1,
                A2, lda2,
                T, ldt,
                TAU, WORK);
}
/**
*
* @ingroup INSERT_TASK_Complex64_t
*
* CORE_zttlqt computes a LQ factorization of a rectangular matrix
* formed by coupling side-by-side a complex M-by-M lower triangular tile A1
* and a complex M-by-N lower triangular tile A2:
*
* | A1 A2 | = L * Q
*
* The tile Q is represented as a product of elementary reflectors
*
* Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).
*
* Each H(i) has the form
*
* H(i) = I - tau * v * v'
*
* where tau is a complex scalar, and v is a complex vector with
* v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
* A2(i,1:n), and tau in TAU(i).
*
*******************************************************************************
*
* @param[in] M
* The number of rows of the tile A1 and A2. M >= 0.
* The number of columns of the tile A1.
*
* @param[in] N
* The number of columns of the tile A2. N >= 0.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A1
* On entry, the M-by-M tile A1.
* On exit, the elements on and below the diagonal of the array
* contain the M-by-M lower trapezoidal tile L;
* the elements above the diagonal are not referenced.
*
* @param[in] LDA1
* The leading dimension of the array A1. LDA1 >= max(1,M).
*
* @param[in,out] A2
* On entry, the M-by-N lower triangular tile A2.
* On exit, the elements on and below the diagonal of the array
* with the array TAU, represent
* the unitary tile Q as a product of elementary reflectors
* (see Further Details).
*
* @param[in] LDA2
* The leading dimension of the array A2. LDA2 >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
* Details).
*
* @param[in,out] WORK
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void INSERT_TASK_zttlqt(const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *T, int Tm, int Tn, int ldt)
{
quark_option_t *opt = (quark_option_t*)(options->schedopt);
DAG_CORE_TTLQT;
QUARK_Insert_Task(opt->quark, CORE_zttlqt_quark, (Quark_Task_Flags*)opt,
sizeof(int), &m, VALUE,
sizeof(int), &n, VALUE,
sizeof(int), &ib, VALUE,
sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_L | QUARK_REGION_D,
sizeof(int), &lda1, VALUE,
sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | QUARK_REGION_L | QUARK_REGION_D | LOCALITY,
sizeof(int), &lda2, VALUE,
sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), OUTPUT,
sizeof(int), &ldt, VALUE,
sizeof(CHAMELEON_Complex64_t)*nb, NULL, SCRATCH,
sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
0);
}
/**
*
* @file quark/codelet_zttmlq.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zttmlq Quark codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Dulceneia Becker
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c d s
*
*/
#include "chameleon_quark.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
void CORE_zttmlq_quark(Quark *quark)
{
    /*
     * QUARK task body for zttmlq: pull the 18 packed arguments out of the
     * task and forward them, unchanged, to the CORE_zttmlq kernel.
     */
    cham_side_t  side;
    cham_trans_t trans;
    int m1, n1, m2, n2;
    int k, ib;
    int lda1, lda2, ldv, ldt, ldwork;
    CHAMELEON_Complex64_t *A1, *A2;
    CHAMELEON_Complex64_t *V, *T;
    CHAMELEON_Complex64_t *WORK;

    /* Same order as the triplets submitted by INSERT_TASK_zttmlq. */
    quark_unpack_args_18(quark,
                         side, trans,
                         m1, n1, m2, n2, k, ib,
                         A1, lda1,
                         A2, lda2,
                         V, ldv,
                         T, ldt,
                         WORK, ldwork);

    CORE_zttmlq(side, trans,
                m1, n1, m2, n2, k, ib,
                A1, lda1,
                A2, lda2,
                V, ldv,
                T, ldt,
                WORK, ldwork);
}
/**
*
* @ingroup INSERT_TASK_Complex64_t
*
* CORE_zttmlq overwrites the general complex M1-by-N1 tile A1 and
* M2-by-N2 tile A2 (N1 == N2) with
*
* SIDE = 'L' SIDE = 'R'
* TRANS = 'N': Q * | A1 | | A1 | * Q
* | A2 | | A2 |
*
* TRANS = 'C': Q**H * | A1 | | A1 | * Q**H
* | A2 | | A2 |
*
* where Q is a complex unitary matrix defined as the product of k
* elementary reflectors
*
* Q = H(k)' . . . H(2)' H(1)'
*
* as returned by CORE_zttlqt.
*
*******************************************************************************
*
* @param[in] side
* @arg ChamLeft : apply Q or Q**H from the Left;
* @arg ChamRight : apply Q or Q**H from the Right.
*
* @param[in] trans
* @arg ChamNoTrans : No transpose, apply Q;
* @arg ChamConjTrans : ConjTranspose, apply Q**H.
*
* @param[in] M1
* The number of rows of the tile A1. M1 >= 0.
*
* @param[in] N1
* The number of columns of the tile A1. N1 >= 0.
*
* @param[in] M2
* The number of rows of the tile A2. M2 >= 0.
*
* @param[in] N2
* The number of columns of the tile A2. N2 >= 0.
*
* @param[in] K
* The number of elementary reflectors whose product defines
* the matrix Q.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A1
* On entry, the M1-by-N1 tile A1.
* On exit, A1 is overwritten by the application of Q.
*
* @param[in] LDA1
* The leading dimension of the array A1. LDA1 >= max(1,M1).
*
* @param[in,out] A2
* On entry, the M2-by-N2 tile A2.
* On exit, A2 is overwritten by the application of Q.
*
* @param[in] LDA2
* The leading dimension of the tile A2. LDA2 >= max(1,M2).
*
* @param[in] V
* The i-th row must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* CORE_ZTTLQT in the first k rows of its array argument V.
*
* @param[in] LDV
* The leading dimension of the array V. LDV >= max(1,K).
*
* @param[in] T
* The IB-by-N1 triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] WORK
* Workspace array of size LDWORK-by-N1.
*
* @param[in] LDWORK
* The dimension of the array WORK. LDWORK >= max(1,IB).
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void INSERT_TASK_zttmlq(const RUNTIME_option_t *options,
                        cham_side_t side, cham_trans_t trans,
                        int m1, int n1, int m2, int n2, int k, int ib, int nb,
                        const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                        const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                        const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                        const CHAM_desc_t *T, int Tm, int Tn, int ldt)
{
    /*
     * Submit one zttmlq task to QUARK.
     * NOTE(review): `opt` keeps this exact name because DAG_CORE_TTMLQ is a
     * macro defined elsewhere and may refer to it -- confirm before renaming.
     */
    quark_option_t *opt = (quark_option_t*)(options->schedopt);
    CHAMELEON_Complex64_t *tileA1;
    CHAMELEON_Complex64_t *tileA2;
    CHAMELEON_Complex64_t *tileV;
    CHAMELEON_Complex64_t *tileT;
    int ldwork;

    /* Leading dimension of the scratch workspace: ib on the left, nb otherwise. */
    if (side == ChamLeft) {
        ldwork = ib;
    }
    else {
        ldwork = nb;
    }

    DAG_CORE_TTMLQ;

    /* Addresses of the tiles the task works on. */
    tileA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n);
    tileA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
    tileV  = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
    tileT  = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);

    /* Arguments are (size, address, flags) triplets; the list ends with 0. */
    QUARK_Insert_Task(
        opt->quark, CORE_zttmlq_quark, (Quark_Task_Flags*)opt,
        /* side   */ sizeof(int), &side, VALUE,
        /* trans  */ sizeof(int), &trans, VALUE,
        /* m1     */ sizeof(int), &m1, VALUE,
        /* n1     */ sizeof(int), &n1, VALUE,
        /* m2     */ sizeof(int), &m2, VALUE,
        /* n2     */ sizeof(int), &n2, VALUE,
        /* k      */ sizeof(int), &k, VALUE,
        /* ib     */ sizeof(int), &ib, VALUE,
        /* A1     */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileA1, INOUT,
        /* lda1   */ sizeof(int), &lda1, VALUE,
        /* A2     */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileA2, INOUT | LOCALITY,
        /* lda2   */ sizeof(int), &lda2, VALUE,
        /* V      */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileV, INPUT | QUARK_REGION_L | QUARK_REGION_D,
        /* ldv    */ sizeof(int), &ldv, VALUE,
        /* T      */ sizeof(CHAMELEON_Complex64_t)*ib*nb, tileT, INPUT,
        /* ldt    */ sizeof(int), &ldt, VALUE,
        /* WORK   */ sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        /* ldwork */ sizeof(int), &ldwork, VALUE,
        0);
}
/**
*
* @file quark/codelet_zttmqr.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zttmqr Quark codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Dulceneia Becker
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c d s
*
*/
#include "chameleon_quark.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
static void
CORE_zttmqr_quark( Quark *quark )
{
    /*
     * QUARK task body for zttmqr: pull the 18 packed arguments out of the
     * task and forward them, unchanged, to the CORE_zttmqr kernel.
     */
    cham_side_t  side;
    cham_trans_t trans;
    int m1, n1, m2, n2;
    int k, ib;
    int lda1, lda2, ldv, ldt, ldwork;
    CHAMELEON_Complex64_t *A1, *A2;
    CHAMELEON_Complex64_t *V, *T;
    CHAMELEON_Complex64_t *WORK;

    /* Same order as the triplets submitted by INSERT_TASK_zttmqr. */
    quark_unpack_args_18(quark,
                         side, trans,
                         m1, n1, m2, n2, k, ib,
                         A1, lda1,
                         A2, lda2,
                         V, ldv,
                         T, ldt,
                         WORK, ldwork);

    CORE_zttmqr(side, trans,
                m1, n1, m2, n2, k, ib,
                A1, lda1,
                A2, lda2,
                V, ldv,
                T, ldt,
                WORK, ldwork);
}
/**
*
* @ingroup INSERT_TASK_Complex64_t
*
* CORE_zttmqr overwrites the general complex M1-by-N1 tile A1 and
* M2-by-N2 tile A2 (N1 == N2) with
*
* SIDE = 'L' SIDE = 'R'
* TRANS = 'N': Q * | A1 | | A1 | * Q
* | A2 | | A2 |
*
* TRANS = 'C': Q**H * | A1 | | A1 | * Q**H
* | A2 | | A2 |
*
* where Q is a complex unitary matrix defined as the product of k
* elementary reflectors
*
* Q = H(1) H(2) . . . H(k)
*
* as returned by CORE_zttqrt.
*
*******************************************************************************
*
* @param[in] side
* @arg ChamLeft : apply Q or Q**H from the Left;
* @arg ChamRight : apply Q or Q**H from the Right.
*
* @param[in] trans
* @arg ChamNoTrans : No transpose, apply Q;
* @arg ChamConjTrans : ConjTranspose, apply Q**H.
*
* @param[in] M1
* The number of rows of the tile A1. M1 >= 0.
*
* @param[in] N1
* The number of columns of the tile A1. N1 >= 0.
*
* @param[in] M2
* The number of rows of the tile A2. M2 >= 0.
*
* @param[in] N2
* The number of columns of the tile A2. N2 >= 0.
*
* @param[in] K
* The number of elementary reflectors whose product defines
* the matrix Q.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A1
* On entry, the M1-by-N1 tile A1.
* On exit, A1 is overwritten by the application of Q.
*
* @param[in] LDA1
* The leading dimension of the array A1. LDA1 >= max(1,M1).
*
* @param[in,out] A2
* On entry, the M2-by-N2 tile A2.
* On exit, A2 is overwritten by the application of Q.
*
* @param[in] LDA2
* The leading dimension of the tile A2. LDA2 >= max(1,M2).
*
* @param[in] V
* The i-th column must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* CORE_ZTTQRT in the first k columns of its array argument V.
*
* @param[in] LDV
* The leading dimension of the array V. LDV >= max(1,K).
*
* @param[in] T
* The IB-by-N1 triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] WORK
* Workspace array of size LDWORK-by-N1.
*
* @param[in] LDWORK
* The dimension of the array WORK. LDWORK >= max(1,IB).
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void INSERT_TASK_zttmqr(const RUNTIME_option_t *options,
                        cham_side_t side, cham_trans_t trans,
                        int m1, int n1, int m2, int n2, int k, int ib, int nb,
                        const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                        const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                        const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                        const CHAM_desc_t *T, int Tm, int Tn, int ldt)
{
    /*
     * Submit one zttmqr task to QUARK.
     * NOTE(review): `opt` keeps this exact name because DAG_CORE_TTMQR is a
     * macro defined elsewhere and may refer to it -- confirm before renaming.
     */
    quark_option_t *opt = (quark_option_t*)(options->schedopt);
    CHAMELEON_Complex64_t *tileA1;
    CHAMELEON_Complex64_t *tileA2;
    CHAMELEON_Complex64_t *tileV;
    CHAMELEON_Complex64_t *tileT;
    int ldwork;

    /* Leading dimension of the scratch workspace: ib on the left, nb otherwise. */
    if (side == ChamLeft) {
        ldwork = ib;
    }
    else {
        ldwork = nb;
    }

    DAG_CORE_TTMQR;

    /* Addresses of the tiles the task works on. */
    tileA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n);
    tileA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
    tileV  = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
    tileT  = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);

    /* Arguments are (size, address, flags) triplets; the list ends with 0. */
    QUARK_Insert_Task(
        opt->quark, CORE_zttmqr_quark, (Quark_Task_Flags*)opt,
        /* side   */ sizeof(int), &side, VALUE,
        /* trans  */ sizeof(int), &trans, VALUE,
        /* m1     */ sizeof(int), &m1, VALUE,
        /* n1     */ sizeof(int), &n1, VALUE,
        /* m2     */ sizeof(int), &m2, VALUE,
        /* n2     */ sizeof(int), &n2, VALUE,
        /* k      */ sizeof(int), &k, VALUE,
        /* ib     */ sizeof(int), &ib, VALUE,
        /* A1     */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileA1, INOUT,
        /* lda1   */ sizeof(int), &lda1, VALUE,
        /* A2     */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileA2, INOUT | LOCALITY,
        /* lda2   */ sizeof(int), &lda2, VALUE,
        /* V      */ sizeof(CHAMELEON_Complex64_t)*nb*nb, tileV, INPUT | QUARK_REGION_U | QUARK_REGION_D,
        /* ldv    */ sizeof(int), &ldv, VALUE,
        /* T      */ sizeof(CHAMELEON_Complex64_t)*ib*nb, tileT, INPUT,
        /* ldt    */ sizeof(int), &ldt, VALUE,
        /* WORK   */ sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
        /* ldwork */ sizeof(int), &ldwork, VALUE,
        0);
}
/**
*
* @file quark/codelet_zttqrt.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zttqrt Quark codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Dulceneia Becker
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c d s
*
*/
#include "chameleon_quark.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
void CORE_zttqrt_quark(Quark *quark)
{
    /*
     * QUARK task body for zttqrt: pull the 11 packed arguments out of the
     * task and forward them, unchanged, to the CORE_zttqrt kernel.
     */
    int m, n, ib;
    int lda1, lda2, ldt;
    CHAMELEON_Complex64_t *A1, *A2;
    CHAMELEON_Complex64_t *T;
    CHAMELEON_Complex64_t *TAU, *WORK;

    /* Same order as the triplets submitted by INSERT_TASK_zttqrt. */
    quark_unpack_args_11(quark,
                         m, n, ib,
                         A1, lda1,
                         A2, lda2,
                         T, ldt,
                         TAU, WORK);

    CORE_zttqrt(m, n, ib,
                A1, lda1,
                A2, lda2,
                T, ldt,
                TAU, WORK);
}
/**
*
* @ingroup INSERT_TASK_Complex64_t
*
* CORE_zttqrt computes a QR factorization of a rectangular matrix
* formed by coupling a complex N-by-N upper triangular tile A1
* on top of a complex M-by-N upper trapezoidal tile A2:
*
* | A1 | = Q * R
* | A2 |
*
* The tile Q is represented as a product of elementary reflectors
*
* Q = H(1) H(2) . . . H(k), where k = min(M,N).
*
* Each H(i) has the form
*
* H(i) = I - tau * v * v'
*
* where tau is a complex scalar, and v is a complex vector with
* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A2(1:m,i),
* and tau in TAU(i).
*
*******************************************************************************
*
* @param[in] M
* The number of rows of the tile A2. M >= 0.
*
* @param[in] N
* The number of columns of the tile A1 and A2. N >= 0.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A1
* On entry, the N-by-N tile A1.
* On exit, the elements on and above the diagonal of the array
* contain the N-by-N upper trapezoidal tile R;
* the elements below the diagonal are not referenced.
*
* @param[in] LDA1
* The leading dimension of the array A1. LDA1 >= max(1,N).
*
* @param[in,out] A2
* On entry, the M-by-N upper triangular tile A2.
* On exit, the elements on and above the diagonal of the array
* with the array TAU, represent
* the unitary tile Q as a product of elementary reflectors
* (see Further Details).
*
* @param[in] LDA2
* The leading dimension of the array A2. LDA2 >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
* Details).
*
* @param[in,out] WORK
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void INSERT_TASK_zttqrt(const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
const CHAM_desc_t *T, int Tm, int Tn, int ldt)
{
quark_option_t *opt = (quark_option_t*)(options->schedopt);
DAG_CORE_TTQRT;
QUARK_Insert_Task(opt->quark, CORE_zttqrt_quark, (Quark_Task_Flags*)opt,
sizeof(int), &m, VALUE,
sizeof(int), &n, VALUE,
sizeof(int), &ib, VALUE,
sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_U | QUARK_REGION_D,
sizeof(int), &lda1, VALUE,
sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | QUARK_REGION_U | QUARK_REGION_D | LOCALITY,
sizeof(int), &lda2, VALUE,
sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), OUTPUT,
sizeof(int), &ldt, VALUE,
sizeof(CHAMELEON_Complex64_t)*nb, NULL, SCRATCH,
sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
0);
}
/**
*
* @file starpu/codelet_ztslqt.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon ztslqt StarPU codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c d s
*
*/
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
/**
*
* @ingroup INSERT_TASK_Complex64_t
*
* CORE_ztslqt computes an LQ factorization of a rectangular matrix
* formed by coupling side-by-side a complex M-by-M
* lower triangular tile A1 and a complex M-by-N tile A2:
*
* | A1 A2 | = L * Q
*
* The tile Q is represented as a product of elementary reflectors
*
* Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).
*
* Each H(i) has the form
*
* H(i) = I - tau * v * v'
*
* where tau is a complex scalar, and v is a complex vector with
* v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
* A2(i,1:n), and tau in TAU(i).
*
*******************************************************************************
*
* @param[in] M
* The number of rows of the tiles A1 and A2. M >= 0.
* The number of columns of the tile A1.
*
* @param[in] N
* The number of columns of the tile A2. N >= 0.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A1
* On entry, the M-by-M tile A1.
* On exit, the elements on and below the diagonal of the array
* contain the M-by-M lower trapezoidal tile L;
* the elements above the diagonal are not referenced.
*
* @param[in] LDA1
* The leading dimension of the array A1. LDA1 >= max(1,M).
*
* @param[in,out] A2
* On entry, the M-by-N tile A2.
* On exit, all the elements, together with the array TAU, represent
* the unitary tile Q as a product of elementary reflectors
* (see Further Details).
*
* @param[in] LDA2
* The leading dimension of the tile A2. LDA2 >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
* Details).
*
* @param[out] WORK
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void INSERT_TASK_ztslqt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
                        const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                        const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                        const CHAM_desc_t *T, int Tm, int Tn, int ldt)
{
    struct starpu_codelet *codelet = &cl_ztslqt;
    void (*callback)(void*)        = NULL;
    CHAMELEON_starpu_ws_t *host_ws = (CHAMELEON_starpu_ws_t*)(options->ws_host);

    /* nb is not used by this wrapper; the cast silences the unused-parameter warning. */
    (void)nb;

    /* The profiling callback is attached only when profiling is requested. */
    if ( options->profiling ) {
        callback = cl_ztslqt_callback;
    }

    /* Declare how each tile is accessed: A1 and A2 read/write, T write-only. */
    CHAMELEON_BEGIN_ACCESS_DECLARATION;
    CHAMELEON_ACCESS_RW(A1, A1m, A1n);
    CHAMELEON_ACCESS_RW(A2, A2m, A2n);
    CHAMELEON_ACCESS_W(T, Tm, Tn);
    CHAMELEON_END_ACCESS_DECLARATION;

    /*
     * The STARPU_VALUE entries are packed in the order
     *   m, n, ib, lda1, lda2, ldt, host workspace pointer
     * which is the order cl_ztslqt_cpu_func() unpacks them in.
     * The data handles become descr[0..3]: A1, A2, T, scratch.
     */
    starpu_insert_task(
        starpu_mpi_codelet(codelet),
        STARPU_VALUE,    &m,       sizeof(int),
        STARPU_VALUE,    &n,       sizeof(int),
        STARPU_VALUE,    &ib,      sizeof(int),
        STARPU_RW,       RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),
        STARPU_VALUE,    &lda1,    sizeof(int),
        STARPU_RW,       RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),
        STARPU_VALUE,    &lda2,    sizeof(int),
        STARPU_W,        RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
        STARPU_VALUE,    &ldt,     sizeof(int),
        /* Per-worker scratch buffer; the CPU kernel carves TAU and WORK out of it. */
        STARPU_SCRATCH,  options->ws_worker,
        /* Host workspace descriptor, forwarded by pointer value. */
        STARPU_VALUE,    &host_ws, sizeof(CHAMELEON_starpu_ws_t *),
        STARPU_PRIORITY, options->priority,
        STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
        STARPU_NAME,     "ztslqt",
#endif
        0);
}
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztslqt_cpu_func(void *descr[], void *cl_arg)
{
CHAMELEON_starpu_ws_t *h_work;
int m;
int n;
int ib;
CHAMELEON_Complex64_t *A1;
int lda1;
CHAMELEON_Complex64_t *A2;
int lda2;
CHAMELEON_Complex64_t *T;
int ldt;
CHAMELEON_Complex64_t *TAU, *WORK;
A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
TAU= (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* nb + ib*nb */
starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work);
WORK = TAU + chameleon_max( m, n );
CORE_ztslqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
}
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
 * Codelet definition
 *
 * CODELETS_CPU is invoked with the kernel name (ztslqt), the value 4 and the
 * CPU implementation cl_ztslqt_cpu_func; no CUDA implementation is given.
 * NOTE(review): 4 is presumably the number of StarPU buffers, matching
 * descr[0..3] (A1, A2, T, scratch) read by cl_ztslqt_cpu_func -- confirm
 * against the CODELETS_CPU macro definition.
 */
CODELETS_CPU(ztslqt, 4, cl_ztslqt_cpu_func)
/**
*
* @file starpu/codelet_ztsmlq.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon ztsmlq StarPU codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Azzam Haidar
* @author Dulceneia Becker
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2018-11-07
* @precisions normal z -> c d s
*
*/
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
/**
*
* @ingroup INSERT_TASK_Complex64_t
*
* CORE_ztsmlq overwrites the general complex M1-by-N1 tile A1 and
* M2-by-N2 tile A2 with
*
* SIDE = 'L' SIDE = 'R'
* TRANS = 'N': Q * | A1 | | A1 A2 | * Q
* | A2 |
*
* TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H
* | A2 |
*
* where Q is a complex unitary matrix defined as the product of k
* elementary reflectors
*
* Q = H(k)' . . . H(2)' H(1)'
*
* as returned by CORE_ZTSLQT.
*
*******************************************************************************
*
* @param[in] side
* @arg ChamLeft : apply Q or Q**H from the Left;
* @arg ChamRight : apply Q or Q**H from the Right.
*
* @param[in] trans
* @arg ChamNoTrans : No transpose, apply Q;
* @arg ChamConjTrans : ConjTranspose, apply Q**H.
*
* @param[in] M1
* The number of rows of the tile A1. M1 >= 0.
*
* @param[in] N1
* The number of columns of the tile A1. N1 >= 0.
*
* @param[in] M2
* The number of rows of the tile A2. M2 >= 0.
* M2 = M1 if side == ChamRight.
*
* @param[in] N2
* The number of columns of the tile A2. N2 >= 0.
* N2 = N1 if side == ChamLeft.
*
* @param[in] K
* The number of elementary reflectors whose product defines
* the matrix Q.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A1
* On entry, the M1-by-N1 tile A1.
* On exit, A1 is overwritten by the application of Q.
*
* @param[in] LDA1
* The leading dimension of the array A1. LDA1 >= max(1,M1).
*
* @param[in,out] A2
* On entry, the M2-by-N2 tile A2.
* On exit, A2 is overwritten by the application of Q.
*
* @param[in] LDA2
* The leading dimension of the tile A2. LDA2 >= max(1,M2).
*
* @param[in] V
* The i-th row must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* CORE_ZTSLQT in the first k rows of its array argument V.
*
* @param[in] LDV
* The leading dimension of the array V. LDV >= max(1,K).
*
* @param[in] T
* The IB-by-N1 triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] WORK
* Workspace array of size
* LDWORK-by-M1 if side == ChamLeft
* LDWORK-by-IB if side == ChamRight
*
* @param[in] LDWORK
* The leading dimension of the array WORK.
* LDWORK >= max(1,IB) if side == ChamLeft
* LDWORK >= max(1,N1) if side == ChamRight
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void INSERT_TASK_ztsmlq(const RUNTIME_option_t *options,
                        cham_side_t side, cham_trans_t trans,
                        int m1, int n1, int m2, int n2, int k, int ib, int nb,
                        const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
                        const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
                        const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                        const CHAM_desc_t *T, int Tm, int Tn, int ldt)
{
    struct starpu_codelet *codelet = &cl_ztsmlq;
    void (*callback)(void*)        = NULL;

    /* Leading dimension of the workspace: ib when applying from the left, nb otherwise. */
    int ldwork = ( side != ChamLeft ) ? nb : ib;

    /* The profiling callback is attached only when profiling is requested. */
    if ( options->profiling ) {
        callback = cl_ztsmlq_callback;
    }

    /* Declare how each tile is accessed: A1 and A2 read/write, V and T read-only. */
    CHAMELEON_BEGIN_ACCESS_DECLARATION;
    CHAMELEON_ACCESS_RW(A1, A1m, A1n);
    CHAMELEON_ACCESS_RW(A2, A2m, A2n);
    CHAMELEON_ACCESS_R(V, Vm, Vn);
    CHAMELEON_ACCESS_R(T, Tm, Tn);
    CHAMELEON_END_ACCESS_DECLARATION;

    /*
     * The STARPU_VALUE entries are packed in the order
     *   side, trans, m1, n1, m2, n2, k, ib, lda1, lda2, ldv, ldt, ldwork, ws_wsize
     * which is the order the CPU and CUDA kernels unpack them in.
     * The data handles become descr[0..4]: A1, A2, V, T, scratch.
     */
    starpu_insert_task(
        starpu_mpi_codelet(codelet),
        STARPU_VALUE,    &side,   sizeof(int),
        STARPU_VALUE,    &trans,  sizeof(int),
        STARPU_VALUE,    &m1,     sizeof(int),
        STARPU_VALUE,    &n1,     sizeof(int),
        STARPU_VALUE,    &m2,     sizeof(int),
        STARPU_VALUE,    &n2,     sizeof(int),
        STARPU_VALUE,    &k,      sizeof(int),
        STARPU_VALUE,    &ib,     sizeof(int),
        STARPU_RW,       RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),
        STARPU_VALUE,    &lda1,   sizeof(int),
        STARPU_RW,       RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),
        STARPU_VALUE,    &lda2,   sizeof(int),
        STARPU_R,        RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),
        STARPU_VALUE,    &ldv,    sizeof(int),
        STARPU_R,        RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
        STARPU_VALUE,    &ldt,    sizeof(int),
        /* Per-worker scratch buffer used as the kernel workspace. */
        STARPU_SCRATCH,  options->ws_worker,
        STARPU_VALUE,    &ldwork, sizeof(int),
        /* Workspace size from the options; the CUDA kernel receives it as lwork. */
        STARPU_VALUE,    &(options->ws_wsize), sizeof(size_t),
        STARPU_PRIORITY, options->priority,
        STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
        STARPU_NAME,     "ztsmlq",
#endif
        0);
}
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztsmlq_cpu_func(void *descr[], void *cl_arg)
{
cham_side_t side;
cham_trans_t trans;
int m1;
int n1;
int m2;
int n2;
int k;
int ib;
CHAMELEON_Complex64_t *A1;
int lda1;
CHAMELEON_Complex64_t *A2;
int lda2;
CHAMELEON_Complex64_t *V;
int ldv;
CHAMELEON_Complex64_t *T;
int ldt;
CHAMELEON_Complex64_t *WORK;
int ldwork;
size_t lwork;
A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
&lda1, &lda2, &ldv, &ldt, &ldwork, &lwork );
CORE_ztsmlq(side, trans, m1, n1, m2, n2, k, ib,
A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
(void)lwork;
}
#if defined(CHAMELEON_USE_CUDA)
static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg)
{
cham_side_t side;
cham_trans_t trans;
int m1;
int n1;
int m2;
int n2;
int k;
int ib;
cuDoubleComplex *A1;
int lda1;
cuDoubleComplex *A2;
int lda2;
cuDoubleComplex *V;
int ldv;
cuDoubleComplex *T;
int ldt;
cuDoubleComplex *W;
int ldwork;
size_t lwork;
A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
V = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */
starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
&lda1, &lda2, &ldv, &ldt, &ldwork, &lwork );
RUNTIME_getStream(stream);
CUDA_ztsmlq( side, trans, m1, n1, m2, n2, k, ib,
A1, lda1, A2, lda2, V, ldv, T, ldt,
W, lwork, stream );
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
}
#endif /* defined(CHAMELEON_USE_CUDA) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
 * Codelet definition
 *
 * CODELETS is invoked with the kernel name (ztsmlq), the value 5, the CPU
 * implementation cl_ztsmlq_cpu_func, the CUDA implementation
 * cl_ztsmlq_cuda_func and the STARPU_CUDA_ASYNC flag.
 * NOTE(review): 5 is presumably the number of StarPU buffers, matching
 * descr[0..4] (A1, A2, V, T, scratch) read by both implementations --
 * confirm against the CODELETS macro definition.
 */
CODELETS(ztsmlq, 5, cl_ztsmlq_cpu_func, cl_ztsmlq_cuda_func, STARPU_CUDA_ASYNC)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment