diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h index 28aeaa1b889e0eb5fe16618b02d9bebd73de905a..8265e990686f22639acc23d0919348d017947cb2 100644 --- a/include/chameleon/tasks_z.h +++ b/include/chameleon/tasks_z.h @@ -20,7 +20,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede - * @date 2010-11-15 + * @date 2018-11-07 * @precisions normal z -> c d s * */ @@ -31,435 +31,499 @@ * Declarations of QUARK wrappers (called by CHAMELEON) - alphabetical order */ void INSERT_TASK_dzasum( const RUNTIME_option_t *options, - cham_store_t storev, cham_uplo_t uplo, int M, int N, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn ); + cham_store_t storev, cham_uplo_t uplo, int M, int N, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn ); void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, - cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ); + cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ); void INSERT_TASK_zlascal( const RUNTIME_option_t *options, - cham_uplo_t uplo, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, - const CHAM_desc_t *A, int Am, int An, int lda ); + cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *A, int Am, int An, int lda ); void INSERT_TASK_zbrdalg( const RUNTIME_option_t *options, - cham_uplo_t uplo, - int N, int NB, - const CHAM_desc_t *A, - const CHAM_desc_t *C, int Cm, int Cn, - const CHAM_desc_t *S, int Sm, int Sn, - int i, int j, int m, int grsiz, int BAND, - int *PCOL, int *ACOL, int *MCOL ); + cham_uplo_t uplo, + int N, int NB, + const CHAM_desc_t *A, + const CHAM_desc_t *C, int Cm, int Cn, + const CHAM_desc_t *S, int Sm, int Sn, + int i, int j, int m, int grsiz, int BAND, + int *PCOL, int *ACOL, int *MCOL ); void INSERT_TASK_zgelqt( const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); + int m, int n, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ); void INSERT_TASK_zgemm( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zgemm2( const RUNTIME_option_t *options, cham_trans_t transA, cham_trans_t transB, int m, int n, int k, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int ldb, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); +void INSERT_TASK_zgemm2( const RUNTIME_option_t *options, + cham_trans_t transA, cham_trans_t transB, + int m, int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zgemm_f2( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); + cham_trans_t transA, cham_trans_t transB, + int m, int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc, + const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, + const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); void INSERT_TASK_zgemm_p2( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAMELEON_Complex64_t **B, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); + cham_trans_t transA, cham_trans_t transB, + int m, int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAMELEON_Complex64_t **B, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zgemm_p2f1( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAMELEON_Complex64_t **B, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1 ); + cham_trans_t transA, cham_trans_t transB, + int m, int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAMELEON_Complex64_t **B, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc, + const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1 ); void INSERT_TASK_zgemm_p3( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t **C, int ldc ); + cham_trans_t transA, cham_trans_t transB, + int m, int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t **C, int ldc ); void INSERT_TASK_zgeqrt( const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); + int m, int n, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ); void INSERT_TASK_zgessm( const RUNTIME_option_t *options, - int m, int n, int k, int ib, int nb, - int *IPIV, - const CHAM_desc_t *L, int Lm, int Ln, int ldl, - const CHAM_desc_t *D, int Dm, int Dn, int ldd, - const CHAM_desc_t *A, int Am, int An, int lda ); + int m, int n, int k, int ib, int nb, + int *IPIV, + const CHAM_desc_t *L, int Lm, int Ln, int ldl, + const CHAM_desc_t *D, int Dm, int Dn, int ldd, + const CHAM_desc_t *A, int Am, int An, int lda ); void INSERT_TASK_zgessq( const RUNTIME_option_t *options, - int m, int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); + int m, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); void INSERT_TASK_zgetrf( const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - int *IPIV, - cham_bool_t check_info, int iinfo ); + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int *IPIV, + cham_bool_t check_info, int iinfo ); void INSERT_TASK_zgetrf_incpiv( const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *L, int Lm, int Ln, int ldl, - int *IPIV, - cham_bool_t check_info, int iinfo ); + int m, int n, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *L, int Lm, int Ln, int ldl, + int *IPIV, + cham_bool_t check_info, int iinfo ); void INSERT_TASK_zgetrf_nopiv( const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, int iinfo ); + int m, int n, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, int iinfo ); void INSERT_TASK_zgetrf_reclap( const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - int *IPIV, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int *IPIV, - cham_bool_t check_info, int iinfo, - int nbthread ); + cham_bool_t check_info, int iinfo, + int nbthread ); void INSERT_TASK_zgetrf_rectil( const RUNTIME_option_t *options, - const CHAM_desc_t A, const CHAM_desc_t *Amn, int Amnm, int Amnn, int size, - int *IPIV, + const CHAM_desc_t A, const CHAM_desc_t *Amn, int Amnm, int Amnn, int size, + int *IPIV, - cham_bool_t check_info, int iinfo, - int nbthread ); + cham_bool_t check_info, int iinfo, + int nbthread ); void INSERT_TASK_zgetrip( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA ); + int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA ); void INSERT_TASK_zgetrip_f1( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA, - const CHAM_desc_t *fake, int fakem, int faken, int szeF, int paramF ); + int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA, + const CHAM_desc_t *fake, int fakem, int faken, int szeF, int paramF ); void INSERT_TASK_zgetrip_f2( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szeF1, int paramF1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szeF2, int paramF2 ); + int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA, + const CHAM_desc_t *fake1, int fake1m, int fake1n, int szeF1, int paramF1, + const CHAM_desc_t *fake2, int fake2m, int fake2n, int szeF2, int paramF2 ); void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options, - cham_uplo_t uplo, - int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ); + cham_uplo_t uplo, + int m, int n, int mb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ); void INSERT_TASK_zhemm( const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); + cham_side_t side, cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zhegst( const RUNTIME_option_t *options, - int itype, cham_uplo_t uplo, int N, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn, int LDB, - int iinfo ); + int itype, cham_uplo_t uplo, int N, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn, int LDB, + int iinfo ); void INSERT_TASK_zherk( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - double alpha, const CHAM_desc_t *A, int Am, int An, int lda, - double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zher2k( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int LDB, + double alpha, const CHAM_desc_t *A, int Am, int An, int lda, double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); +void INSERT_TASK_zher2k( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int LDB, + double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zherfb( const RUNTIME_option_t *options, - cham_uplo_t uplo, - int n, int k, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc ); + cham_uplo_t uplo, + int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ); -void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int mb, - int displA, const CHAM_desc_t *A, int Am, int An, int lda, - int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb ); + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ); +void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int mb, + int displA, const CHAM_desc_t *A, int Am, int An, int lda, + int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb ); void INSERT_TASK_zlange( const RUNTIME_option_t *options, - cham_normtype_t norm, int M, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn ); + cham_normtype_t norm, int M, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn ); void INSERT_TASK_zlange_max( const RUNTIME_option_t *options, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn ); + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ); void INSERT_TASK_zhessq( const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); + cham_uplo_t uplo, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); void INSERT_TASK_zlanhe( const RUNTIME_option_t *options, - cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn ); + cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn ); void INSERT_TASK_zlansy( const RUNTIME_option_t *options, - cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn ); + cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn ); void INSERT_TASK_zlantr( const RUNTIME_option_t *options, - cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, - int M, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn ); + cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, + int M, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn ); void INSERT_TASK_zlaset( const RUNTIME_option_t *options, - cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea ); -void INSERT_TASK_zlaset2( const RUNTIME_option_t *options, cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, - const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea ); + CHAMELEON_Complex64_t beta, const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea ); +void INSERT_TASK_zlaset2( const RUNTIME_option_t *options, + cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea ); void INSERT_TASK_zlaswp( const RUNTIME_option_t *options, - int n, const CHAM_desc_t *A, int Am, int An, int lda, - int i1, int i2, int *ipiv, int inc ); + int n, const CHAM_desc_t *A, int Am, int An, int lda, + int i1, int i2, int *ipiv, int inc ); void INSERT_TASK_zlaswp_f2( const RUNTIME_option_t *options, - int n, const CHAM_desc_t *A, int Am, int An, int lda, - int i1, int i2, int *ipiv, int inc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); + int n, const CHAM_desc_t *A, int Am, int An, int lda, + int i1, int i2, int *ipiv, int inc, + const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, + const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); void INSERT_TASK_zlaswp_ontile( const RUNTIME_option_t *options, - const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An, - int i1, int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel ); -void INSERT_TASK_zlaswp_ontile_f2( const RUNTIME_option_t *options, - const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An, - int i1, int i2, int *ipiv, int inc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); -void INSERT_TASK_zlaswpc_ontile( const RUNTIME_option_t *options, const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An, int i1, int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel ); +void INSERT_TASK_zlaswp_ontile_f2( const RUNTIME_option_t *options, + const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An, + int i1, int i2, int *ipiv, int inc, + const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, + const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); +void INSERT_TASK_zlaswpc_ontile( const RUNTIME_option_t *options, + const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An, + int i1, int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel ); void INSERT_TASK_zlatro( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ); + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ); void INSERT_TASK_zlauum( const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda ); + cham_uplo_t uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda ); void INSERT_TASK_zplghe( const RUNTIME_option_t *options, - double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ); + double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ); void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, - CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ); + CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ); void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ); + int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ); void INSERT_TASK_zpotrf( const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, + cham_uplo_t uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, - int iinfo ); + int iinfo ); void INSERT_TASK_zshift( const RUNTIME_option_t *options, - int s, int m, int n, int L, - CHAMELEON_Complex64_t *A ); + int s, int m, int n, int L, + CHAMELEON_Complex64_t *A ); void INSERT_TASK_zshiftw( const RUNTIME_option_t *options, - int s, int cl, int m, int n, int L, - const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t *W ); + int s, int cl, int m, int n, int L, + const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t *W ); void INSERT_TASK_zssssm( const RUNTIME_option_t *options, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *L1, int L1m, int L1n, int ldl1, - const CHAM_desc_t *L2, int L2m, int L2n, int ldl2, - const int *IPIV ); + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *L1, int L1m, int L1n, int ldl1, + const CHAM_desc_t *L2, int L2m, int L2n, int ldl2, + const int *IPIV ); void INSERT_TASK_zsymm( const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); + cham_side_t side, cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zsyr2k( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int n, int k, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int LDB, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); +void INSERT_TASK_zsyr2k( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int LDB, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); + cham_uplo_t uplo, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - int iinfo ); + cham_uplo_t uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int iinfo ); void INSERT_TASK_zswpab( const RUNTIME_option_t *options, - int i, int n1, int n2, - const CHAM_desc_t *A, int Am, int An, int szeA ); + int i, int n1, int n2, + const CHAM_desc_t *A, int Am, int An, int szeA ); void INSERT_TASK_zswptr_ontile( const RUNTIME_option_t *options, - const CHAM_desc_t descA, const CHAM_desc_t *Aij, int Aijm, int Aijn, - int i1, int i2, int *ipiv, int inc, - const CHAM_desc_t *Akk, int Akkm, int Akkn, int ldak ); + const CHAM_desc_t descA, const CHAM_desc_t *Aij, int Aijm, int Aijn, + int i1, int i2, int *ipiv, int inc, + const CHAM_desc_t *Akk, int Akkm, int Akkn, int ldak ); void INSERT_TASK_ztplqt( const RUNTIME_option_t *options, - int m, int n, int l, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); -void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int M, int N, int K, int L, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, + int m, int n, int l, int ib, int nb, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ); + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ); +void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ); void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m, int n, int k, int l, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ); + cham_side_t side, cham_trans_t trans, + int m, int n, int k, int l, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ); void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, - int m, int n, int l, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); + int m, int n, int l, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ); void INSERT_TASK_ztrdalg( const RUNTIME_option_t *options, - cham_uplo_t uplo, - int N, int NB, - const CHAM_desc_t *A, - const CHAM_desc_t *C, int Cm, int Cn, - const CHAM_desc_t *S, int Sm, int Sn, - int i, int j, int m, int grsiz, int BAND, - int *PCOL, int *ACOL, int *MCOL ); + cham_uplo_t uplo, + int N, int NB, + const CHAM_desc_t *A, + const CHAM_desc_t *C, int Cm, int Cn, + const CHAM_desc_t *S, int Sm, int Sn, + int i, int j, int m, int grsiz, int BAND, + int *PCOL, int *ACOL, int *MCOL ); void INSERT_TASK_ztradd( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ); + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ); void INSERT_TASK_ztrasm( const RUNTIME_option_t *options, - cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn ); + cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn ); void INSERT_TASK_ztrmm( const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ); + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ); void INSERT_TASK_ztrmm_p2( const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t **B, int ldb ); + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t **B, int ldb ); void INSERT_TASK_ztrsm( const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ); + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ); void INSERT_TASK_ztrssq( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_diag_t diag, - int m, int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); + cham_uplo_t uplo, cham_diag_t diag, + int m, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); void INSERT_TASK_ztrtri( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_diag_t diag, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, + cham_uplo_t uplo, cham_diag_t diag, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, - int iinfo ); -void INSERT_TASK_ztslqt( const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); -void INSERT_TASK_ztsmlq( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); + int iinfo ); void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); -void INSERT_TASK_ztsmqr( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ); void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); -void INSERT_TASK_ztsqrt( const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ); void INSERT_TASK_ztstrf( const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *U, int Um, int Un, int ldu, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *L, int Lm, int Ln, int ldl, - int *IPIV, - cham_bool_t check_info, int iinfo ); -void INSERT_TASK_zttmqr( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); -void INSERT_TASK_zttqrt( const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); -void INSERT_TASK_zttmlq( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, + int m, int n, int ib, int nb, + const CHAM_desc_t *U, int Um, int Un, int ldu, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *L, int Lm, int Ln, int ldl, + int *IPIV, + cham_bool_t check_info, int iinfo ); +void INSERT_TASK_zpamm( const RUNTIME_option_t *options, + int op, cham_side_t side, cham_store_t storev, + int m, int n, int k, int l, const CHAM_desc_t *A1, int A1m, int A1n, int lda1, const CHAM_desc_t *A2, int A2m, int A2n, int lda2, const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); -void INSERT_TASK_zttlqt( const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ); -void INSERT_TASK_zpamm( const RUNTIME_option_t *options, - int op, cham_side_t side, cham_store_t storev, - int m, int n, int k, int l, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *W, int Wm, int Wn, int ldw ); + const CHAM_desc_t *W, int Wm, int Wn, int ldw ); void INSERT_TASK_zplssq( const RUNTIME_option_t *options, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, - const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ); + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, + const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ); void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, - const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ); + const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ); void INSERT_TASK_zunmlq( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m, int n, int ib, int nb, int k, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc ); + cham_side_t side, cham_trans_t trans, + int m, int n, int ib, int nb, int k, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zunmqr( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m, int n, int k, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc ); + cham_side_t side, cham_trans_t trans, + int m, int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zbuild( const RUNTIME_option_t *options, - const CHAM_desc_t *A, int Am, int An, int lda, - void *user_data, void* user_build_callback ); + const CHAM_desc_t *A, int Am, int An, int lda, + void *user_data, void* user_build_callback ); + + +/** + * Keep these insert_task for retro-compatibility + */ +static inline void +INSERT_TASK_ztslqt( const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +{ + return INSERT_TASK_ztplqt( options, m, n, 0, ib, nb, + A1, A1m, A1n, lda1, + A2, A2m, A2n, lda2, + T, Tm, Tn, ldt ); +} + +static inline void +INSERT_TASK_ztsqrt( const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +{ + return INSERT_TASK_ztpqrt( options, m, n, 0, ib, nb, + A1, A1m, A1n, lda1, + A2, A2m, A2n, lda2, + T, Tm, Tn, ldt ); +} + +static inline void +INSERT_TASK_zttlqt( const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +{ + return INSERT_TASK_ztplqt( options, m, n, n, ib, nb, + A1, A1m, A1n, lda1, + A2, A2m, A2n, lda2, + T, Tm, Tn, ldt ); +} + +static inline void +INSERT_TASK_zttqrt( const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +{ + return INSERT_TASK_ztpqrt( options, m, n, m, ib, nb, + A1, A1m, A1n, lda1, + A2, A2m, A2n, lda2, + T, Tm, Tn, ldt ); +} + +static inline void +INSERT_TASK_ztsmlq( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +{ + return INSERT_TASK_ztpmlqt( options, side, trans, m2, n2, k, 0, ib, nb, + V, Vm, Vn, ldv, T, Tm, Tn, ldt, + A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 ); +} + +static inline void +INSERT_TASK_ztsmqr( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +{ + return INSERT_TASK_ztpmqrt( options, side, trans, m2, n2, k, 0, ib, nb, + V, Vm, Vn, ldv, T, Tm, Tn, ldt, + A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 ); +} + +static inline void +INSERT_TASK_zttmlq( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +{ + return INSERT_TASK_ztpmlqt( options, side, trans, m2, n2, k, n2, ib, nb, + V, Vm, Vn, ldv, T, Tm, Tn, ldt, + A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 ); +} + +static inline void +INSERT_TASK_zttmqr( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +{ + return INSERT_TASK_ztpmqrt( options, side, trans, m2, n2, k, m2, ib, nb, + V, Vm, Vn, ldv, T, Tm, Tn, ldt, + A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 ); +} #endif /* _chameleon_tasks_z_h_ */ diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index f14ef4838888f5481b52774b5b63116825fc00d8..73503ee0ce89aff056eb5971f4904bdbe0787315 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -21,7 +21,7 @@ # @author Cedric Castagnede # @author Emmanuel Agullo # @author Mathieu Faverge -# @date 2012-07-13 +# @date 2018-11-07 # ### @@ -86,17 +86,9 @@ set(CODELETS_ZSRC codelets/codelet_ztrasm.c codelets/codelet_ztrssq.c codelets/codelet_ztrtri.c - codelets/codelet_ztslqt.c - codelets/codelet_ztsmlq.c - codelets/codelet_ztsmqr.c codelets/codelet_ztsmlq_hetra1.c codelets/codelet_ztsmqr_hetra1.c - codelets/codelet_ztsqrt.c codelets/codelet_ztstrf.c - codelets/codelet_zttlqt.c - codelets/codelet_zttmlq.c - codelets/codelet_zttmqr.c - codelets/codelet_zttqrt.c codelets/codelet_zunmlq.c codelets/codelet_zunmqr.c ################## diff --git a/runtime/parsec/codelets/codelet_ztslqt.c b/runtime/parsec/codelets/codelet_ztslqt.c deleted file mode 100644 index 89c8721131a948e63657ebbd08eea7c553dca5c2..0000000000000000000000000000000000000000 --- a/runtime/parsec/codelets/codelet_ztslqt.c +++ /dev/null @@ -1,70 +0,0 @@ -/** - * - * @file parsec/codelet_ztslqt.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztslqt PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -static inline int -CORE_ztslqt_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU; - CHAMELEON_Complex64_t *WORK; - - parsec_dtd_unpack_args( - this_task, &m, &n, &ib, &A1, &lda1, &A2, &lda2, &T, &ldt, &TAU, &WORK ); - - CORE_ztslqt( m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK ); - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - -void INSERT_TASK_ztslqt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_ztslqt_parsec, options->priority, "tslqt", - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &ib, VALUE, - PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INOUT, - sizeof(int), &lda1, VALUE, - PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY, - sizeof(int), &lda2, VALUE, - PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | OUTPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb, NULL, SCRATCH, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/parsec/codelets/codelet_ztsmlq.c b/runtime/parsec/codelets/codelet_ztsmlq.c deleted file mode 100644 index 56b86887aec4bcf6c3ad6685d6f027a40b188491..0000000000000000000000000000000000000000 --- a/runtime/parsec/codelets/codelet_ztsmlq.c +++ /dev/null @@ -1,89 +0,0 @@ -/** - * - * @file parsec/codelet_ztsmlq.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztsmlq PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -static inline int -CORE_ztsmlq_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - parsec_dtd_unpack_args( - this_task, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, &A1, &lda1, &A2, &lda2, &V, &ldv, &T, &ldt, &WORK, &ldwork ); - - CORE_ztsmlq( side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - -void INSERT_TASK_ztsmlq(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - int ldwork = side == ChamLeft ? ib : nb; - - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_ztsmlq_parsec, options->priority, "tsmlq", - sizeof(int), &side, VALUE, - sizeof(int), &trans, VALUE, - sizeof(int), &m1, VALUE, - sizeof(int), &n1, VALUE, - sizeof(int), &m2, VALUE, - sizeof(int), &n2, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &ib, VALUE, - PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INOUT, - sizeof(int), &lda1, VALUE, - PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY, - sizeof(int), &lda2, VALUE, - PASSED_BY_REF, RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), chameleon_parsec_get_arena_index( V ) | INPUT, - sizeof(int), &ldv, VALUE, - PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | INPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - sizeof(int), &ldwork, VALUE, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/parsec/codelets/codelet_ztsmqr.c b/runtime/parsec/codelets/codelet_ztsmqr.c deleted file mode 100644 index e8059bde8466f5957cd435dbc0e1a7b3b55714eb..0000000000000000000000000000000000000000 --- a/runtime/parsec/codelets/codelet_ztsmqr.c +++ /dev/null @@ -1,89 +0,0 @@ -/** - * - * @file parsec/codelet_ztsmqr.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztsmqr PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -static inline int -CORE_ztsmqr_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - parsec_dtd_unpack_args( - this_task, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, &A1, &lda1, &A2, &lda2, &V, &ldv, &T, &ldt, &WORK, &ldwork ); - - CORE_ztsmqr( side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - -void INSERT_TASK_ztsmqr(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - int ldwork = side == ChamLeft ? ib : nb; - - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_ztsmqr_parsec, options->priority, "tsmqr", - sizeof(int), &side, VALUE, - sizeof(int), &trans, VALUE, - sizeof(int), &m1, VALUE, - sizeof(int), &n1, VALUE, - sizeof(int), &m2, VALUE, - sizeof(int), &n2, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &ib, VALUE, - PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INOUT, - sizeof(int), &lda1, VALUE, - PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY, - sizeof(int), &lda2, VALUE, - PASSED_BY_REF, RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), chameleon_parsec_get_arena_index( V ) | INPUT, - sizeof(int), &ldv, VALUE, - PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | INPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - sizeof(int), &ldwork, VALUE, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/parsec/codelets/codelet_ztsqrt.c b/runtime/parsec/codelets/codelet_ztsqrt.c deleted file mode 100644 index a8edb3c0fdf0562f9c374bdb2100919d6f825b97..0000000000000000000000000000000000000000 --- a/runtime/parsec/codelets/codelet_ztsqrt.c +++ /dev/null @@ -1,70 +0,0 @@ -/** - * - * @file parsec/codelet_ztsqrt.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztsqrt PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -static inline int -CORE_ztsqrt_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU; - CHAMELEON_Complex64_t *WORK; - - parsec_dtd_unpack_args( - this_task, &m, &n, &ib, &A1, &lda1, &A2, &lda2, &T, &ldt, &TAU, &WORK ); - - CORE_ztsqrt( m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK ); - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - -void INSERT_TASK_ztsqrt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_ztsqrt_parsec, options->priority, "tsqrt", - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &ib, VALUE, - PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INOUT, - sizeof(int), &lda1, VALUE, - PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY, - sizeof(int), &lda2, VALUE, - PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | OUTPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb, NULL, SCRATCH, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/parsec/codelets/codelet_zttlqt.c b/runtime/parsec/codelets/codelet_zttlqt.c deleted file mode 100644 index 1a72dd5cffefc3c4cc929558042fa10d5343aa33..0000000000000000000000000000000000000000 --- a/runtime/parsec/codelets/codelet_zttlqt.c +++ /dev/null @@ -1,71 +0,0 @@ -/** - * - * @file parsec/codelet_zttlqt.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttlqt PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -static inline int -CORE_zttlqt_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU; - CHAMELEON_Complex64_t *WORK; - - parsec_dtd_unpack_args( - this_task, &m, &n, &ib, &A1, &lda1, &A2, &lda2, &T, &ldt, &TAU, &WORK ); - - CORE_zttlqt( m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK ); - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - -void INSERT_TASK_zttlqt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_zttlqt_parsec, options->priority, "ttlqt", - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &ib, VALUE, - PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INOUT, - sizeof(int), &lda1, VALUE, - PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY, - sizeof(int), &lda2, VALUE, - PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | OUTPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb, NULL, SCRATCH, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/parsec/codelets/codelet_zttmlq.c b/runtime/parsec/codelets/codelet_zttmlq.c deleted file mode 100644 index b0788876c79e5197cbc30ffa4880b77a68531ff5..0000000000000000000000000000000000000000 --- a/runtime/parsec/codelets/codelet_zttmlq.c +++ /dev/null @@ -1,89 +0,0 @@ -/** - * - * @file parsec/codelet_zttmlq.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttmlq PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -static inline int -CORE_zttmlq_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - parsec_dtd_unpack_args( - this_task, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, &A1, &lda1, &A2, &lda2, &V, &ldv, &T, &ldt, &WORK, &ldwork ); - - CORE_zttmlq( side, trans, m1, n1, m2, n2, k, ib, A1, lda1, - A2, lda2, V, ldv, T, ldt, WORK, ldwork); - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - -void INSERT_TASK_zttmlq(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - int ldwork = side == ChamLeft ? ib : nb; - - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_zttmlq_parsec, options->priority, "ttmlq", - sizeof(int), &side, VALUE, - sizeof(int), &trans, VALUE, - sizeof(int), &m1, VALUE, - sizeof(int), &n1, VALUE, - sizeof(int), &m2, VALUE, - sizeof(int), &n2, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &ib, VALUE, - PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INOUT, - sizeof(int), &lda1, VALUE, - PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY, - sizeof(int), &lda2, VALUE, - PASSED_BY_REF, RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), chameleon_parsec_get_arena_index( V ) | INPUT, - sizeof(int), &ldv, VALUE, - PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | INPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - sizeof(int), &ldwork, VALUE, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/parsec/codelets/codelet_zttmqr.c b/runtime/parsec/codelets/codelet_zttmqr.c deleted file mode 100644 index f8a8b8f6bc9fb415323554d7eb8efea1dab0725d..0000000000000000000000000000000000000000 --- a/runtime/parsec/codelets/codelet_zttmqr.c +++ /dev/null @@ -1,90 +0,0 @@ -/** - * - * @file parsec/codelet_zttmqr.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttmqr PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -static inline int -CORE_zttmqr_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - parsec_dtd_unpack_args( - this_task, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, &A1, &lda1, &A2, &lda2, &V, &ldv, &T, &ldt, &WORK, &ldwork ); - - CORE_zttmqr( side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - - -void INSERT_TASK_zttmqr(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - int ldwork = side == ChamLeft ? ib : nb; - - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_zttmqr_parsec, options->priority, "ttmqr", - sizeof(int), &side, VALUE, - sizeof(int), &trans, VALUE, - sizeof(int), &m1, VALUE, - sizeof(int), &n1, VALUE, - sizeof(int), &m2, VALUE, - sizeof(int), &n2, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &ib, VALUE, - PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INOUT, - sizeof(int), &lda1, VALUE, - PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY, - sizeof(int), &lda2, VALUE, - PASSED_BY_REF, RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), chameleon_parsec_get_arena_index( V ) | INPUT, - sizeof(int), &ldv, VALUE, - PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | INPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - sizeof(int), &ldwork, VALUE, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/parsec/codelets/codelet_zttqrt.c b/runtime/parsec/codelets/codelet_zttqrt.c deleted file mode 100644 index 6b22180076622b164425b06089774675fc873cfb..0000000000000000000000000000000000000000 --- a/runtime/parsec/codelets/codelet_zttqrt.c +++ /dev/null @@ -1,70 +0,0 @@ -/** - * - * @file parsec/codelet_zttqrt.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttqrt PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -static inline int -CORE_zttqrt_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU; - CHAMELEON_Complex64_t *WORK; - - parsec_dtd_unpack_args( - this_task, &m, &n, &ib, &A1, &lda1, &A2, &lda2, &T, &ldt, &TAU, &WORK ); - - CORE_zttqrt( m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK ); - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - -void INSERT_TASK_zttqrt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_zttqrt_parsec, options->priority, "ttqrt", - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &ib, VALUE, - PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INOUT, - sizeof(int), &lda1, VALUE, - PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY, - sizeof(int), &lda2, VALUE, - PASSED_BY_REF, RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | OUTPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb, NULL, SCRATCH, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/quark/codelets/codelet_ztslqt.c b/runtime/quark/codelets/codelet_ztslqt.c deleted file mode 100644 index 4efb19be91de19cb3819cf674e4a08a9f1defaaf..0000000000000000000000000000000000000000 --- a/runtime/quark/codelets/codelet_ztslqt.c +++ /dev/null @@ -1,143 +0,0 @@ -/** - * - * @file quark/codelet_ztslqt.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztslqt Quark codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Jakub Kurzak - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -void CORE_ztslqt_quark(Quark *quark) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU; - CHAMELEON_Complex64_t *WORK; - - quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); - CORE_ztslqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); -} - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_ztslqt computes a LQ factorization of a rectangular matrix - * formed by coupling side-by-side a complex M-by-M - * lower triangular tile A1 and a complex M-by-N tile A2: - * - * | A1 A2 | = L * Q - * - * The tile Q is represented as a product of elementary reflectors - * - * Q = H(k)' . . . H(2)' H(1)', where k = min(M,N). - * - * Each H(i) has the form - * - * H(i) = I - tau * v * v' - * - * where tau is a complex scalar, and v is a complex vector with - * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in - * A2(i,1:n), and tau in TAU(i). - * - ******************************************************************************* - * - * @param[in] M - * The number of rows of the tile A1 and A2. M >= 0. - * The number of columns of the tile A1. - * - * @param[in] N - * The number of columns of the tile A2. N >= 0. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M-by-M tile A1. - * On exit, the elements on and below the diagonal of the array - * contain the M-by-M lower trapezoidal tile L; - * the elements above the diagonal are not referenced. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M). - * - * @param[in,out] A2 - * On entry, the M-by-N tile A2. - * On exit, all the elements with the array TAU, represent - * the unitary tile Q as a product of elementary reflectors - * (see Further Details). - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M). - * - * @param[out] T - * The IB-by-N triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] TAU - * The scalar factors of the elementary reflectors (see Further - * Details). - * - * @param[out] WORK - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ - -void INSERT_TASK_ztslqt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - quark_option_t *opt = (quark_option_t*)(options->schedopt); - DAG_CORE_TSLQT; - QUARK_Insert_Task(opt->quark, CORE_ztslqt_quark, (Quark_Task_Flags*)opt, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &ib, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_L | QUARK_REGION_D, - sizeof(int), &lda1, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | LOCALITY, - sizeof(int), &lda2, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), OUTPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb, NULL, SCRATCH, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - 0); -} diff --git a/runtime/quark/codelets/codelet_ztsmlq.c b/runtime/quark/codelets/codelet_ztsmlq.c deleted file mode 100644 index b3003d130f6c08c8e5b60130b13b1baf5653ed04..0000000000000000000000000000000000000000 --- a/runtime/quark/codelets/codelet_ztsmlq.c +++ /dev/null @@ -1,190 +0,0 @@ -/** - * - * @file quark/codelet_ztsmlq.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztsmlq Quark codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Jakub Kurzak - * @author Azzam Haidar - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -void CORE_ztsmlq_quark(Quark *quark) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); - CORE_ztsmlq(side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); -} - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_ztsmlq overwrites the general complex M1-by-N1 tile A1 and - * M2-by-N2 tile A2 with - * - * SIDE = 'L' SIDE = 'R' - * TRANS = 'N': Q * | A1 | | A1 A2 | * Q - * | A2 | - * - * TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H - * | A2 | - * - * where Q is a complex unitary matrix defined as the product of k - * elementary reflectors - * - * Q = H(k)' . . . H(2)' H(1)' - * - * as returned by CORE_ZTSLQT. - * - ******************************************************************************* - * - * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. - * - * @param[in] trans - * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. - * - * @param[in] M1 - * The number of rows of the tile A1. M1 >= 0. - * - * @param[in] N1 - * The number of columns of the tile A1. N1 >= 0. - * - * @param[in] M2 - * The number of rows of the tile A2. M2 >= 0. - * M2 = M1 if side == ChamRight. - * - * @param[in] N2 - * The number of columns of the tile A2. N2 >= 0. - * N2 = N1 if side == ChamLeft. - * - * @param[in] K - * The number of elementary reflectors whose product defines - * the matrix Q. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M1-by-N1 tile A1. - * On exit, A1 is overwritten by the application of Q. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M1). - * - * @param[in,out] A2 - * On entry, the M2-by-N2 tile A2. - * On exit, A2 is overwritten by the application of Q. - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M2). - * - * @param[in] V - * The i-th row must contain the vector which defines the - * elementary reflector H(i), for i = 1,2,...,k, as returned by - * CORE_ZTSLQT in the first k rows of its array argument V. - * - * @param[in] LDV - * The leading dimension of the array V. LDV >= max(1,K). - * - * @param[in] T - * The IB-by-N1 triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] WORK - * Workspace array of size - * LDWORK-by-M1 if side == ChamLeft - * LDWORK-by-IB if side == ChamRight - * - * @param[in] LDWORK - * The leading dimension of the array WORK. - * LDWORK >= max(1,IB) if side == ChamLeft - * LDWORK >= max(1,N1) if side == ChamRight - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ -void INSERT_TASK_ztsmlq(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - int ldwork = side == ChamLeft ? ib : nb; - - quark_option_t *opt = (quark_option_t*)(options->schedopt); - DAG_CORE_TSMLQ; - QUARK_Insert_Task(opt->quark, CORE_ztsmlq_quark, (Quark_Task_Flags*)opt, - sizeof(int), &side, VALUE, - sizeof(int), &trans, VALUE, - sizeof(int), &m1, VALUE, - sizeof(int), &n1, VALUE, - sizeof(int), &m2, VALUE, - sizeof(int), &n2, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &ib, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT, - sizeof(int), &lda1, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | LOCALITY, - sizeof(int), &lda2, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), INPUT, - sizeof(int), &ldv, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), INPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - sizeof(int), &ldwork, VALUE, - 0); -} diff --git a/runtime/quark/codelets/codelet_ztsmqr.c b/runtime/quark/codelets/codelet_ztsmqr.c deleted file mode 100644 index afcde5dfa02d9f67e8363f57698b47bd256ea59c..0000000000000000000000000000000000000000 --- a/runtime/quark/codelets/codelet_ztsmqr.c +++ /dev/null @@ -1,190 +0,0 @@ -/** - * - * @file quark/codelet_ztsmqr.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztsmqr Quark codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Jakub Kurzak - * @author Azzam Haidar - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -void CORE_ztsmqr_quark(Quark *quark) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); - CORE_ztsmqr(side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); -} - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_ztsmqr overwrites the general complex M1-by-N1 tile A1 and - * M2-by-N2 tile A2 with - * - * SIDE = 'L' SIDE = 'R' - * TRANS = 'N': Q * | A1 | | A1 A2 | * Q - * | A2 | - * - * TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H - * | A2 | - * - * where Q is a complex unitary matrix defined as the product of k - * elementary reflectors - * - * Q = H(1) H(2) . . . H(k) - * - * as returned by CORE_ZTSQRT. - * - ******************************************************************************* - * - * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. - * - * @param[in] trans - * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. - * - * @param[in] M1 - * The number of rows of the tile A1. M1 >= 0. - * - * @param[in] N1 - * The number of columns of the tile A1. N1 >= 0. - * - * @param[in] M2 - * The number of rows of the tile A2. M2 >= 0. - * M2 = M1 if side == ChamRight. - * - * @param[in] N2 - * The number of columns of the tile A2. N2 >= 0. - * N2 = N1 if side == ChamLeft. - * - * @param[in] K - * The number of elementary reflectors whose product defines - * the matrix Q. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M1-by-N1 tile A1. - * On exit, A1 is overwritten by the application of Q. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M1). - * - * @param[in,out] A2 - * On entry, the M2-by-N2 tile A2. - * On exit, A2 is overwritten by the application of Q. - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M2). - * - * @param[in] V - * The i-th row must contain the vector which defines the - * elementary reflector H(i), for i = 1,2,...,k, as returned by - * CORE_ZTSQRT in the first k columns of its array argument V. - * - * @param[in] LDV - * The leading dimension of the array V. LDV >= max(1,K). - * - * @param[in] T - * The IB-by-N1 triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] WORK - * Workspace array of size - * LDWORK-by-N1 if side == ChamLeft - * LDWORK-by-IB if side == ChamRight - * - * @param[in] LDWORK - * The leading dimension of the array WORK. - * LDWORK >= max(1,IB) if side == ChamLeft - * LDWORK >= max(1,M1) if side == ChamRight - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ -void INSERT_TASK_ztsmqr(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - int ldwork = side == ChamLeft ? ib : nb; - - quark_option_t *opt = (quark_option_t*)(options->schedopt); - DAG_CORE_TSMQR; - QUARK_Insert_Task(opt->quark, CORE_ztsmqr_quark, (Quark_Task_Flags*)opt, - sizeof(int), &side, VALUE, - sizeof(int), &trans, VALUE, - sizeof(int), &m1, VALUE, - sizeof(int), &n1, VALUE, - sizeof(int), &m2, VALUE, - sizeof(int), &n2, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &ib, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT, - sizeof(int), &lda1, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | LOCALITY, - sizeof(int), &lda2, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), INPUT, - sizeof(int), &ldv, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), INPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - sizeof(int), &ldwork, VALUE, - 0); -} diff --git a/runtime/quark/codelets/codelet_ztsqrt.c b/runtime/quark/codelets/codelet_ztsqrt.c deleted file mode 100644 index 44457debb9112a93fd00ad3a6f6585d7f9d49ef0..0000000000000000000000000000000000000000 --- a/runtime/quark/codelets/codelet_ztsqrt.c +++ /dev/null @@ -1,131 +0,0 @@ -/** - * - * @file quark/codelet_ztsqrt.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztsqrt Quark codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Jakub Kurzak - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -void CORE_ztsqrt_quark(Quark *quark) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU; - CHAMELEON_Complex64_t *WORK; - - quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); - CORE_ztsqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); -} - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_ztsqrt computes a QR factorization of a rectangular matrix - * formed by coupling a complex N-by-N upper triangular tile A1 - * on top of a complex M-by-N tile A2: - * - * | A1 | = Q * R - * | A2 | - * - ******************************************************************************* - * - * @param[in] M - * The number of columns of the tile A2. M >= 0. - * - * @param[in] N - * The number of rows of the tile A1. - * The number of columns of the tiles A1 and A2. N >= 0. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the N-by-N tile A1. - * On exit, the elements on and above the diagonal of the array - * contain the N-by-N upper trapezoidal tile R; - * the elements below the diagonal are not referenced. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,N). - * - * @param[in,out] A2 - * On entry, the M-by-N tile A2. - * On exit, all the elements with the array TAU, represent - * the unitary tile Q as a product of elementary reflectors - * (see Further Details). - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M). - * - * @param[out] T - * The IB-by-N triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] TAU - * The scalar factors of the elementary reflectors (see Further - * Details). - * - * @param[out] WORK - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ -void INSERT_TASK_ztsqrt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - quark_option_t *opt = (quark_option_t*)(options->schedopt); - DAG_CORE_TSQRT; - QUARK_Insert_Task(opt->quark, CORE_ztsqrt_quark, (Quark_Task_Flags*)opt, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &ib, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_U | QUARK_REGION_D, - sizeof(int), &lda1, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | LOCALITY, - sizeof(int), &lda2, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), OUTPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb, NULL, SCRATCH, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - 0); -} diff --git a/runtime/quark/codelets/codelet_zttlqt.c b/runtime/quark/codelets/codelet_zttlqt.c deleted file mode 100644 index 85eb8e3d80859879501a3592eff0ec4628c88e13..0000000000000000000000000000000000000000 --- a/runtime/quark/codelets/codelet_zttlqt.c +++ /dev/null @@ -1,143 +0,0 @@ -/** - * - * @file quark/codelet_zttlqt.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttlqt Quark codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -void CORE_zttlqt_quark(Quark *quark) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU; - CHAMELEON_Complex64_t *WORK; - - quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); - CORE_zttlqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); -} - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_zttlqt computes a LQ factorization of a rectangular matrix - * formed by coupling side-by-side a complex M-by-M lower triangular tile A1 - * and a complex M-by-N lower triangular tile A2: - * - * | A1 A2 | = L * Q - * - * The tile Q is represented as a product of elementary reflectors - * - * Q = H(k)' . . . H(2)' H(1)', where k = min(M,N). - * - * Each H(i) has the form - * - * H(i) = I - tau * v * v' - * - * where tau is a complex scalar, and v is a complex vector with - * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in - * A2(i,1:n), and tau in TAU(i). - * - ******************************************************************************* - * - * @param[in] M - * The number of rows of the tile A1 and A2. M >= 0. - * The number of columns of the tile A1. - * - * @param[in] N - * The number of columns of the tile A2. N >= 0. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M-by-M tile A1. - * On exit, the elements on and below the diagonal of the array - * contain the M-by-M lower trapezoidal tile L; - * the elements above the diagonal are not referenced. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,N). - * - * @param[in,out] A2 - * On entry, the M-by-N lower triangular tile A2. - * On exit, the elements on and below the diagonal of the array - * with the array TAU, represent - * the unitary tile Q as a product of elementary reflectors - * (see Further Details). - * - * @param[in] LDA2 - * The leading dimension of the array A2. LDA2 >= max(1,M). - * - * @param[out] T - * The IB-by-N triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] TAU - * The scalar factors of the elementary reflectors (see Further - * Details). - * - * @param[in,out] WORK - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ -void INSERT_TASK_zttlqt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - quark_option_t *opt = (quark_option_t*)(options->schedopt); - DAG_CORE_TTLQT; - QUARK_Insert_Task(opt->quark, CORE_zttlqt_quark, (Quark_Task_Flags*)opt, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &ib, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_L | QUARK_REGION_D, - sizeof(int), &lda1, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | QUARK_REGION_L | QUARK_REGION_D | LOCALITY, - sizeof(int), &lda2, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), OUTPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb, NULL, SCRATCH, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - 0); -} diff --git a/runtime/quark/codelets/codelet_zttmlq.c b/runtime/quark/codelets/codelet_zttmlq.c deleted file mode 100644 index f3701869ca32a8a1376a8cf04d500eb8049a36f0..0000000000000000000000000000000000000000 --- a/runtime/quark/codelets/codelet_zttmlq.c +++ /dev/null @@ -1,182 +0,0 @@ -/** - * - * @file quark/codelet_zttmlq.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttmlq Quark codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -void CORE_zttmlq_quark(Quark *quark) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); - CORE_zttmlq(side, trans, m1, n1, m2, n2, k, ib, A1, lda1, - A2, lda2, V, ldv, T, ldt, WORK, ldwork); -} - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_zttmlq overwrites the general complex M1-by-N1 tile A1 and - * M2-by-N2 tile A2 (N1 == N2) with - * - * SIDE = 'L' SIDE = 'R' - * TRANS = 'N': Q * | A1 | | A1 | * Q - * | A2 | | A2 | - * - * TRANS = 'C': Q**H * | A1 | | A1 | * Q**H - * | A2 | | A2 | - * - * where Q is a complex unitary matrix defined as the product of k - * elementary reflectors - * - * Q = H(1) H(2) . . . H(k) - * - * as returned by CORE_zttqrt. - * - ******************************************************************************* - * - * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. - * - * @param[in] trans - * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. - * - * @param[in] M1 - * The number of rows of the tile A1. M1 >= 0. - * - * @param[in] N1 - * The number of columns of the tile A1. N1 >= 0. - * - * @param[in] M2 - * The number of rows of the tile A2. M2 >= 0. - * - * @param[in] N2 - * The number of columns of the tile A2. N2 >= 0. - * - * @param[in] K - * The number of elementary reflectors whose product defines - * the matrix Q. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M1-by-N1 tile A1. - * On exit, A1 is overwritten by the application of Q. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M1). - * - * @param[in,out] A2 - * On entry, the M2-by-N2 tile A2. - * On exit, A2 is overwritten by the application of Q. - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M2). - * - * @param[in] V - * The i-th row must contain the vector which defines the - * elementary reflector H(i), for i = 1,2,...,k, as returned by - * CORE_ZTTQRT in the first k rows of its array argument V. - * - * @param[in] LDV - * The leading dimension of the array V. LDV >= max(1,K). - * - * @param[in] T - * The IB-by-N1 triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] WORK - * Workspace array of size LDWORK-by-N1. - * - * @param[in] LDWORK - * The dimension of the array WORK. LDWORK >= max(1,IB). - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ -void INSERT_TASK_zttmlq(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - int ldwork = side == ChamLeft ? ib : nb; - - quark_option_t *opt = (quark_option_t*)(options->schedopt); - DAG_CORE_TTMLQ; - QUARK_Insert_Task(opt->quark, CORE_zttmlq_quark, (Quark_Task_Flags*)opt, - sizeof(int), &side, VALUE, - sizeof(int), &trans, VALUE, - sizeof(int), &m1, VALUE, - sizeof(int), &n1, VALUE, - sizeof(int), &m2, VALUE, - sizeof(int), &n2, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &ib, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT, - sizeof(int), &lda1, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | LOCALITY, - sizeof(int), &lda2, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), INPUT | QUARK_REGION_L | QUARK_REGION_D, - sizeof(int), &ldv, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), INPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - sizeof(int), &ldwork, VALUE, - 0); -} diff --git a/runtime/quark/codelets/codelet_zttmqr.c b/runtime/quark/codelets/codelet_zttmqr.c deleted file mode 100644 index e106a34ce42948c3786f10f3af9930368299a781..0000000000000000000000000000000000000000 --- a/runtime/quark/codelets/codelet_zttmqr.c +++ /dev/null @@ -1,183 +0,0 @@ -/** - * - * @file quark/codelet_zttmqr.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttmqr Quark codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -static void -CORE_zttmqr_quark( Quark *quark ) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); - CORE_zttmqr(side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); -} - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_zttmqr overwrites the general complex M1-by-N1 tile A1 and - * M2-by-N2 tile A2 (N1 == N2) with - * - * SIDE = 'L' SIDE = 'R' - * TRANS = 'N': Q * | A1 | | A1 | * Q - * | A2 | | A2 | - * - * TRANS = 'C': Q**H * | A1 | | A1 | * Q**H - * | A2 | | A2 | - * - * where Q is a complex unitary matrix defined as the product of k - * elementary reflectors - * - * Q = H(1) H(2) . . . H(k) - * - * as returned by CORE_zttqrt. - * - ******************************************************************************* - * - * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. - * - * @param[in] trans - * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. - * - * @param[in] M1 - * The number of rows of the tile A1. M1 >= 0. - * - * @param[in] N1 - * The number of columns of the tile A1. N1 >= 0. - * - * @param[in] M2 - * The number of rows of the tile A2. M2 >= 0. - * - * @param[in] N2 - * The number of columns of the tile A2. N2 >= 0. - * - * @param[in] K - * The number of elementary reflectors whose product defines - * the matrix Q. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M1-by-N1 tile A1. - * On exit, A1 is overwritten by the application of Q. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M1). - * - * @param[in,out] A2 - * On entry, the M2-by-N2 tile A2. - * On exit, A2 is overwritten by the application of Q. - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M2). - * - * @param[in] V - * The i-th row must contain the vector which defines the - * elementary reflector H(i), for i = 1,2,...,k, as returned by - * CORE_ZTTQRT in the first k rows of its array argument V. - * - * @param[in] LDV - * The leading dimension of the array V. LDV >= max(1,K). - * - * @param[in] T - * The IB-by-N1 triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] WORK - * Workspace array of size LDWORK-by-N1. - * - * @param[in] LDWORK - * The dimension of the array WORK. LDWORK >= max(1,IB). - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ -void INSERT_TASK_zttmqr(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - int ldwork = side == ChamLeft ? ib : nb; - - quark_option_t *opt = (quark_option_t*)(options->schedopt); - DAG_CORE_TTMQR; - QUARK_Insert_Task(opt->quark, CORE_zttmqr_quark, (Quark_Task_Flags*)opt, - sizeof(int), &side, VALUE, - sizeof(int), &trans, VALUE, - sizeof(int), &m1, VALUE, - sizeof(int), &n1, VALUE, - sizeof(int), &m2, VALUE, - sizeof(int), &n2, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &ib, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT, - sizeof(int), &lda1, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | LOCALITY, - sizeof(int), &lda2, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), INPUT | QUARK_REGION_U | QUARK_REGION_D, - sizeof(int), &ldv, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), INPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - sizeof(int), &ldwork, VALUE, - 0); -} diff --git a/runtime/quark/codelets/codelet_zttqrt.c b/runtime/quark/codelets/codelet_zttqrt.c deleted file mode 100644 index d5f62c44110d6d8f39baecfd9ca2ba75c611f0b1..0000000000000000000000000000000000000000 --- a/runtime/quark/codelets/codelet_zttqrt.c +++ /dev/null @@ -1,143 +0,0 @@ -/** - * - * @file quark/codelet_zttqrt.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttqrt Quark codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -void CORE_zttqrt_quark(Quark *quark) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU; - CHAMELEON_Complex64_t *WORK; - - quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); - CORE_zttqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); -} - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_zttqrt computes a QR factorization of a rectangular matrix - * formed by coupling a complex N-by-N upper triangular tile A1 - * on top of a complex M-by-N upper trapezoidal tile A2: - * - * | A1 | = Q * R - * | A2 | - * - * The tile Q is represented as a product of elementary reflectors - * - * Q = H(1) H(2) . . . H(k), where k = min(M,N). - * - * Each H(i) has the form - * - * H(i) = I - tau * v * v' - * - * where tau is a complex scalar, and v is a complex vector with - * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A2(1:m,i), - * and tau in TAU(i). - * - ******************************************************************************* - * - * @param[in] M - * The number of rows of the tile A2. M >= 0. - * - * @param[in] N - * The number of columns of the tile A1 and A2. N >= 0. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the N-by-N tile A1. - * On exit, the elements on and above the diagonal of the array - * contain the N-by-N upper trapezoidal tile R; - * the elements below the diagonal are not referenced. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,N). - * - * @param[in,out] A2 - * On entry, the M-by-N upper triangular tile A2. - * On exit, the elements on and above the diagonal of the array - * with the array TAU, represent - * the unitary tile Q as a product of elementary reflectors - * (see Further Details). - * - * @param[in] LDA2 - * The leading dimension of the array A2. LDA2 >= max(1,M). - * - * @param[out] T - * The IB-by-N triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] TAU - * The scalar factors of the elementary reflectors (see Further - * Details). - * - * @param[in,out] WORK - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ -void INSERT_TASK_zttqrt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - quark_option_t *opt = (quark_option_t*)(options->schedopt); - DAG_CORE_TTQRT; - QUARK_Insert_Task(opt->quark, CORE_zttqrt_quark, (Quark_Task_Flags*)opt, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &ib, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_U | QUARK_REGION_D, - sizeof(int), &lda1, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | QUARK_REGION_U | QUARK_REGION_D | LOCALITY, - sizeof(int), &lda2, VALUE, - sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), OUTPUT, - sizeof(int), &ldt, VALUE, - sizeof(CHAMELEON_Complex64_t)*nb, NULL, SCRATCH, - sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH, - 0); -} diff --git a/runtime/starpu/codelets/codelet_ztslqt.c b/runtime/starpu/codelets/codelet_ztslqt.c deleted file mode 100644 index 870e1349af880473ae1e92787e84e0a6ad3df035..0000000000000000000000000000000000000000 --- a/runtime/starpu/codelets/codelet_ztslqt.c +++ /dev/null @@ -1,174 +0,0 @@ -/** - * - * @file starpu/codelet_ztslqt.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztslqt StarPU codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Jakub Kurzak - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_starpu.h" -#include "runtime_codelet_z.h" - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_ztslqt computes a LQ factorization of a rectangular matrix - * formed by coupling side-by-side a complex M-by-M - * lower triangular tile A1 and a complex M-by-N tile A2: - * - * | A1 A2 | = L * Q - * - * The tile Q is represented as a product of elementary reflectors - * - * Q = H(k)' . . . H(2)' H(1)', where k = min(M,N). - * - * Each H(i) has the form - * - * H(i) = I - tau * v * v' - * - * where tau is a complex scalar, and v is a complex vector with - * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in - * A2(i,1:n), and tau in TAU(i). - * - ******************************************************************************* - * - * @param[in] M - * The number of rows of the tile A1 and A2. M >= 0. - * The number of columns of the tile A1. - * - * @param[in] N - * The number of columns of the tile A2. N >= 0. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M-by-M tile A1. - * On exit, the elements on and below the diagonal of the array - * contain the M-by-M lower trapezoidal tile L; - * the elements above the diagonal are not referenced. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M). - * - * @param[in,out] A2 - * On entry, the M-by-N tile A2. - * On exit, all the elements with the array TAU, represent - * the unitary tile Q as a product of elementary reflectors - * (see Further Details). - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M). - * - * @param[out] T - * The IB-by-N triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] TAU - * The scalar factors of the elementary reflectors (see Further - * Details). - * - * @param[out] WORK - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ - -void INSERT_TASK_ztslqt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_ztslqt; - void (*callback)(void*) = options->profiling ? cl_ztslqt_callback : NULL; - CHAMELEON_starpu_ws_t *h_work = (CHAMELEON_starpu_ws_t*)(options->ws_host); - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A1, A1m, A1n); - CHAMELEON_ACCESS_RW(A2, A2m, A2n); - CHAMELEON_ACCESS_W(T, Tm, Tn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &ib, sizeof(int), - STARPU_RW, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), - STARPU_VALUE, &lda1, sizeof(int), - STARPU_RW, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), - STARPU_VALUE, &lda2, sizeof(int), - STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), - /* max( nb * (ib+1), ib * (ib+nb) ) */ - STARPU_SCRATCH, options->ws_worker, - /* /\* 2 * ib * (nb+ib) + nb *\/ */ - STARPU_VALUE, &h_work, sizeof(CHAMELEON_starpu_ws_t *), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztslqt", -#endif - 0); -} - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztslqt_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_starpu_ws_t *h_work; - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU, *WORK; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - TAU= (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* nb + ib*nb */ - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work); - - WORK = TAU + chameleon_max( m, n ); - CORE_ztslqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztslqt, 4, cl_ztslqt_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztsmlq.c b/runtime/starpu/codelets/codelet_ztsmlq.c deleted file mode 100644 index ad1d0a7a0cafa70bb9a6fab798a3bb8625be3681..0000000000000000000000000000000000000000 --- a/runtime/starpu/codelets/codelet_ztsmlq.c +++ /dev/null @@ -1,267 +0,0 @@ -/** - * - * @file starpu/codelet_ztsmlq.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztsmlq StarPU codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Jakub Kurzak - * @author Azzam Haidar - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2018-11-07 - * @precisions normal z -> c d s - * - */ -#include "chameleon_starpu.h" -#include "runtime_codelet_z.h" - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_ztsmlq overwrites the general complex M1-by-N1 tile A1 and - * M2-by-N2 tile A2 with - * - * SIDE = 'L' SIDE = 'R' - * TRANS = 'N': Q * | A1 | | A1 A2 | * Q - * | A2 | - * - * TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H - * | A2 | - * - * where Q is a complex unitary matrix defined as the product of k - * elementary reflectors - * - * Q = H(k)' . . . H(2)' H(1)' - * - * as returned by CORE_ZTSLQT. - * - ******************************************************************************* - * - * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. - * - * @param[in] trans - * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. - * - * @param[in] M1 - * The number of rows of the tile A1. M1 >= 0. - * - * @param[in] N1 - * The number of columns of the tile A1. N1 >= 0. - * - * @param[in] M2 - * The number of rows of the tile A2. M2 >= 0. - * M2 = M1 if side == ChamRight. - * - * @param[in] N2 - * The number of columns of the tile A2. N2 >= 0. - * N2 = N1 if side == ChamLeft. - * - * @param[in] K - * The number of elementary reflectors whose product defines - * the matrix Q. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M1-by-N1 tile A1. - * On exit, A1 is overwritten by the application of Q. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M1). - * - * @param[in,out] A2 - * On entry, the M2-by-N2 tile A2. - * On exit, A2 is overwritten by the application of Q. - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M2). - * - * @param[in] V - * The i-th row must contain the vector which defines the - * elementary reflector H(i), for i = 1,2,...,k, as returned by - * CORE_ZTSLQT in the first k rows of its array argument V. - * - * @param[in] LDV - * The leading dimension of the array V. LDV >= max(1,K). - * - * @param[in] T - * The IB-by-N1 triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] WORK - * Workspace array of size - * LDWORK-by-M1 if side == ChamLeft - * LDWORK-by-IB if side == ChamRight - * - * @param[in] LDWORK - * The leading dimension of the array WORK. - * LDWORK >= max(1,IB) if side == ChamLeft - * LDWORK >= max(1,N1) if side == ChamRight - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ - -void INSERT_TASK_ztsmlq(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - struct starpu_codelet *codelet = &cl_ztsmlq; - void (*callback)(void*) = options->profiling ? cl_ztsmlq_callback : NULL; - int ldwork = side == ChamLeft ? ib : nb; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A1, A1m, A1n); - CHAMELEON_ACCESS_RW(A2, A2m, A2n); - CHAMELEON_ACCESS_R(V, Vm, Vn); - CHAMELEON_ACCESS_R(T, Tm, Tn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &m1, sizeof(int), - STARPU_VALUE, &n1, sizeof(int), - STARPU_VALUE, &m2, sizeof(int), - STARPU_VALUE, &n2, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &ib, sizeof(int), - STARPU_RW, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), - STARPU_VALUE, &lda1, sizeof(int), - STARPU_RW, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), - STARPU_VALUE, &lda2, sizeof(int), - STARPU_R, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), - STARPU_VALUE, &ldv, sizeof(int), - STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), - /* max( ib*nb, 3*ib*nb ) */ - STARPU_SCRATCH, options->ws_worker, - STARPU_VALUE, &ldwork, sizeof(int), - STARPU_VALUE, &(options->ws_wsize), sizeof(size_t), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztsmlq", -#endif - 0); -} - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztsmlq_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - size_t lwork; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, - &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork ); - - CORE_ztsmlq(side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); - - (void)lwork; -} - -#if defined(CHAMELEON_USE_CUDA) -static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - cuDoubleComplex *A1; - int lda1; - cuDoubleComplex *A2; - int lda2; - cuDoubleComplex *V; - int ldv; - cuDoubleComplex *T; - int ldt; - cuDoubleComplex *W; - int ldwork; - size_t lwork; - - A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - V = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); - W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */ - - starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, - &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork ); - - RUNTIME_getStream(stream); - - CUDA_ztsmlq( side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, - W, lwork, stream ); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif -} -#endif /* defined(CHAMELEON_USE_CUDA) */ -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS(ztsmlq, 5, cl_ztsmlq_cpu_func, cl_ztsmlq_cuda_func, STARPU_CUDA_ASYNC) diff --git a/runtime/starpu/codelets/codelet_ztsmqr.c b/runtime/starpu/codelets/codelet_ztsmqr.c deleted file mode 100644 index 349aa129e837c6d271e8e1f1b3a2af686ba6d903..0000000000000000000000000000000000000000 --- a/runtime/starpu/codelets/codelet_ztsmqr.c +++ /dev/null @@ -1,274 +0,0 @@ -/** - * - * @file starpu/codelet_ztsmqr.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztsmqr StarPU codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Jakub Kurzak - * @author Azzam Haidar - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2018-11-07 - * @precisions normal z -> c d s - * - */ -#include "chameleon_starpu.h" -#include "runtime_codelet_z.h" - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_ztsmqr overwrites the general complex M1-by-N1 tile A1 and - * M2-by-N2 tile A2 with - * - * SIDE = 'L' SIDE = 'R' - * TRANS = 'N': Q * | A1 | | A1 A2 | * Q - * | A2 | - * - * TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H - * | A2 | - * - * where Q is a complex unitary matrix defined as the product of k - * elementary reflectors - * - * Q = H(1) H(2) . . . H(k) - * - * as returned by CORE_ZTSQRT. - * - ******************************************************************************* - * - * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. - * - * @param[in] trans - * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. - * - * @param[in] M1 - * The number of rows of the tile A1. M1 >= 0. - * - * @param[in] N1 - * The number of columns of the tile A1. N1 >= 0. - * - * @param[in] M2 - * The number of rows of the tile A2. M2 >= 0. - * M2 = M1 if side == ChamRight. - * - * @param[in] N2 - * The number of columns of the tile A2. N2 >= 0. - * N2 = N1 if side == ChamLeft. - * - * @param[in] K - * The number of elementary reflectors whose product defines - * the matrix Q. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M1-by-N1 tile A1. - * On exit, A1 is overwritten by the application of Q. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M1). - * - * @param[in,out] A2 - * On entry, the M2-by-N2 tile A2. - * On exit, A2 is overwritten by the application of Q. - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M2). - * - * @param[in] V - * The i-th row must contain the vector which defines the - * elementary reflector H(i), for i = 1,2,...,k, as returned by - * CORE_ZTSQRT in the first k columns of its array argument V. - * - * @param[in] LDV - * The leading dimension of the array V. LDV >= max(1,K). - * - * @param[in] T - * The IB-by-N1 triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] WORK - * Workspace array of size - * LDWORK-by-N1 if side == ChamLeft - * LDWORK-by-IB if side == ChamRight - * - * @param[in] LDWORK - * The leading dimension of the array WORK. - * LDWORK >= max(1,IB) if side == ChamLeft - * LDWORK >= max(1,M1) if side == ChamRight - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ - -void INSERT_TASK_ztsmqr(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_ztsmqr; - void (*callback)(void*) = options->profiling ? cl_ztsmqr_callback : NULL; - int ldwork = side == ChamLeft ? ib : nb; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A1, A1m, A1n); - CHAMELEON_ACCESS_RW(A2, A2m, A2n); - CHAMELEON_ACCESS_R(V, Vm, Vn); - CHAMELEON_ACCESS_R(T, Tm, Tn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &m1, sizeof(int), - STARPU_VALUE, &n1, sizeof(int), - STARPU_VALUE, &m2, sizeof(int), - STARPU_VALUE, &n2, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &ib, sizeof(int), - STARPU_RW, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), - STARPU_VALUE, &lda1, sizeof(int), - STARPU_RW, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), - STARPU_VALUE, &lda2, sizeof(int), - STARPU_R, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), - STARPU_VALUE, &ldv, sizeof(int), - STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), - /* max( ib*nb, 2*ib*nb ) */ - STARPU_SCRATCH, options->ws_worker, - STARPU_VALUE, &ldwork, sizeof(int), - STARPU_VALUE, &(options->ws_wsize), sizeof(size_t), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_USE_MPI) - STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n), -#endif -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztsmqr", -#endif - 0); -} - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztsmqr_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - size_t lwork; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, - &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork); - - CORE_ztsmqr(side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); - - (void)lwork; -} - -#if defined(CHAMELEON_USE_CUDA) -static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - cuDoubleComplex *A1; - int lda1; - cuDoubleComplex *A2; - int lda2; - cuDoubleComplex *V; - int ldv; - cuDoubleComplex *T; - int ldt; - cuDoubleComplex *W; - int ldwork; - size_t lwork; - - A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - V = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); - W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */ - - starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, - &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork ); - - RUNTIME_getStream(stream); - - CUDA_ztsmqr( - side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, - W, lwork, stream ); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - (void)ldwork; -} -#endif /* defined(CHAMELEON_USE_CUDA) */ -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS(ztsmqr, 5, cl_ztsmqr_cpu_func, cl_ztsmqr_cuda_func, STARPU_CUDA_ASYNC) diff --git a/runtime/starpu/codelets/codelet_ztsqrt.c b/runtime/starpu/codelets/codelet_ztsqrt.c deleted file mode 100644 index cb93ba7c22c3f832f365f293545366f03863bd6c..0000000000000000000000000000000000000000 --- a/runtime/starpu/codelets/codelet_ztsqrt.c +++ /dev/null @@ -1,166 +0,0 @@ -/** - * - * @file starpu/codelet_ztsqrt.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztsqrt StarPU codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Jakub Kurzak - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_starpu.h" -#include "runtime_codelet_z.h" - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_ztsqrt computes a QR factorization of a rectangular matrix - * formed by coupling a complex N-by-N upper triangular tile A1 - * on top of a complex M-by-N tile A2: - * - * | A1 | = Q * R - * | A2 | - * - ******************************************************************************* - * - * @param[in] M - * The number of columns of the tile A2. M >= 0. - * - * @param[in] N - * The number of rows of the tile A1. - * The number of columns of the tiles A1 and A2. N >= 0. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the N-by-N tile A1. - * On exit, the elements on and above the diagonal of the array - * contain the N-by-N upper trapezoidal tile R; - * the elements below the diagonal are not referenced. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,N). - * - * @param[in,out] A2 - * On entry, the M-by-N tile A2. - * On exit, all the elements with the array TAU, represent - * the unitary tile Q as a product of elementary reflectors - * (see Further Details). - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M). - * - * @param[out] T - * The IB-by-N triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] TAU - * The scalar factors of the elementary reflectors (see Further - * Details). - * - * @param[out] WORK - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ - -void INSERT_TASK_ztsqrt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_ztsqrt; - void (*callback)(void*) = options->profiling ? cl_ztsqrt_callback : NULL; - CHAMELEON_starpu_ws_t *h_work = (CHAMELEON_starpu_ws_t*)(options->ws_host); - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A1, A1m, A1n); - CHAMELEON_ACCESS_RW(A2, A2m, A2n); - CHAMELEON_ACCESS_W(T, Tm, Tn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &ib, sizeof(int), - STARPU_RW, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), - STARPU_VALUE, &lda1, sizeof(int), - STARPU_RW, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), - STARPU_VALUE, &lda2, sizeof(int), - STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn ), - STARPU_VALUE, &ldt, sizeof(int), - /* max( nb * (ib+1), ib * (ib+nb) ) */ - STARPU_SCRATCH, options->ws_worker, - /* 2 * ib * (nb+ib) + nb */ - STARPU_VALUE, &h_work, sizeof(CHAMELEON_starpu_ws_t *), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_USE_MPI) - STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n), -#endif -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztsqrt", -#endif - 0); -} - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztsqrt_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_starpu_ws_t *h_work; - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU, *WORK; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - TAU= (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* nb + ib*nb */ - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work); - - WORK = TAU + chameleon_max( m, n ); - CORE_ztsqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztsqrt, 4, cl_ztsqrt_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zttlqt.c b/runtime/starpu/codelets/codelet_zttlqt.c deleted file mode 100644 index a673832082e5aaa466b31b53230f0f6421c77333..0000000000000000000000000000000000000000 --- a/runtime/starpu/codelets/codelet_zttlqt.c +++ /dev/null @@ -1,173 +0,0 @@ -/** - * - * @file starpu/codelet_zttlqt.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttlqt StarPU codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_starpu.h" -#include "runtime_codelet_z.h" - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_zttlqt computes a LQ factorization of a rectangular matrix - * formed by coupling side-by-side a complex M-by-M lower triangular tile A1 - * and a complex M-by-N lower triangular tile A2: - * - * | A1 A2 | = L * Q - * - * The tile Q is represented as a product of elementary reflectors - * - * Q = H(k)' . . . H(2)' H(1)', where k = min(M,N). - * - * Each H(i) has the form - * - * H(i) = I - tau * v * v' - * - * where tau is a complex scalar, and v is a complex vector with - * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in - * A2(i,1:n), and tau in TAU(i). - * - ******************************************************************************* - * - * @param[in] M - * The number of rows of the tile A1 and A2. M >= 0. - * The number of columns of the tile A1. - * - * @param[in] N - * The number of columns of the tile A2. N >= 0. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M-by-M tile A1. - * On exit, the elements on and below the diagonal of the array - * contain the M-by-M lower trapezoidal tile L; - * the elements above the diagonal are not referenced. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,N). - * - * @param[in,out] A2 - * On entry, the M-by-N lower triangular tile A2. - * On exit, the elements on and below the diagonal of the array - * with the array TAU, represent - * the unitary tile Q as a product of elementary reflectors - * (see Further Details). - * - * @param[in] LDA2 - * The leading dimension of the array A2. LDA2 >= max(1,M). - * - * @param[out] T - * The IB-by-N triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] TAU - * The scalar factors of the elementary reflectors (see Further - * Details). - * - * @param[in,out] WORK - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ - -void INSERT_TASK_zttlqt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zttlqt; - void (*callback)(void*) = options->profiling ? cl_zttlqt_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A1, A1m, A1n); - CHAMELEON_ACCESS_RW(A2, A2m, A2n); - CHAMELEON_ACCESS_W(T, Tm, Tn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &ib, sizeof(int), - STARPU_RW, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), - STARPU_VALUE, &lda1, sizeof(int), - STARPU_RW, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), - STARPU_VALUE, &lda2, sizeof(int), - STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), - /* nb * (ib+1) */ - STARPU_SCRATCH, options->ws_worker, - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zttlqt", -#endif - 0); -} - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zttlqt_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU; - CHAMELEON_Complex64_t *WORK; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* nb * (ib+1) */ - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt); - - WORK = TAU + chameleon_max( m, n ); - - CORE_zttlqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zttlqt, 4, cl_zttlqt_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zttmlq.c b/runtime/starpu/codelets/codelet_zttmlq.c deleted file mode 100644 index c2924eafcef47724732584b52ab235208c578a78..0000000000000000000000000000000000000000 --- a/runtime/starpu/codelets/codelet_zttmlq.c +++ /dev/null @@ -1,212 +0,0 @@ -/** - * - * @file starpu/codelet_zttmlq.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttmlq StarPU codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_starpu.h" -#include "runtime_codelet_z.h" - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_zttmlq overwrites the general complex M1-by-N1 tile A1 and - * M2-by-N2 tile A2 (N1 == N2) with - * - * SIDE = 'L' SIDE = 'R' - * TRANS = 'N': Q * | A1 | | A1 | * Q - * | A2 | | A2 | - * - * TRANS = 'C': Q**H * | A1 | | A1 | * Q**H - * | A2 | | A2 | - * - * where Q is a complex unitary matrix defined as the product of k - * elementary reflectors - * - * Q = H(1) H(2) . . . H(k) - * - * as returned by CORE_zttqrt. - * - ******************************************************************************* - * - * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. - * - * @param[in] trans - * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. - * - * @param[in] M1 - * The number of rows of the tile A1. M1 >= 0. - * - * @param[in] N1 - * The number of columns of the tile A1. N1 >= 0. - * - * @param[in] M2 - * The number of rows of the tile A2. M2 >= 0. - * - * @param[in] N2 - * The number of columns of the tile A2. N2 >= 0. - * - * @param[in] K - * The number of elementary reflectors whose product defines - * the matrix Q. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M1-by-N1 tile A1. - * On exit, A1 is overwritten by the application of Q. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M1). - * - * @param[in,out] A2 - * On entry, the M2-by-N2 tile A2. - * On exit, A2 is overwritten by the application of Q. - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M2). - * - * @param[in] V - * The i-th row must contain the vector which defines the - * elementary reflector H(i), for i = 1,2,...,k, as returned by - * CORE_ZTTQRT in the first k rows of its array argument V. - * - * @param[in] LDV - * The leading dimension of the array V. LDV >= max(1,K). - * - * @param[out] T - * The IB-by-N1 triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] WORK - * Workspace array of size LDWORK-by-N1. - * - * @param[in] LDWORK - * The dimension of the array WORK. LDWORK >= max(1,IB). - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ - -void INSERT_TASK_zttmlq(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zttmlq; - void (*callback)(void*) = options->profiling ? cl_zttmlq_callback : NULL; - int ldwork = side == ChamLeft ? ib : nb; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A1, A1m, A1n); - CHAMELEON_ACCESS_RW(A2, A2m, A2n); - CHAMELEON_ACCESS_R(V, Vm, Vn); - CHAMELEON_ACCESS_R(T, Tm, Tn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &m1, sizeof(int), - STARPU_VALUE, &n1, sizeof(int), - STARPU_VALUE, &m2, sizeof(int), - STARPU_VALUE, &n2, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &ib, sizeof(int), - STARPU_RW, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), - STARPU_VALUE, &lda1, sizeof(int), - STARPU_RW, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), - STARPU_VALUE, &lda2, sizeof(int), - STARPU_R, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), - STARPU_VALUE, &ldv, sizeof(int), - STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), - /* nb * ib */ - STARPU_SCRATCH, options->ws_worker, - STARPU_VALUE, &ldwork, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zttmlq", -#endif - 0); -} - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zttmlq_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* nb * ib */ - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, - &lda1, &lda2, &ldv, &ldt, &ldwork); - - CORE_zttmlq(side, trans, m1, n1, m2, n2, k, ib, A1, lda1, - A2, lda2, V, ldv, T, ldt, WORK, ldwork); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zttmlq, 5, cl_zttmlq_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zttmqr.c b/runtime/starpu/codelets/codelet_zttmqr.c deleted file mode 100644 index 0f88020fc78c794112e37d26af6e9beff0daba53..0000000000000000000000000000000000000000 --- a/runtime/starpu/codelets/codelet_zttmqr.c +++ /dev/null @@ -1,272 +0,0 @@ -/** - * - * @file starpu/codelet_zttmqr.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttmqr StarPU codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2018-11-07 - * @precisions normal z -> c d s - * - */ -#include "chameleon_starpu.h" -#include "runtime_codelet_z.h" - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_zttmqr overwrites the general complex M1-by-N1 tile A1 and - * M2-by-N2 tile A2 (N1 == N2) with - * - * SIDE = 'L' SIDE = 'R' - * TRANS = 'N': Q * | A1 | | A1 | * Q - * | A2 | | A2 | - * - * TRANS = 'C': Q**H * | A1 | | A1 | * Q**H - * | A2 | | A2 | - * - * where Q is a complex unitary matrix defined as the product of k - * elementary reflectors - * - * Q = H(1) H(2) . . . H(k) - * - * as returned by CORE_zttqrt. - * - ******************************************************************************* - * - * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. - * - * @param[in] trans - * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. - * - * @param[in] M1 - * The number of rows of the tile A1. M1 >= 0. - * - * @param[in] N1 - * The number of columns of the tile A1. N1 >= 0. - * - * @param[in] M2 - * The number of rows of the tile A2. M2 >= 0. - * M2 = M1 if side == ChamRight. - * - * @param[in] N2 - * The number of columns of the tile A2. N2 >= 0. - * N2 = N1 if side == ChamLeft. - * - * @param[in] K - * The number of elementary reflectors whose product defines - * the matrix Q. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the M1-by-N1 tile A1. - * On exit, A1 is overwritten by the application of Q. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M1). - * - * @param[in,out] A2 - * On entry, the M2-by-N2 tile A2. - * On exit, A2 is overwritten by the application of Q. - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M2). - * - * @param[in] V - * The i-th row must contain the vector which defines the - * elementary reflector H(i), for i = 1,2,...,k, as returned by - * CORE_ZTTQRT in the first k columns of its array argument V. - * - * @param[in] LDV - * The leading dimension of the array V. LDV >= max(1,K). - * - * @param[in] T - * The IB-by-N1 triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] WORK - * Workspace array of size - * LDWORK-by-N1 if side == ChamLeft - * LDWORK-by-IB if side == ChamRight - * - * @param[in] LDWORK - * The leading dimension of the array WORK. - * LDWORK >= max(1,IB) if side == ChamLeft - * LDWORK >= max(1,M1) if side == ChamRight - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ - -void INSERT_TASK_zttmqr(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zttmqr; - void (*callback)(void*) = options->profiling ? cl_zttmqr_callback : NULL; - int ldwork = side == ChamLeft ? ib : nb; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A1, A1m, A1n); - CHAMELEON_ACCESS_RW(A2, A2m, A2n); - CHAMELEON_ACCESS_R(V, Vm, Vn); - CHAMELEON_ACCESS_R(T, Tm, Tn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &m1, sizeof(int), - STARPU_VALUE, &n1, sizeof(int), - STARPU_VALUE, &m2, sizeof(int), - STARPU_VALUE, &n2, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &ib, sizeof(int), - STARPU_RW, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), - STARPU_VALUE, &lda1, sizeof(int), - STARPU_RW, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), - STARPU_VALUE, &lda2, sizeof(int), - STARPU_R, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), - STARPU_VALUE, &ldv, sizeof(int), - STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), - /* max( ib*nb, 2*ib*nb ) */ - STARPU_SCRATCH, options->ws_worker, - STARPU_VALUE, &ldwork, sizeof(int), - STARPU_VALUE, &(options->ws_wsize), sizeof(size_t), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_USE_MPI) - STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n), -#endif -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zttmqr", -#endif - 0); -} - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zttmqr_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *WORK; - int ldwork; - size_t lwork; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, - &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork ); - - CORE_zttmqr(side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); - - (void)lwork; -} - -#if defined(CHAMELEON_USE_CUDA) -static void cl_zttmqr_cuda_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - cuDoubleComplex *A1; - int lda1; - cuDoubleComplex *A2; - int lda2; - cuDoubleComplex *V; - int ldv; - cuDoubleComplex *T; - int ldt; - cuDoubleComplex *W; - int ldwork; - size_t lwork; - - A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - V = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); - W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */ - - starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, - &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork ); - - RUNTIME_getStream(stream); - - CUDA_zttmqr( - side, trans, m1, n1, m2, n2, k, ib, - A1, lda1, A2, lda2, V, ldv, T, ldt, - W, lwork, stream ); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - (void)ldwork; -} -#endif /* defined(CHAMELEON_USE_CUDA) */ -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS(zttmqr, 5, cl_zttmqr_cpu_func, cl_zttmqr_cuda_func, STARPU_CUDA_ASYNC) diff --git a/runtime/starpu/codelets/codelet_zttqrt.c b/runtime/starpu/codelets/codelet_zttqrt.c deleted file mode 100644 index 39d52185fdc68654e966b34db700b31733cde5c6..0000000000000000000000000000000000000000 --- a/runtime/starpu/codelets/codelet_zttqrt.c +++ /dev/null @@ -1,176 +0,0 @@ -/** - * - * @file starpu/codelet_zttqrt.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zttqrt StarPU codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Hatem Ltaief - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_starpu.h" -#include "runtime_codelet_z.h" - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * CORE_zttqrt computes a QR factorization of a rectangular matrix - * formed by coupling a complex N-by-N upper triangular tile A1 - * on top of a complex M-by-N upper trapezoidal tile A2: - * - * | A1 | = Q * R - * | A2 | - * - * The tile Q is represented as a product of elementary reflectors - * - * Q = H(1) H(2) . . . H(k), where k = min(M,N). - * - * Each H(i) has the form - * - * H(i) = I - tau * v * v' - * - * where tau is a complex scalar, and v is a complex vector with - * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A2(1:m,i), - * and tau in TAU(i). - * - ******************************************************************************* - * - * @param[in] M - * The number of rows of the tile A2. M >= 0. - * - * @param[in] N - * The number of columns of the tile A1 and A2. N >= 0. - * - * @param[in] IB - * The inner-blocking size. IB >= 0. - * - * @param[in,out] A1 - * On entry, the N-by-N tile A1. - * On exit, the elements on and above the diagonal of the array - * contain the N-by-N upper trapezoidal tile R; - * the elements below the diagonal are not referenced. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,N). - * - * @param[in,out] A2 - * On entry, the M-by-N upper triangular tile A2. - * On exit, the elements on and above the diagonal of the array - * with the array TAU, represent - * the unitary tile Q as a product of elementary reflectors - * (see Further Details). - * - * @param[in] LDA2 - * The leading dimension of the array A2. LDA2 >= max(1,M). - * - * @param[out] T - * The IB-by-N triangular factor T of the block reflector. - * T is upper triangular by block (economic storage); - * The rest of the array is not referenced. - * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. - * - * @param[out] TAU - * The scalar factors of the elementary reflectors (see Further - * Details). - * - * @param[in,out] WORK - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ - -void INSERT_TASK_zttqrt(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zttqrt; - void (*callback)(void*) = options->profiling ? cl_zttqrt_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A1, A1m, A1n); - CHAMELEON_ACCESS_RW(A2, A2m, A2n); - CHAMELEON_ACCESS_W(T, Tm, Tn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &ib, sizeof(int), - STARPU_RW, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), - STARPU_VALUE, &lda1, sizeof(int), - STARPU_RW, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), - STARPU_VALUE, &lda2, sizeof(int), - STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), - /* nb * (ib+1) */ - STARPU_SCRATCH, options->ws_worker, - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_USE_MPI) - STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n), -#endif -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zttqrt", -#endif - 0); -} - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zttqrt_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU; - CHAMELEON_Complex64_t *WORK; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* nb * (ib+1) */ - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt); - - WORK = TAU + chameleon_max( m, n ); - - CORE_zttqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zttqrt, 4, cl_zttqrt_cpu_func)