From f676d5243c6f732f8da9cf5b4d55a827bb2509a9 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Fri, 8 Feb 2019 10:49:26 +0100 Subject: [PATCH] Starpu/alloc on the fly --- compute/pzlange.c | 45 ++++++++------- compute/pzlansy.c | 23 ++++---- compute/pzunmlq_param.c | 6 ++ compute/pzunmqr_param.c | 6 ++ compute/zlaset.c | 2 +- control/compute_z.h | 7 ++- control/descriptor.c | 34 ++++++----- control/workspace.c | 6 +- coreblas/compute/core_zgelqt.c | 2 +- include/chameleon/tasks_z.h | 8 +++ runtime/openmp/codelets/codelet_zgelqt.c | 7 ++- runtime/openmp/codelets/codelet_zgeqrt.c | 7 ++- runtime/openmp/codelets/codelet_ztplqt.c | 6 +- runtime/openmp/codelets/codelet_ztpqrt.c | 6 +- runtime/parsec/codelets/codelet_zgelqt.c | 1 + runtime/parsec/codelets/codelet_zgeqrt.c | 1 + runtime/parsec/codelets/codelet_ztplqt.c | 1 + runtime/parsec/codelets/codelet_ztpqrt.c | 1 + runtime/quark/codelets/codelet_zgelqt.c | 1 + runtime/quark/codelets/codelet_zgeqrt.c | 1 + runtime/quark/codelets/codelet_ztplqt.c | 1 + runtime/quark/codelets/codelet_ztpqrt.c | 1 + runtime/starpu/codelets/codelet_zgelqt.c | 61 ++++++++++---------- runtime/starpu/codelets/codelet_zgemm.c | 2 +- runtime/starpu/codelets/codelet_zgeqrt.c | 62 ++++++++++----------- runtime/starpu/codelets/codelet_zlange.c | 8 +-- runtime/starpu/codelets/codelet_ztplqt.c | 1 + runtime/starpu/codelets/codelet_ztpqrt.c | 1 + runtime/starpu/control/runtime_descriptor.c | 2 +- runtime/starpu/control/runtime_options.c | 6 +- testing/testing_zgels.c | 1 - testing/testing_zgels_hqr.c | 2 - testing/testing_zgels_systolic.c | 2 - timing/time_zgelqf.c | 1 - timing/time_zgelqf_tile.c | 1 - timing/time_zgels.c | 9 ++- timing/time_zgels_tile.c | 3 +- timing/time_zgeqrf.c | 1 - timing/time_zgeqrf_hqr.c | 2 - timing/time_zgeqrf_hqr_tile.c | 2 - timing/time_zgeqrf_tile.c | 1 - timing/time_zgeqrs_tile.c | 1 - 42 files changed, 189 insertions(+), 153 deletions(-) diff --git a/compute/pzlange.c 
b/compute/pzlange.c index 2bb14d7a2..bb7d6b176 100644 --- a/compute/pzlange.c +++ b/compute/pzlange.c @@ -72,11 +72,10 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, } if ( m >= P ) { - INSERT_TASK_dgeadd( - options, - ChamNoTrans, 1, tempnn, A->nb, - 1.0, W( Wcol, m, n ), 1, - 1.0, W( Wcol, m%P, n ), 1 ); + INSERT_TASK_daxpy( + options, tempnn, 1., + W( Wcol, m, n ), 1, + W( Wcol, m%P, n ), 1 ); } } @@ -85,11 +84,10 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, * For each i, W(i, n) = reduce( W(0..P-1, n) ) */ for(m = 1; m < P; m++) { - INSERT_TASK_dgeadd( - options, - ChamNoTrans, 1, tempnn, A->nb, - 1.0, W( Wcol, m, n ), 1, - 1.0, W( Wcol, 0, n ), 1 ); + INSERT_TASK_daxpy( + options, tempnn, 1., + W( Wcol, m, n ), 1, + W( Wcol, 0, n ), 1 ); } INSERT_TASK_dlange( @@ -165,11 +163,10 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, } if ( n >= Q ) { - INSERT_TASK_dgeadd( - options, - ChamNoTrans, tempmm, 1, A->mb, - 1.0, W( Wcol, m, n ), tempmm, - 1.0, W( Wcol, m, n%Q), tempmm ); + INSERT_TASK_daxpy( + options, tempmm, 1., + W( Wcol, m, n ), 1, + W( Wcol, m, n%Q ), 1 ); } } @@ -178,11 +175,10 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, * For each j, W(m, j) = reduce( Wcol(m, 0..Q-1) ) */ for(n = 1; n < Q; n++) { - INSERT_TASK_dgeadd( - options, - ChamNoTrans, tempmm, 1, A->mb, - 1.0, W( Wcol, m, n), tempmm, - 1.0, W( Wcol, m, 0), tempmm ); + INSERT_TASK_daxpy( + options, tempmm, 1., + W( Wcol, m, n ), 1, + W( Wcol, m, 0 ), 1 ); } INSERT_TASK_dlange( @@ -407,11 +403,14 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia case ChamOneNorm: RUNTIME_options_ws_alloc( &options, 1, 0 ); - chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, A->nb, A->nb, + chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 1, A->nb, A->nb, workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb, 
A->p, A->q, NULL, NULL, NULL ); wcol_init = 1; + /* + * Use the global allocator for Welt, otherwise flush may free the data before the result is read. + */ chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, A->p, worknt, 0, 0, A->p, worknt, A->p, A->q, NULL, NULL, NULL ); @@ -424,7 +423,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia case ChamInfNorm: RUNTIME_options_ws_alloc( &options, A->mb, 0 ); - chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, A->mb, 1, A->mb, + chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, A->mb, 1, A->mb, workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q, NULL, NULL, NULL ); wcol_init = 1; @@ -522,7 +521,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia } CHAMELEON_Desc_Flush( &Welt, sequence ); CHAMELEON_Desc_Flush( A, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); + RUNTIME_sequence_wait( chamctxt, sequence ); *result = *((double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q )); diff --git a/compute/pzlansy.c b/compute/pzlansy.c index 5763df96d..6a9f56fb1 100644 --- a/compute/pzlansy.c +++ b/compute/pzlansy.c @@ -81,11 +81,10 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, int tempmm = ( m == (MT-1) ) ? 
M - m * A->mb : A->mb; for(n = Q; n < NT; n++) { - INSERT_TASK_dgeadd( - options, - ChamNoTrans, tempmm, 1, A->nb, - 1.0, W( Wcol, m, n ), tempmm, - 1.0, W( Wcol, m, n%Q), tempmm ); + INSERT_TASK_daxpy( + options, tempmm, 1., + W( Wcol, m, n ), 1, + W( Wcol, m, n%Q ), 1 ); } /** @@ -93,11 +92,10 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, * For each j, W(m, j) = reduce( Wcol(m, 0..Q-1) ) */ for(n = 1; n < Q; n++) { - INSERT_TASK_dgeadd( - options, - ChamNoTrans, tempmm, 1, A->mb, - 1.0, W( Wcol, m, n), tempmm, - 1.0, W( Wcol, m, 0), tempmm ); + INSERT_TASK_daxpy( + options, tempmm, 1., + W( Wcol, m, n ), 1, + W( Wcol, m, 0 ), 1 ); } INSERT_TASK_dlange( @@ -334,11 +332,14 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra case ChamInfNorm: RUNTIME_options_ws_alloc( &options, 1, 0 ); - chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, A->mb, 1, A->mb, + chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, A->mb, 1, A->mb, workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q, NULL, NULL, NULL ); wcol_init = 1; + /* + * Use the global allocator for Welt, otherwise flush may free the data before the result is read. 
+ */ chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, workmt, A->q, 0, 0, workmt, A->q, A->p, A->q, NULL, NULL, NULL ); diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c index 48dbc13ea..02b740b3b 100644 --- a/compute/pzunmlq_param.c +++ b/compute/pzunmlq_param.c @@ -466,6 +466,12 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, RUNTIME_data_flush( sequence, T(k, n) ); } + /* Restore the original location of the tiles */ + for (m = 0; m < B->mt; m++) { + RUNTIME_data_migrate( sequence, B( m, k ), + B->get_rankof( B, m, k ) ); + } + RUNTIME_iteration_pop(chamctxt); } } diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c index 772bfdf48..a11c5f247 100644 --- a/compute/pzunmqr_param.c +++ b/compute/pzunmqr_param.c @@ -467,6 +467,12 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, RUNTIME_data_flush( sequence, T(n, k) ); } + /* Restore the original location of the tiles */ + for (m = 0; m < B->mt; m++) { + RUNTIME_data_migrate( sequence, B(m, k), + B->get_rankof( B, m, k ) ); + } + RUNTIME_iteration_pop(chamctxt); } } diff --git a/compute/zlaset.c b/compute/zlaset.c index 2b03272b7..7001e66a2 100644 --- a/compute/zlaset.c +++ b/compute/zlaset.c @@ -266,7 +266,7 @@ int CHAMELEON_zlaset_Tile_Async( cham_uplo_t uplo, return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); } /* Check input arguments */ - if (A->nb != A->mb) { + if ( (alpha != beta) && (A->nb != A->mb) ) { chameleon_error("CHAMELEON_zlaset_Tile_Async", "only square tiles supported"); return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); } diff --git a/control/compute_z.h b/control/compute_z.h index 3229f1389..3bacf06a1 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -134,7 +134,7 @@ void chameleon_pzungqr_param( int genD, int K, const libhqr_tree_t *qrtree, static inline int chameleon_zdesc_alloc_diag( CHAM_desc_t *descA, int nb, int m, int n, 
int p, int q ) { int diag_m = chameleon_min( m, n ); - return chameleon_desc_init( descA, CHAMELEON_MAT_ALLOC_GLOBAL, + return chameleon_desc_init( descA, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, nb, nb, nb*nb, diag_m, nb, 0, 0, diag_m, nb, p, q, chameleon_getaddr_diag, @@ -145,7 +145,7 @@ chameleon_zdesc_alloc_diag( CHAM_desc_t *descA, int nb, int m, int n, int p, int #define chameleon_zdesc_alloc( descA, mb, nb, lm, ln, i, j, m, n, free) \ { \ int rc; \ - rc = chameleon_desc_init( &(descA), CHAMELEON_MAT_ALLOC_GLOBAL, \ + rc = chameleon_desc_init( &(descA), CHAMELEON_MAT_ALLOC_TILE, \ ChamComplexDouble, (mb), (nb), ((mb)*(nb)), \ (m), (n), (i), (j), (m), (n), 1, 1, \ NULL, NULL, NULL ); \ @@ -174,7 +174,7 @@ chameleon_zlap2tile( CHAM_context_t *chamctxt, if ( CHAMELEON_TRANSLATION == ChamOutOfPlace ) { /* Initialize the tile descriptor */ - chameleon_desc_init( descAt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, mb, nb, (mb)*(nb), + chameleon_desc_init( descAt, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, mb, nb, (mb)*(nb), lm, ln, 0, 0, m, n, 1, 1, chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, NULL ); @@ -235,6 +235,7 @@ chameleon_ztile2lap( CHAM_context_t *chamctxt, CHAM_desc_t *descAl, CHAM_desc_t static inline void chameleon_ztile2lap_cleanup( CHAM_context_t *chamctxt, CHAM_desc_t *descAl, CHAM_desc_t *descAt ) { + (void)chamctxt; chameleon_desc_destroy( descAl ); chameleon_desc_destroy( descAt ); } diff --git a/control/descriptor.c b/control/descriptor.c index c27fe5749..06e52cdec 100644 --- a/control/descriptor.c +++ b/control/descriptor.c @@ -226,26 +226,32 @@ int chameleon_desc_init( CHAM_desc_t *desc, void *mat, /* The matrix is alocated tile by tile with out of core */ desc->ooc = 0; - // Matrix address - if ( mat == CHAMELEON_MAT_ALLOC_GLOBAL ) { - rc = chameleon_desc_mat_alloc( desc ); + switch ( (intptr_t)mat ) { + case (intptr_t)CHAMELEON_MAT_ALLOC_TILE: + if ( chamctxt->scheduler == RUNTIME_SCHED_STARPU ) { + /* Let's use the allocation 
on the fly as in OOC */ + desc->get_blkaddr = chameleon_getaddr_null; + desc->mat = NULL; + break; + } + /* Otherwise we switch back to the full allocation */ - desc->alloc_mat = 1; - desc->use_mat = 1; - } - else if ( mat == CHAMELEON_MAT_ALLOC_TILE ) { - //chameleon_error( "chameleon_desc_init", "CHAMELEON_MAT_ALLOC_TILE is not available yet" ); - //desc->mat = NULL; + case (intptr_t)CHAMELEON_MAT_ALLOC_GLOBAL: rc = chameleon_desc_mat_alloc( desc ); + desc->alloc_mat = 1; desc->use_mat = 1; + break; - desc->alloc_mat = 1; - } - else if ( mat == CHAMELEON_MAT_OOC ) { + case (intptr_t)CHAMELEON_MAT_OOC: + if ( chamctxt->scheduler != RUNTIME_SCHED_STARPU ) { + chameleon_error("CHAMELEON_Desc_Create", "CHAMELEON Out-of-Core descriptors are supported only with StarPU"); + return CHAMELEON_ERR_NOT_SUPPORTED; + } desc->mat = NULL; desc->ooc = 1; - } - else { + break; + + default: /* memory of the matrix is handled by users */ desc->mat = mat; desc->use_mat = 1; diff --git a/control/workspace.c b/control/workspace.c index e743e33db..8039447fb 100644 --- a/control/workspace.c +++ b/control/workspace.c @@ -74,7 +74,8 @@ int chameleon_alloc_ibnb_tile(int M, int N, cham_tasktype_t func, int type, CHAM lm = IB * MT; ln = NB * NT; - return CHAMELEON_Desc_Create( desc, NULL, type, IB, NB, IB*NB, lm, ln, 0, 0, lm, ln, p, q ); + return CHAMELEON_Desc_Create( desc, CHAMELEON_MAT_ALLOC_TILE, type, IB, NB, IB*NB, + lm, ln, 0, 0, lm, ln, p, q ); } /** @@ -119,7 +120,8 @@ int chameleon_alloc_ipiv(int M, int N, cham_tasktype_t func, int type, CHAM_desc /* TODO: Fix the distribution for IPIV */ *IPIV = (int*)malloc( size ); - return CHAMELEON_Desc_Create( desc, NULL, type, IB, NB, IB*NB, lm, ln, 0, 0, lm, ln, p, q ); + return CHAMELEON_Desc_Create( desc, CHAMELEON_MAT_ALLOC_TILE, type, IB, NB, IB*NB, + lm, ln, 0, 0, lm, ln, p, q ); } /** diff --git a/coreblas/compute/core_zgelqt.c b/coreblas/compute/core_zgelqt.c index cb9f67b49..7a2a74ca0 100644 --- a/coreblas/compute/core_zgelqt.c +++ 
b/coreblas/compute/core_zgelqt.c @@ -67,7 +67,7 @@ * The leading dimension of the array A. LDA >= max(1,M). * * @param[out] T - * The IB-by-N triangular factor T of the block reflector. + * The IB-by-M triangular factor T of the block reflector. * T is upper triangular by block (economic storage); * The rest of the array is not referenced. * diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h index ca0ae0e21..4fa07c2b4 100644 --- a/include/chameleon/tasks_z.h +++ b/include/chameleon/tasks_z.h @@ -483,6 +483,8 @@ INSERT_TASK_ztsmlq( const RUNTIME_option_t *options, const CHAM_desc_t *V, int Vm, int Vn, int ldv, const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { + (void)m1; + (void)n1; return INSERT_TASK_ztpmlqt( options, side, trans, m2, n2, k, 0, ib, nb, V, Vm, Vn, ldv, T, Tm, Tn, ldt, A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 ); @@ -497,6 +499,8 @@ INSERT_TASK_ztsmqr( const RUNTIME_option_t *options, const CHAM_desc_t *V, int Vm, int Vn, int ldv, const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { + (void)m1; + (void)n1; return INSERT_TASK_ztpmqrt( options, side, trans, m2, n2, k, 0, ib, nb, V, Vm, Vn, ldv, T, Tm, Tn, ldt, A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 ); @@ -511,6 +515,8 @@ INSERT_TASK_zttmlq( const RUNTIME_option_t *options, const CHAM_desc_t *V, int Vm, int Vn, int ldv, const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { + (void)m1; + (void)n1; return INSERT_TASK_ztpmlqt( options, side, trans, m2, n2, k, n2, ib, nb, V, Vm, Vn, ldv, T, Tm, Tn, ldt, A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 ); @@ -525,6 +531,8 @@ INSERT_TASK_zttmqr( const RUNTIME_option_t *options, const CHAM_desc_t *V, int Vm, int Vn, int ldv, const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { + (void)m1; + (void)n1; return INSERT_TASK_ztpmqrt( options, side, trans, m2, n2, k, m2, ib, nb, V, Vm, Vn, ldv, T, Tm, Tn, ldt, A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 ); diff --git a/runtime/openmp/codelets/codelet_zgelqt.c b/runtime/openmp/codelets/codelet_zgelqt.c index 
71a9bddce..3341a8f01 100644 --- a/runtime/openmp/codelets/codelet_zgelqt.c +++ b/runtime/openmp/codelets/codelet_zgelqt.c @@ -98,10 +98,13 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); int ws_size = options->ws_wsize; -#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0]) + +#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(out:ptrT[0]) { CHAMELEON_Complex64_t TAU[ws_size]; CHAMELEON_Complex64_t *work = TAU + chameleon_max( m, n ); - CORE_zgelqt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work); + + CORE_zlaset( ChamUpperLower, ib, m, 0., 0., ptrT, ldt ); + CORE_zgelqt( m, n, ib, ptrA, lda, ptrT, ldt, TAU, work ); } } diff --git a/runtime/openmp/codelets/codelet_zgeqrt.c b/runtime/openmp/codelets/codelet_zgeqrt.c index a09763773..6428375b2 100644 --- a/runtime/openmp/codelets/codelet_zgeqrt.c +++ b/runtime/openmp/codelets/codelet_zgeqrt.c @@ -99,10 +99,13 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); int ws_size = options->ws_wsize; -#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0]) + +#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(out:ptrT[0]) { CHAMELEON_Complex64_t TAU[ws_size]; CHAMELEON_Complex64_t *work = TAU + chameleon_max(m, n); - CORE_zgeqrt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work); + + CORE_zlaset( ChamUpperLower, ib, n, 0., 0., ptrT, ldt ); + CORE_zgeqrt( m, n, ib, ptrA, lda, ptrT, ldt, TAU, work ); } } diff --git a/runtime/openmp/codelets/codelet_ztplqt.c b/runtime/openmp/codelets/codelet_ztplqt.c index 
1acb66066..4bb4f16f0 100644 --- a/runtime/openmp/codelets/codelet_ztplqt.c +++ b/runtime/openmp/codelets/codelet_ztplqt.c @@ -31,9 +31,13 @@ INSERT_TASK_ztplqt( const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); int ws_size = options->ws_wsize; -#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0], ptrT[0]) + +#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0]) depend(out:ptrT[0]) { CHAMELEON_Complex64_t work[ws_size]; + + CORE_zlaset( ChamUpperLower, ib, M, 0., 0., ptrT, ldt); + CORE_ztplqt( M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt, work ); } diff --git a/runtime/openmp/codelets/codelet_ztpqrt.c b/runtime/openmp/codelets/codelet_ztpqrt.c index 17917cc7b..7381f6ebd 100644 --- a/runtime/openmp/codelets/codelet_ztpqrt.c +++ b/runtime/openmp/codelets/codelet_ztpqrt.c @@ -30,9 +30,13 @@ INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); int ws_size = options->ws_wsize; -#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrT[0]) depend(inout:ptrA[0], ptrB[0]) + +#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(inout:ptrA[0], ptrB[0]) depend(out:ptrT[0]) { CHAMELEON_Complex64_t tmp[ws_size]; + + CORE_zlaset( ChamUpperLower, ib, N, 0., 0., ptrT, ldt); + CORE_ztpqrt( M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt, tmp ); } diff --git a/runtime/parsec/codelets/codelet_zgelqt.c b/runtime/parsec/codelets/codelet_zgelqt.c index 6e159eddc..4ef5b5b7a 100644 --- a/runtime/parsec/codelets/codelet_zgelqt.c +++ b/runtime/parsec/codelets/codelet_zgelqt.c @@ -98,6 +98,7 @@ CORE_zgelqt_parsec( 
parsec_execution_stream_t *context, parsec_dtd_unpack_args( this_task, &m, &n, &ib, &A, &lda, &T, &ldt, &TAU, &WORK ); + CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldt ); CORE_zgelqt( m, n, ib, A, lda, T, ldt, TAU, WORK ); (void)context; diff --git a/runtime/parsec/codelets/codelet_zgeqrt.c b/runtime/parsec/codelets/codelet_zgeqrt.c index d4e9cc529..53ac8ac04 100644 --- a/runtime/parsec/codelets/codelet_zgeqrt.c +++ b/runtime/parsec/codelets/codelet_zgeqrt.c @@ -99,6 +99,7 @@ CORE_zgeqrt_parsec ( parsec_execution_stream_t *context, parsec_dtd_unpack_args( this_task, &m, &n, &ib, &A, &lda, &T, &ldt, &TAU, &WORK ); + CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldt ); CORE_zgeqrt( m, n, ib, A, lda, T, ldt, TAU, WORK ); (void)context; diff --git a/runtime/parsec/codelets/codelet_ztplqt.c b/runtime/parsec/codelets/codelet_ztplqt.c index 3da524a42..96a220925 100644 --- a/runtime/parsec/codelets/codelet_ztplqt.c +++ b/runtime/parsec/codelets/codelet_ztplqt.c @@ -40,6 +40,7 @@ CORE_ztplqt_parsec( parsec_execution_stream_t *context, parsec_dtd_unpack_args( this_task, &M, &N, &L, &ib, &A, &lda, &B, &ldb, &T, &ldt, &WORK ); + CORE_zlaset( ChamUpperLower, ib, M, 0., 0., T, ldt ); CORE_ztplqt( M, N, L, ib, A, lda, B, ldb, T, ldt, WORK ); diff --git a/runtime/parsec/codelets/codelet_ztpqrt.c b/runtime/parsec/codelets/codelet_ztpqrt.c index ace7a3bf9..f2308aa5b 100644 --- a/runtime/parsec/codelets/codelet_ztpqrt.c +++ b/runtime/parsec/codelets/codelet_ztpqrt.c @@ -40,6 +40,7 @@ CORE_ztpqrt_parsec( parsec_execution_stream_t *context, parsec_dtd_unpack_args( this_task, &M, &N, &L, &ib, &A, &lda, &B, &ldb, &T, &ldt, &WORK ); + CORE_zlaset( ChamUpperLower, ib, N, 0., 0., T, ldt ); CORE_ztpqrt( M, N, L, ib, A, lda, B, ldb, T, ldt, WORK ); diff --git a/runtime/quark/codelets/codelet_zgelqt.c b/runtime/quark/codelets/codelet_zgelqt.c index 7b1e5a47d..240773c98 100644 --- a/runtime/quark/codelets/codelet_zgelqt.c +++ b/runtime/quark/codelets/codelet_zgelqt.c @@ -40,6 +40,7 @@ void 
CORE_zgelqt_quark(Quark *quark) CHAMELEON_Complex64_t *WORK; quark_unpack_args_9(quark, m, n, ib, A, lda, T, ldt, TAU, WORK); + CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldt ); CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK); } diff --git a/runtime/quark/codelets/codelet_zgeqrt.c b/runtime/quark/codelets/codelet_zgeqrt.c index 010a24653..09ed24eef 100644 --- a/runtime/quark/codelets/codelet_zgeqrt.c +++ b/runtime/quark/codelets/codelet_zgeqrt.c @@ -40,6 +40,7 @@ void CORE_zgeqrt_quark(Quark *quark) CHAMELEON_Complex64_t *WORK; quark_unpack_args_9(quark, m, n, ib, A, lda, T, ldt, TAU, WORK); + CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldt ); CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK); } diff --git a/runtime/quark/codelets/codelet_ztplqt.c b/runtime/quark/codelets/codelet_ztplqt.c index f0e51b375..98b153433 100644 --- a/runtime/quark/codelets/codelet_ztplqt.c +++ b/runtime/quark/codelets/codelet_ztplqt.c @@ -39,6 +39,7 @@ CORE_ztplqt_quark( Quark *quark ) quark_unpack_args_11( quark, M, N, L, ib, A, lda, B, ldb, T, ldt, WORK ); + CORE_zlaset( ChamUpperLower, ib, M, 0., 0., T, ldt ); CORE_ztplqt( M, N, L, ib, A, lda, B, ldb, T, ldt, WORK ); } diff --git a/runtime/quark/codelets/codelet_ztpqrt.c b/runtime/quark/codelets/codelet_ztpqrt.c index 24ce98e12..b508e548c 100644 --- a/runtime/quark/codelets/codelet_ztpqrt.c +++ b/runtime/quark/codelets/codelet_ztpqrt.c @@ -39,6 +39,7 @@ CORE_ztpqrt_quark( Quark *quark ) quark_unpack_args_11( quark, M, N, L, ib, A, lda, B, ldb, T, ldt, WORK ); + CORE_zlaset( ChamUpperLower, ib, N, 0., 0., T, ldt ); CORE_ztpqrt( M, N, L, ib, A, lda, B, ldb, T, ldt, WORK ); } diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c index 68d435d03..8ffad6e1a 100644 --- a/runtime/starpu/codelets/codelet_zgelqt.c +++ b/runtime/starpu/codelets/codelet_zgelqt.c @@ -26,6 +26,36 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void 
cl_zgelqt_cpu_func(void *descr[], void *cl_arg) +{ + CHAMELEON_starpu_ws_t *h_work; + int m; + int n; + int ib; + CHAMELEON_Complex64_t *A; + int lda; + CHAMELEON_Complex64_t *T; + int ldt; + CHAMELEON_Complex64_t *TAU, *WORK; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + ib*n */ + + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); + + WORK = TAU + chameleon_max( m, n ); + CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldt ); + CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -87,7 +117,6 @@ * \retval <0 if -i, the i-th argument had an illegal value * */ - void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, int m, int n, int ib, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -123,33 +152,3 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_starpu_ws_t *h_work; - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A; - int lda; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU, *WORK; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + ib*n */ - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); - - WORK = TAU + chameleon_max( m, n ); - CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zgemm.c 
b/runtime/starpu/codelets/codelet_zgemm.c index 0719010b6..205da5e35 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -35,7 +35,7 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options, cham_trans_t transA, cham_trans_t transB, int m, int n, int k, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) { (void)nb; diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c index eaa242637..bee5168f9 100644 --- a/runtime/starpu/codelets/codelet_zgeqrt.c +++ b/runtime/starpu/codelets/codelet_zgeqrt.c @@ -26,6 +26,37 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg) +{ + CHAMELEON_starpu_ws_t *h_work; + int m; + int n; + int ib; + CHAMELEON_Complex64_t *A; + int lda; + CHAMELEON_Complex64_t *T; + int ldt; + CHAMELEON_Complex64_t *TAU, *WORK; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + n * ib */ + + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); + + WORK = TAU + chameleon_max( m, n ); + + CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldt ); + CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -88,7 +119,6 @@ * \retval <0 if -i, the i-th argument had an illegal value * */ - void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, int m, int n, int ib, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -124,33 
+154,3 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_starpu_ws_t *h_work; - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A; - int lda; - CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *TAU, *WORK; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + n * ib */ - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); - - WORK = TAU + chameleon_max( m, n ); - CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c index f689d82bc..9ab611908 100644 --- a/runtime/starpu/codelets/codelet_zlange.c +++ b/runtime/starpu/codelets/codelet_zlange.c @@ -24,10 +24,10 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_zlange(const RUNTIME_option_t *options, - cham_normtype_t norm, int M, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_zlange( const RUNTIME_option_t *options, + cham_normtype_t norm, int M, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn ) { (void)NB; struct starpu_codelet *codelet = &cl_zlange; diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c index c2f771e69..44615d5c3 100644 --- a/runtime/starpu/codelets/codelet_ztplqt.c +++ b/runtime/starpu/codelets/codelet_ztplqt.c @@ -43,6 +43,7 @@ static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib, &lda, &ldb, &ldt ); 
+ CORE_zlaset( ChamUpperLower, ib, M, 0., 0., T, ldt ); CORE_ztplqt( M, N, L, ib, A, lda, B, ldb, T, ldt, WORK ); } diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c index bfddf9d4b..6fbd0afe6 100644 --- a/runtime/starpu/codelets/codelet_ztpqrt.c +++ b/runtime/starpu/codelets/codelet_ztpqrt.c @@ -43,6 +43,7 @@ static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib, &lda, &ldb, &ldt ); + CORE_zlaset( ChamUpperLower, ib, N, 0., 0., T, ldt ); CORE_ztpqrt( M, N, L, ib, A, lda, B, ldb, T, ldt, WORK ); } diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c index 92b63ce46..c8ffd2e6b 100644 --- a/runtime/starpu/control/runtime_descriptor.c +++ b/runtime/starpu/control/runtime_descriptor.c @@ -238,7 +238,7 @@ void RUNTIME_desc_destroy( CHAM_desc_t *desc ) for (m = 0; m < lmt; m++) { if (*handle != NULL) { - starpu_data_unregister(*handle); + starpu_data_unregister_submit(*handle); } handle++; } diff --git a/runtime/starpu/control/runtime_options.c b/runtime/starpu/control/runtime_options.c index a7a308326..8c833bd18 100644 --- a/runtime/starpu/control/runtime_options.c +++ b/runtime/starpu/control/runtime_options.c @@ -49,9 +49,9 @@ int RUNTIME_options_ws_alloc( RUNTIME_option_t *options, size_t worker_size, siz int ret = 0; if ( worker_size > 0 ) { options->ws_wsize = worker_size; - starpu_vector_data_register((starpu_data_handle_t*)(&(options->ws_worker)), - -1, (uintptr_t)NULL, - worker_size, sizeof(char)); + starpu_matrix_data_register( (starpu_data_handle_t*)(&(options->ws_worker)), + -1, (uintptr_t)NULL, + worker_size, worker_size, 1, sizeof(char)); } if ( host_size > 0 ) { options->ws_hsize = host_size; diff --git a/testing/testing_zgels.c b/testing/testing_zgels.c index 9abcde8cf..6316ab1f5 100644 --- a/testing/testing_zgels.c +++ b/testing/testing_zgels.c @@ -103,7 +103,6 @@ int testing_zgels(int argc, char 
**argv) } CHAMELEON_Alloc_Workspace_zgels(M, N, &T, 1, 1); - memset(T->mat, 0, (T->llm*T->lln)*sizeof(CHAMELEON_Complex64_t)); eps = LAPACKE_dlamch_work('e'); /*---------------------------------------------------------- diff --git a/testing/testing_zgels_hqr.c b/testing/testing_zgels_hqr.c index 67101b034..91b6d78d2 100644 --- a/testing/testing_zgels_hqr.c +++ b/testing/testing_zgels_hqr.c @@ -99,8 +99,6 @@ int testing_zgels_hqr(int argc, char **argv) CHAMELEON_Alloc_Workspace_zgels(M, N, &TS, 1, 1); CHAMELEON_Alloc_Workspace_zgels(M, N, &TT, 1, 1); - memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(CHAMELEON_Complex64_t)); - memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(CHAMELEON_Complex64_t)); eps = LAPACKE_dlamch_work( 'e' ); diff --git a/testing/testing_zgels_systolic.c b/testing/testing_zgels_systolic.c index 53176ceec..7862ee0fd 100644 --- a/testing/testing_zgels_systolic.c +++ b/testing/testing_zgels_systolic.c @@ -93,8 +93,6 @@ int testing_zgels_systolic(int argc, char **argv) CHAMELEON_Alloc_Workspace_zgels(M, N, &TS, 1, 1); CHAMELEON_Alloc_Workspace_zgels(M, N, &TT, 1, 1); - memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(CHAMELEON_Complex64_t)); - memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(CHAMELEON_Complex64_t)); eps = LAPACKE_dlamch_work( 'e' ); diff --git a/timing/time_zgelqf.c b/timing/time_zgelqf.c index e2c709b70..45c69f046 100644 --- a/timing/time_zgelqf.c +++ b/timing/time_zgelqf.c @@ -44,7 +44,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) /* Allocate Workspace */ CHAMELEON_Alloc_Workspace_zgels(M, N, &T, P, Q); - memset(T->mat, 0, (T->llm*T->lln)*sizeof(ChamComplexDouble)); /* Save AT in lapack layout for check */ PASTE_CODE_ALLOCATE_COPY( Acpy, check, CHAMELEON_Complex64_t, A, LDA, N ); diff --git a/timing/time_zgelqf_tile.c b/timing/time_zgelqf_tile.c index f79ee5a85..bc4723baf 100644 --- a/timing/time_zgelqf_tile.c +++ b/timing/time_zgelqf_tile.c @@ -45,7 +45,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) /* Allocate 
Workspace */ CHAMELEON_Alloc_Workspace_zgels_Tile(M, N, &descT, P, Q); - memset(descT->mat, 0, (descT->llm*descT->lln)*sizeof(ChamComplexDouble)); /* CHAMELEON ZGEQRF */ START_TIMING(); diff --git a/timing/time_zgels.c b/timing/time_zgels.c index 77bbbe667..30a3ad5e1 100644 --- a/timing/time_zgels.c +++ b/timing/time_zgels.c @@ -26,7 +26,7 @@ #include "timing_zauxiliary.h" static int -RunTest(int *iparam, double *dparam, chameleon_time_t *t_) +RunTest(int *iparam, double *dparam, chameleon_time_t *t_) { CHAM_desc_t *T; PASTE_CODE_IPARAM_LOCALS( iparam ); @@ -47,7 +47,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) CHAMELEON_zplrnt( M, NRHS, x, LDB, 5673 ); CHAMELEON_Alloc_Workspace_zgels(M, N, &T, P, Q); - memset(T->mat, 0, (T->llm*T->lln)*sizeof(ChamComplexDouble)); /* Save A and b */ if (check) { @@ -58,13 +57,13 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) START_TIMING(); CHAMELEON_zgels( ChamNoTrans, M, N, NRHS, A, LDA, T, x, LDB ); STOP_TIMING(); - + /* Check the solution */ if (check) { dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, Acpy, LDA, b, x, LDB, - &(dparam[IPARAM_ANORM]), - &(dparam[IPARAM_BNORM]), + &(dparam[IPARAM_ANORM]), + &(dparam[IPARAM_BNORM]), &(dparam[IPARAM_XNORM])); free(Acpy); free(b); } diff --git a/timing/time_zgels_tile.c b/timing/time_zgels_tile.c index 6e0d300fa..0d628287b 100644 --- a/timing/time_zgels_tile.c +++ b/timing/time_zgels_tile.c @@ -25,7 +25,7 @@ #include "./timing.c" static int -RunTest(int *iparam, double *dparam, chameleon_time_t *t_) +RunTest(int *iparam, double *dparam, chameleon_time_t *t_) { CHAM_desc_t *descT; PASTE_CODE_IPARAM_LOCALS( iparam ); @@ -46,7 +46,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) /* Allocate Workspace */ CHAMELEON_Alloc_Workspace_zgels_Tile(M, N, &descT, P, Q); - memset(descT->mat, 0, (descT->llm*descT->lln)*sizeof(ChamComplexDouble)); /* Save A and B for check */ if (check == 1){ diff --git a/timing/time_zgeqrf.c b/timing/time_zgeqrf.c 
index 70353b2ca..89e3534e4 100644 --- a/timing/time_zgeqrf.c +++ b/timing/time_zgeqrf.c @@ -44,7 +44,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) /* Allocate Workspace */ CHAMELEON_Alloc_Workspace_zgels(M, N, &T, P, Q); - memset(T->mat, 0, (T->llm*T->lln)*sizeof(ChamComplexDouble)); /* Save AT in lapack layout for check */ PASTE_CODE_ALLOCATE_COPY( Acpy, check, CHAMELEON_Complex64_t, A, LDA, N ); diff --git a/timing/time_zgeqrf_hqr.c b/timing/time_zgeqrf_hqr.c index 725597fa0..6b4f60459 100644 --- a/timing/time_zgeqrf_hqr.c +++ b/timing/time_zgeqrf_hqr.c @@ -51,9 +51,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) /* Allocate Workspace */ CHAMELEON_Alloc_Workspace_zgels(M, N, &TS, P, Q); - memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(ChamComplexDouble)); CHAMELEON_Alloc_Workspace_zgels(M, N, &TT, P, Q); - memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(ChamComplexDouble)); /* Save AT in lapack layout for check */ PASTE_CODE_ALLOCATE_COPY( Acpy, check, CHAMELEON_Complex64_t, A, LDA, N ); diff --git a/timing/time_zgeqrf_hqr_tile.c b/timing/time_zgeqrf_hqr_tile.c index 2b30953e0..3af4530fd 100644 --- a/timing/time_zgeqrf_hqr_tile.c +++ b/timing/time_zgeqrf_hqr_tile.c @@ -58,9 +58,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) /* Allocate Workspace */ CHAMELEON_Alloc_Workspace_zgels(M, N, &TS, P, Q); - memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(ChamComplexDouble)); CHAMELEON_Alloc_Workspace_zgels(M, N, &TT, P, Q); - memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(ChamComplexDouble)); /* Initialize matrix */ matrix.mt = TS->mt; diff --git a/timing/time_zgeqrf_tile.c b/timing/time_zgeqrf_tile.c index b35782a69..dc257558b 100644 --- a/timing/time_zgeqrf_tile.c +++ b/timing/time_zgeqrf_tile.c @@ -45,7 +45,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) /* Allocate Workspace */ CHAMELEON_Alloc_Workspace_zgels_Tile(M, N, &descT, P, Q); - memset(descT->mat, 0, (descT->llm*descT->lln)*sizeof(ChamComplexDouble)); /* 
CHAMELEON ZGEQRF */ START_TIMING(); diff --git a/timing/time_zgeqrs_tile.c b/timing/time_zgeqrs_tile.c index 3018a74b2..78b008c1a 100644 --- a/timing/time_zgeqrs_tile.c +++ b/timing/time_zgeqrs_tile.c @@ -48,7 +48,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) /* Allocate Workspace */ CHAMELEON_Alloc_Workspace_zgels_Tile(M, N, &descT, P, Q); - memset(descT->mat, 0, (descT->llm*descT->lln)*sizeof(ChamComplexDouble)); /* CHAMELEON ZGEQRF */ CHAMELEON_zgeqrf_Tile( descA, descT ); -- GitLab