diff --git a/compute/pzgemm.c b/compute/pzgemm.c index f0c77ad3f4773654701a41b2aae4c757e5a7d41a..5c6563d2d6d6280f0f08a35b9a25f266eb5e9764 100644 --- a/compute/pzgemm.c +++ b/compute/pzgemm.c @@ -233,7 +233,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran options, ChamUpperLower, tempkk, tempmm, A( k, m ), - WA( m, (k % C->q) + lq ) ); + WA( m, (m % C->q) + lq ) ); RUNTIME_data_flush( sequence, A( k, m ) ); @@ -241,8 +241,8 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran INSERT_TASK_zlacpy( options, ChamUpperLower, tempkk, tempmm, - WA( m, ((k+q-1) % C->q) + lq ), - WA( m, ((k+q) % C->q) + lq ) ); + WA( m, ((m+q-1) % C->q) + lq ), + WA( m, ((m+q) % C->q) + lq ) ); } } } @@ -273,7 +273,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran options, ChamUpperLower, tempnn, tempkk, B( n, k ), - WB( (k % C->p) + lp, n ) ); + WB( (n % C->p) + lp, n ) ); RUNTIME_data_flush( sequence, B( n, k ) ); @@ -281,8 +281,8 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran INSERT_TASK_zlacpy( options, ChamUpperLower, tempnn, tempkk, - WB( ((k+p-1) % C->p) + lp, n ), - WB( ((k+p) % C->p) + lp, n ) ); + WB( ((n+p-1) % C->p) + lp, n ), + WB( ((n+p) % C->p) + lp, n ) ); } } } diff --git a/compute/pzgepdf_qdwh.c b/compute/pzgepdf_qdwh.c index de7a82cb1a1f8e99c79bbea4a3551e8c5549d835..420ad6f502ceccea3a253363e5b00333c5f49aea 100644 --- a/compute/pzgepdf_qdwh.c +++ b/compute/pzgepdf_qdwh.c @@ -36,7 +36,7 @@ static int _zgepdf_qdwh_opt_genD = 0; #endif static int _zgepdf_qdwh_opt_qr = 1; -static int _zgepdf_qdwh_opt_id = 1; +static int _zgepdf_qdwh_opt_id = 1; // There is a numerical issue when combining this optimization and the StarPU lacpy static int _zgepdf_qdwh_verbose = 0; /** @@ -719,13 +719,13 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t it++; last = ( it >= itconv ); + chameleon_sequence_wait( chamctxt, sequence_it ); if ( params[2] > 100 ) { int do_qr = (!_zgepdf_qdwh_opt_qr) || (it > 1); if ( (chamctxt->scheduler == RUNTIME_SCHED_PARSEC) && ( sequence_it != sequence_qr ) ) { - chameleon_sequence_wait( chamctxt, sequence_it ); sequence_it = sequence_qr; request_it = &request_qr; } @@ -753,7 +753,6 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t if ( (chamctxt->scheduler == RUNTIME_SCHED_PARSEC) && ( sequence_it != sequence_po ) ) { - chameleon_sequence_wait( chamctxt, sequence_it ); sequence_it = sequence_po; request_it = &request_po; } @@ -796,10 +795,10 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t } } + chameleon_sequence_wait( chamctxt, sequence_it ); if ( (chamctxt->scheduler == RUNTIME_SCHED_PARSEC) && ( sequence_it != sequence ) ) { - chameleon_sequence_wait( chamctxt, sequence_it ); chameleon_sequence_destroy( chamctxt, sequence_qr ); chameleon_sequence_destroy( chamctxt, sequence_po ); } diff --git a/compute/pzhemm.c b/compute/pzhemm.c index b47dda5baa2ac08d4904d564a73a98b4cecdf515..12269d34a3abf4de45163eb75657441cceac5e74 100644 --- a/compute/pzhemm.c +++ b/compute/pzhemm.c @@ -339,7 +339,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempam, tempak, A( Am, Ak ), - WA( m, (k % C->q) + lq ) ); + WA( m, (Ak % C->q) + lq ) ); RUNTIME_data_flush( sequence, A( Am, Ak ) ); @@ -347,8 +347,8 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, INSERT_TASK_zlacpy( options, ChamUpperLower, tempam, tempak, - WA( m, ((k+q-1) % C->q) + lq ), - WA( m, ((k+q) % C->q) + lq ) ); + WA( m, ((Ak+q-1) % C->q) + lq ), + WA( m, ((Ak+q) % C->q) + lq ) ); } } @@ -496,7 +496,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempak, tempan, A( Ak, An ), - WB( (k % C->p) + lp, n ) ); + WB( (Ak % C->p) + lp, n ) ); RUNTIME_data_flush( sequence, A( Ak, An ) ); @@ -504,8 +504,8 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, INSERT_TASK_zlacpy( options, ChamUpperLower, tempak, tempan, - WB( ((k+p-1) % C->p) + lp, n ), - WB( ((k+p) % C->p) + lp, n ) ); + WB( ((Ak+p-1) % C->p) + lp, n ), + WB( ((Ak+p) % C->p) + lp, n ) ); } } diff --git a/compute/pzsymm.c b/compute/pzsymm.c index 47632f6ce07bd4a0254cff94ad7b1a258f3d14e4..944ac75ea2daef41f206207d420e9d0543fd0ae1 100644 --- a/compute/pzsymm.c +++ b/compute/pzsymm.c @@ -340,7 +340,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempam, tempak, A( Am, Ak ), - WA( m, (k % C->q) + lq ) ); + WA( m, (Ak % C->q) + lq ) ); RUNTIME_data_flush( sequence, A( Am, Ak ) ); @@ -348,8 +348,8 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, INSERT_TASK_zlacpy( options, ChamUpperLower, tempam, tempak, - WA( m, ((k+q-1) % C->q) + lq ), - WA( m, ((k+q) % C->q) + lq ) ); + WA( m, ((Ak+q-1) % C->q) + lq ), + WA( m, ((Ak+q) % C->q) + lq ) ); } } @@ -497,7 +497,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempak, tempan, A( Ak, An ), - WB( (k % C->p) + lp, n ) ); + WB( (Ak % C->p) + lp, n ) ); RUNTIME_data_flush( sequence, A( Ak, An ) ); @@ -505,8 +505,8 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, INSERT_TASK_zlacpy( options, ChamUpperLower, tempak, tempan, - WB( ((k+p-1) % C->p) + lp, n ), - WB( ((k+p) % C->p) + lp, n ) ); + WB( ((Ak+p-1) % C->p) + lp, n ), + WB( ((Ak+p) % C->p) + lp, n ) ); } } diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c index 2d227b37b7167fa28f687652cdcc4b5e734b3299..aa8d73ed3774d60b34f1b9d6641c984db59fe820 100644 --- a/runtime/starpu/codelets/codelet_zlacpy.c +++ b/runtime/starpu/codelets/codelet_zlacpy.c @@ -151,7 +151,7 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, if ( (uplo == ChamUpperLower) && (tileA->m == m) && (tileA->n == n) && (tileB->m == m) && (tileB->n == n) && - (displA == 0) && (displB == 0) && 0 ) + (displA == 0) && (displB == 0) ) { #if defined(CHAMELEON_USE_MPI) insert_task_zlacpy_on_remote_node( options, @@ -227,7 +227,7 @@ void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, /* Insert the task */ if ( (uplo == ChamUpperLower) && (tileA->m == m) && (tileA->n == n) && - (tileB->m == m) && (tileB->n == n) && 0 ) + (tileB->m == m) && (tileB->n == n) ) { #if defined(CHAMELEON_USE_MPI) insert_task_zlacpy_on_remote_node( options, diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index c185e50b525c719a7b62422f89d5d5b9a259c435..c8d012141de283ea45c4ebe3a4ae3f9270d7e435 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -21,6 +21,10 @@ if (CHAMELEON_SIMULATION) endif() endif() +set( SINGLE_PRECISIONS s c ) +# list all tests that have a specific input file for single precision computations +set( SINGLE_TESTS gepdf_qdwh genm2 ) + if (NOT CHAMELEON_SIMULATION) foreach(prec ${CHAMELEON_PRECISION}) @@ -82,7 +86,11 @@ if (NOT CHAMELEON_SIMULATION) endif() foreach( test ${TESTSTMP} ) - add_test( test_${cat}_${prec}${test} ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/${test}.in ) + if ( ${test} IN_LIST SINGLE_TESTS AND ${prec} IN_LIST SINGLE_PRECISIONS ) + add_test( test_${cat}_${prec}${test} ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/${test}_32.in ) + else() + add_test( test_${cat}_${prec}${test} ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/${test}.in ) + endif() endforeach() if ( CHAMELEON_SCHED_STARPU ) @@ -111,11 +119,15 @@ if (NOT CHAMELEON_SIMULATION) list( REMOVE_ITEM TESTSTMP print gepdf_qr ) - foreach( test ${TESTSTMP} ) - if ( NOT (${cat} STREQUAL "mpi")) - add_test( test_${cat}_${prec}${test}_std ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/${test}.in --api=1 ) - endif() - endforeach() + if ( NOT (${cat} STREQUAL "mpi")) + foreach( test ${TESTSTMP} ) + if ( ${test} IN_LIST SINGLE_TESTS AND ${prec} IN_LIST SINGLE_PRECISIONS ) + add_test( test_${cat}_${prec}${test}_std ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/${test}_32.in --api=1 ) + else() + add_test( test_${cat}_${prec}${test}_std ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/${test}.in --api=1 ) + endif() + endforeach() + endif() endforeach() endforeach() endforeach() diff --git a/testing/input/genm2_32.in b/testing/input/genm2_32.in new file mode 100644 index 0000000000000000000000000000000000000000..3d2a04f54b973ff0c0a78b2228ccfc4649b64839 --- /dev/null +++ b/testing/input/genm2_32.in @@ -0,0 +1,19 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GENM2 +# mtxfmt +# nb: Tile size +# M: Number of rows of matrix A +# N: Number of columns of matrix A +# LDA: Leading dimension of matrix A +# cond: The condition number +# mode: the mode values for latms + +op = genm2 +nb = 16, 17 +m = 15, 25, 37 +n = 13, 23, 35 +lda = 41 +cond = 1., 1.e6 +mode = 1:6 diff --git a/testing/input/gepdf_qdwh_32.in b/testing/input/gepdf_qdwh_32.in new file mode 100644 index 0000000000000000000000000000000000000000..80de2094db4a8f7ea63f49cfa4b0a57ed65fd835 --- /dev/null +++ b/testing/input/gepdf_qdwh_32.in @@ -0,0 +1,23 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GEPDF_QDWH + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of the A matrix +# n: Number of columns of the A matrix +# lda: Leading dimension of the A matrix +# ldb: Leading dimension of the H matrix +# cond: The condition number +# mode: the mode values for latms + +op = gepdf_qdwh +nb = 8 +ib = 3 +m = 8, 32, 64 +n = 8, 16, 32 +lda = 79 +ldb = 78 +cond = 1., 1.e6 +mode = 1:6