diff --git a/compute/zgesvd.c b/compute/zgesvd.c index be88952272af125224d03b30bc4050d3e5f2be26..db664159170531eb47cc0b066a745fb21102b40f 100644 --- a/compute/zgesvd.c +++ b/compute/zgesvd.c @@ -610,6 +610,14 @@ int MORSE_zgesvd_Tile_Async( MORSE_enum jobu, MORSE_enum jobvt, } morse_sequence_wait( morse, sequence ); + /* Cleanup the temporary data */ + if ( jobu != MorseNoVec ) { + morse_ztile2lap_cleanup( morse, &descUl, &descUt ); + } + if ( jobvt != MorseNoVec ) { + morse_ztile2lap_cleanup( morse, &descVTl, &descVTt ); + } + /* Solve the bidiagonal SVD problem */ /* On exit, U and VT are updated with bidiagonal matrix singular vectors */ #if !defined(CHAMELEON_SIMULATION) diff --git a/compute/zheevd.c b/compute/zheevd.c index 68768489e6f10dac7b3e5ceb9eeb2d146b19187e..d630088170a245ff20b8fb934907cc59b7072650 100644 --- a/compute/zheevd.c +++ b/compute/zheevd.c @@ -335,12 +335,12 @@ int MORSE_zheevd_Tile_Async( MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t descA; MORSE_desc_t descT; MORSE_desc_t D, *Dptr = NULL; - MORSE_Complex64_t *Q2; - int N, status; + MORSE_Complex64_t *Q2 = NULL; + int N, NB, status; double *E; MORSE_Complex64_t *V; - MORSE_desc_t descQ2; - MORSE_desc_t descV; + MORSE_desc_t descQ2l, descQ2t; + MORSE_desc_t descVl, descVt; MORSE_desc_t *subA, *subQ, *subT; morse = morse_context_self(); @@ -395,7 +395,8 @@ int MORSE_zheevd_Tile_Async( MORSE_enum jobz, MORSE_enum uplo, return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } - N = descA.m; + N = descA.m; + NB = descA.mb; /* Allocate data structures for reduction to tridiagonal form */ E = malloc( (N - 1) * sizeof(double) ); @@ -463,10 +464,12 @@ int MORSE_zheevd_Tile_Async( MORSE_enum jobz, MORSE_enum uplo, /* Q from MORSE_zhetrd refers to Q2 (lapack layout) */ /* V from LAPACKE_zstedc refers to V (lapack layout) */ /* The final eigenvectors are (Q1 Q2 V) or (Q1^h Q2 V) */ - /* morse_zooplap2tile( descQ2, Q2, NB, NB, N, N, 0, 0, N, N, sequence, request, */ - /* morse_desc_mat_free( &descQ2 ) ); */ - /* morse_zooplap2tile( descV, V, NB, NB, N, N, 0, 0, N, N, sequence, request, */ - /* morse_desc_mat_free(&(descQ2)); morse_desc_mat_free(&(descV)) ); */ + morse_zlap2tile( morse, &descQ2l, &descQ2t, MorseDescInput, MorseUpperLower, + Q2, NB, NB, N, N, N, N, sequence, request ); + + morse_zlap2tile( morse, &descVl, &descVt, MorseDescInput, MorseUpperLower, + V, NB, NB, N, N, N, N, sequence, request ); + if (uplo == MorseLower) { #if defined(CHAMELEON_COPY_DIAG) @@ -476,9 +479,9 @@ int MORSE_zheevd_Tile_Async( MORSE_enum jobz, MORSE_enum uplo, Dptr = &D; } #endif - subA = morse_desc_submatrix(&descA, descA.mb, 0, descA.m -descA.mb, descA.n-descA.nb); - subQ = morse_desc_submatrix(&descQ2, descQ2.mb, 0, descQ2.m-descQ2.mb, descQ2.n ); - subT = morse_desc_submatrix(&descT, descT.mb, 0, descT.m -descT.mb, descT.n-descT.nb); + subA = morse_desc_submatrix(&descA, descA.mb, 0, descA.m -descA.mb, descA.n-descA.nb); + subQ = morse_desc_submatrix(&descQ2t, descQ2t.mb, 0, descQ2t.m-descQ2t.mb, descQ2t.n ); + subT = morse_desc_submatrix(&descT, descT.mb, 0, descT.m -descT.mb, descT.n-descT.nb); /* Compute Q2 = Q1 * Q2 */ morse_pzunmqr( MorseLeft, MorseNoTrans, @@ -487,7 +490,7 @@ int MORSE_zheevd_Tile_Async( MORSE_enum jobz, MORSE_enum uplo, /* Compute the final eigenvectors A = (Q1 * Q2) * V */ morse_pzgemm( MorseNoTrans, MorseNoTrans, - 1.0, &descQ2, &descV, + 1.0, &descQ2t, &descVt, 0.0, &descA, sequence, request ); @@ -500,9 +503,9 @@ int MORSE_zheevd_Tile_Async( MORSE_enum jobz, MORSE_enum uplo, Dptr = &D; } #endif - subA = morse_desc_submatrix(&descA, 0, descA.nb, descA.m -descA.mb, descA.n -descA.nb ); - subQ = morse_desc_submatrix(&descQ2, descQ2.mb, 0, descQ2.m-descQ2.mb, descQ2.n ); - subT = morse_desc_submatrix(&descT, 0, descT.nb, descT.m -descT.mb, descT.n -descT.nb ); + subA = morse_desc_submatrix(&descA, 0, descA.nb, descA.m -descA.mb, descA.n -descA.nb ); + subQ = morse_desc_submatrix(&descQ2t, descQ2t.mb, 0, descQ2t.m-descQ2t.mb, descQ2t.n ); + subT = morse_desc_submatrix(&descT, 0, descT.nb, descT.m -descT.mb, descT.n -descT.nb ); /* Compute Q2 = Q1^h * Q2 */ morse_pzunmlq( MorseLeft, MorseConjTrans, @@ -511,19 +514,24 @@ int MORSE_zheevd_Tile_Async( MORSE_enum jobz, MORSE_enum uplo, /* Compute the final eigenvectors A = (Q1^h * Q2) * V */ morse_pzgemm( MorseNoTrans, MorseNoTrans, - 1.0, &descQ2, &descV, + 1.0, &descQ2t, &descVt, 0.0, &descA, sequence, request ); } + morse_ztile2lap( morse, &descQ2l, &descQ2t, + MorseDescInput, MorseUpperLower, sequence, request ); + morse_ztile2lap( morse, &descVl, &descVt, + MorseDescInput, MorseUpperLower, sequence, request ); + morse_sequence_wait( morse, sequence ); + /* Cleanup the temporary data */ + morse_ztile2lap_cleanup( morse, &descQ2l, &descQ2t ); + morse_ztile2lap_cleanup( morse, &descVl, &descVt ); + free(subA); free(subQ); free(subT); - morse_desc_mat_free( &descQ2 ); free(Q2); - - /* Cleanup the temporary data */ - morse_desc_mat_free( &descV ); free(V); free(E); if (Dptr != NULL) { diff --git a/coreblas/compute/core_zhe2ge.c b/coreblas/compute/core_zhe2ge.c index c4b5f785a8934f16f3623150cdc2d15871ccc2a8..df2d6225acedb0ae7332e1c7ec5d8ec0bdc6b500 100644 --- a/coreblas/compute/core_zhe2ge.c +++ b/coreblas/compute/core_zhe2ge.c @@ -39,9 +39,8 @@ void CORE_zhe2ge(MORSE_enum uplo, int M, int N, MORSE_Complex64_t *Bptr, *BTptr; int i, j; - Aptr = A; - Bptr = B; - BTptr = B; + Aptr = A; + Bptr = B; if (uplo == MorseLower){ for (j = 0; j < N; j++){ @@ -57,23 +56,10 @@ void CORE_zhe2ge(MORSE_enum uplo, int M, int N, } Aptr += (LDA - i + j + 1); Bptr += (LDB - i + j + 1); - - /* Bptr[ j * LDB + j ] = A[ j * LDA + j ]; */ - - /* for (i = j+1; i < M; i++) { */ - /* Bptr [ j * LDB + i ] = A[ j * LDA + i ]; */ - /* BTptr[ i * LDB + j ] = conj(A[ j * LDA + i ]); */ - /* } */ } } else{ for (j = 0; j < N; j++){ - /* for (i = 0; i < j; i++) { */ - /* Bptr [ j * LDB + i ] = A[ j * LDA + i ]; */ - /* BTptr[ i * LDB + j ] = conj(A[ j * LDA + i ]); */ - /* } */ - /* Bptr[ j * LDB + j ] = A[ j * LDA + j ]; */ - BTptr = B + j; for (i = 0; i < j; i++, Bptr++, Aptr++, BTptr += LDB) { *Bptr = *Aptr; diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 7fa98e57072324ed6261b601ba94e108d2919c2d..56445309d66f1591da48dceb4690116a5c3f860c 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -41,35 +41,11 @@ void MORSE_TASK_zgemm(const MORSE_option_t *options, (void)nb; struct starpu_codelet *codelet = &cl_zgemm; void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL; - int sizeA = lda*k; - int sizeB = ldb*n; - int sizeC = ldc*n; - int execution_rank = C->get_rankof( C, Cm, Cn ); - int rank_changed=0; - (void)execution_rank; - - /* force execution on the rank owning the largest data (tile) */ - int threshold; - char* env = getenv("MORSE_COMM_FACTOR_THRESHOLD"); - - if (env != NULL) - threshold = (unsigned)atoi(env); - else - threshold = 10; - if ( sizeA > threshold*sizeC ){ - execution_rank = A->get_rankof( A, Am, An ); - rank_changed = 1; - }else if( sizeB > threshold*sizeC ){ - execution_rank = B->get_rankof( B, Bm, Bn ); - rank_changed = 1; - } MORSE_BEGIN_ACCESS_DECLARATION; MORSE_ACCESS_R(A, Am, An); MORSE_ACCESS_R(B, Bm, Bn); MORSE_ACCESS_RW(C, Cm, Cn); - if (rank_changed) - MORSE_RANK_CHANGED(execution_rank); MORSE_END_ACCESS_DECLARATION; starpu_insert_task( @@ -89,9 +65,6 @@ void MORSE_TASK_zgemm(const MORSE_option_t *options, STARPU_VALUE, &ldc, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, -#if defined(CHAMELEON_USE_MPI) - STARPU_EXECUTE_ON_NODE, execution_rank, -#endif #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zgemm", #endif diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index 779597aaf1c823dd60da28ed4b7667bf827e1656..aa6a9bb2a8dc5b6bc22455396b261e87333c9ff4 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -40,29 +40,10 @@ void MORSE_TASK_ztrsm(const MORSE_option_t *options, (void)nb; struct starpu_codelet *codelet = &cl_ztrsm; void (*callback)(void*) = options->profiling ? cl_ztrsm_callback : NULL; - int sizeA = lda*m; - int sizeB = ldb*n; - int execution_rank = B->get_rankof( B, Bm, Bn ); - int rank_changed=0; - (void)execution_rank; - - /* force execution on the rank owning the largest data (tile) */ - int threshold; - char* env = getenv("MORSE_COMM_FACTOR_THRESHOLD"); - if (env != NULL) - threshold = (unsigned)atoi(env); - else - threshold = 10; - if ( sizeA > threshold*sizeB ){ - execution_rank = A->get_rankof( A, Am, An ); - rank_changed=1; - } MORSE_BEGIN_ACCESS_DECLARATION; MORSE_ACCESS_R(A, Am, An); MORSE_ACCESS_RW(B, Bm, Bn); - if (rank_changed) - MORSE_RANK_CHANGED(execution_rank); MORSE_END_ACCESS_DECLARATION; starpu_insert_task( @@ -80,9 +61,6 @@ void MORSE_TASK_ztrsm(const MORSE_option_t *options, STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, -#if defined(CHAMELEON_USE_MPI) - STARPU_EXECUTE_ON_NODE, execution_rank, -#endif #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "ztrsm", #endif diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c index fe740bd88f0667d1edbdd6b1b64ba7e8dfd45785..7beabcf9e08ca6f306ff472d424445709088186c 100644 --- a/runtime/starpu/codelets/codelet_zunmqr.c +++ b/runtime/starpu/codelets/codelet_zunmqr.c @@ -120,34 +120,11 @@ void MORSE_TASK_zunmqr(const MORSE_option_t *options, { struct starpu_codelet *codelet = &cl_zunmqr; void (*callback)(void*) = options->profiling ? cl_zunmqr_callback : NULL; - int sizeA = lda*k; - int sizeT = ldt*n; - int sizeC = ldc*n; - int execution_rank = C->get_rankof( C, Cm, Cn ); - int rank_changed=0; - (void)execution_rank; - - /* force execution on the rank owning the largest data (tile) */ - int threshold; - char* env = getenv("MORSE_COMM_FACTOR_THRESHOLD"); - if (env != NULL) - threshold = (unsigned)atoi(env); - else - threshold = 10; - if ( sizeA > threshold*sizeC ){ - execution_rank = A->get_rankof( A, Am, An ); - rank_changed = 1; - }else if( sizeT > threshold*sizeC ){ - execution_rank = T->get_rankof( T, Tm, Tn ); - rank_changed = 1; - } MORSE_BEGIN_ACCESS_DECLARATION; MORSE_ACCESS_R(A, Am, An); MORSE_ACCESS_R(T, Tm, Tn); MORSE_ACCESS_RW(C, Cm, Cn); - if (rank_changed) - MORSE_RANK_CHANGED(execution_rank); MORSE_END_ACCESS_DECLARATION; starpu_insert_task( @@ -169,9 +146,6 @@ void MORSE_TASK_zunmqr(const MORSE_option_t *options, STARPU_VALUE, &nb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, -#if defined(CHAMELEON_USE_MPI) - STARPU_EXECUTE_ON_NODE, execution_rank, -#endif #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zunmqr", #endif diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c index bd804a02c784f65e89bfc29847ad188b072a2ea9..8c3990103c34208a297e2fc5e6da78a14920a745 100644 --- a/runtime/starpu/control/runtime_descriptor.c +++ b/runtime/starpu/control/runtime_descriptor.c @@ -145,8 +145,6 @@ void RUNTIME_desc_create( MORSE_desc_t *desc ) { int64_t lmt = desc->lmt; int64_t lnt = desc->lnt; - starpu_data_handle_t *tiles; - (void)tiles; desc->occurences = 1; @@ -156,7 +154,6 @@ void RUNTIME_desc_create( MORSE_desc_t *desc ) */ desc->schedopt = (void*)calloc(lnt*lmt,sizeof(starpu_data_handle_t)); assert(desc->schedopt); - tiles = (starpu_data_handle_t*)(desc->schedopt); #if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION) /* diff --git a/timing/time_zgemm_tile.c b/timing/time_zgemm_tile.c index 4936a1a1ff14e8a0ee469d53dccc1d65d536de06..ae504b8bca796cb07c95b82ab0c6f0f629754b74 100644 --- a/timing/time_zgemm_tile.c +++ b/timing/time_zgemm_tile.c @@ -48,6 +48,9 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) #if !defined(CHAMELEON_SIMULATION) LAPACKE_zlarnv_work(1, ISEED, 1, &alpha); LAPACKE_zlarnv_work(1, ISEED, 1, &beta); +#else + alpha = 1.5; + beta = -2.3; #endif /* Save C for check */ diff --git a/timing/time_zgesvd_tile.c b/timing/time_zgesvd_tile.c index b7c8f2e86f4942208f8591b1961994da99d3feee..4f207aeb6a802fe79f4db848cf5b1ac44a87d562 100644 --- a/timing/time_zgesvd_tile.c +++ b/timing/time_zgesvd_tile.c @@ -36,7 +36,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, M, N ); PASTE_CODE_ALLOCATE_MATRIX( VT, (jobvt == MorseVec), MORSE_Complex64_t, N, N ); - PASTE_CODE_ALLOCATE_MATRIX( U, (jobu == MorseVec), MORSE_Complex64_t, M, M ); + PASTE_CODE_ALLOCATE_MATRIX( U, (jobu == MorseVec), MORSE_Complex64_t, M, M ); PASTE_CODE_ALLOCATE_MATRIX( S, 1, double, N, 1 ); /* Initialiaze Data */ @@ -46,7 +46,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) MORSE_Alloc_Workspace_zgesvd(N, N, &descT, 1, 1); if ( jobu == MorseVec ) { - LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', M, M, 0., 1., U, M); + LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', M, M, 0., 1., U, M); } if ( jobvt == MorseVec ) { LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', N, N, 0., 1., VT, N); @@ -56,18 +56,18 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) INFO = MORSE_zgesvd_Tile(jobu, jobvt, descA, S, descT, U, M, VT, N); STOP_TIMING(); - if(INFO!=0){ - printf(" ERROR OCCURED INFO %d\n",INFO); + if( INFO != 0 ) { + printf(" ERROR OCCURED INFO %d\n",INFO); } /* DeAllocate Workspace */ MORSE_Dealloc_Workspace(&descT); - if (jobu == MorseVec) { - free( U ); + if ( U != NULL ) { + free( U ); } - if (jobvt == MorseVec) { - free( VT ); + if ( VT != NULL) { + free( VT ); } PASTE_CODE_FREE_MATRIX( descA ); free( S ); diff --git a/timing/time_zlange_tile.c b/timing/time_zlange_tile.c index 4ffe9e77a1768e282577f9816e333a667de77326..d52d1898614918ab42f444fed5c804e641be4fbe 100644 --- a/timing/time_zlange_tile.c +++ b/timing/time_zlange_tile.c @@ -77,7 +77,8 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) free( A ); } #endif - PASTE_CODE_FREE_MATRIX( descA ); + PASTE_CODE_FREE_MATRIX( descA ); + (void)normmorse; return 0; } diff --git a/timing/timing.c b/timing/timing.c index 093ade83cec701cb3880aaa4ee36e91fe161880f..38c89fbfc4cfd80c5045cb759fca8780b004dd6b 100644 --- a/timing/timing.c +++ b/timing/timing.c @@ -160,7 +160,6 @@ Test(int64_t n, int *iparam) { fadds = (double)(_FADDS); fmuls = (double)(_FMULS); flops = 1e-9 * (fmuls * fp_per_mul + fadds * fp_per_add); - gflops = 0.0; if ( iparam[IPARAM_WARMUP] ) { int status = RunTest( iparam, dparam, &(t[0]));