diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c index 623a25517808356acaa7b61373e831a9b043452b..602f7f4e6cb923454d2381961ac7c89ff3f8af11 100644 --- a/compute/pzhetrd_he2hb.c +++ b/compute/pzhetrd_he2hb.c @@ -23,9 +23,9 @@ #define A(m, n) A, m, n #define T(m, n) T, m, n -#define D(k) D, (k)-1, 0 +#define D(k) &D, (k)-1, 0 -#define AT(k) AT, k, 0 +#define AT(k) &AT, k, 0 #if defined(CHAMELEON_COPY_DIAG) #define E(m, n) E, m, 0 @@ -42,8 +42,8 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, { CHAM_context_t *chamctxt; RUNTIME_option_t options; - CHAM_desc_t *D = NULL; - CHAM_desc_t *AT = NULL; + CHAM_desc_t D; + CHAM_desc_t AT; size_t ws_worker = 0; size_t ws_host = 0; @@ -87,15 +87,12 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); /* Copy of the diagonal tiles to keep the general version of the tile all along the computation */ - D = (CHAM_desc_t*)malloc(sizeof(CHAM_desc_t)); - chameleon_zdesc_alloc_diag(*D, A->mb, A->nb, chameleon_min(A->m, A->n) - A->mb, A->nb, 0, 0, chameleon_min(A->m, A->n) - A->mb, A->nb, A->p, A->q); + chameleon_zdesc_alloc_diag( &D, A->mb, A->m, A->n, A->p, A->q ); - AT = (CHAM_desc_t*)malloc(sizeof(CHAM_desc_t)); - *AT = chameleon_desc_init( - ChamComplexDouble, A->mb, A->nb, (A->mb*A->nb), - chameleon_min(A->mt, A->nt) * A->mb, A->nb, 0, 0, chameleon_min(A->mt, A->nt) * A->mb, A->nb, 1, 1); - chameleon_desc_mat_alloc( AT ); - RUNTIME_desc_create( AT ); + chameleon_desc_init( &AT, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, A->mb, A->nb, (A->mb*A->nb), + chameleon_min(A->mt, A->nt) * A->mb, A->nb, 0, 0, + chameleon_min(A->mt, A->nt) * A->mb, A->nb, 1, 1, + NULL, NULL, NULL ); /* Let's extract the diagonal in a temporary copy that contains A and A' */ for (k = 1; k < A->nt; k++){ @@ -437,8 +434,8 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, RUNTIME_options_finalize(&options, chamctxt); CHAMELEON_Sequence_Wait(sequence); - CHAMELEON_Desc_Destroy( &D ); - CHAMELEON_Desc_Destroy( &AT ); + chameleon_desc_destroy( &D ); + chameleon_desc_destroy( &AT ); (void)E; } diff --git a/compute/pzlange.c b/compute/pzlange.c index cefbf531476995539dbb808d028c11dd5cd572f3..3f549b17c355764ce3c18eba3d201c0f53f394bf 100644 --- a/compute/pzlange.c +++ b/compute/pzlange.c @@ -26,9 +26,8 @@ //WS_ADD : A->mb + A->nb #include "control/common.h" -#define A(m, n) A, (m), (n) -#define Wcol(m, n) Wcol, (m), (n) -#define Welt(m, n) Welt, (m), (n) +#define A( m, n ) A, (m), (n) +#define W( desc, m, n ) (desc), (m), (n) static inline void chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, @@ -63,21 +62,21 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, INSERT_TASK_ztrasm( options, ChamColumnwise, uplo, diag, tempmm, tempnn, - A(m, n), ldam, Wcol(m, n) ); + A(m, n), ldam, W( Wcol, m, n ) ); } else { INSERT_TASK_dzasum( options, ChamColumnwise, ChamUpperLower, tempmm, tempnn, - A(m, n), ldam, Wcol(m, n) ); + A(m, n), ldam, W( Wcol, m, n ) ); } if ( m >= P ) { INSERT_TASK_dgeadd( options, - ChamNoTrans, tempnn, 1, A->nb, - 1.0, Wcol(m, n), tempnn, - 1.0, Wcol(m%P, n), tempnn ); + ChamNoTrans, 1, tempnn, A->nb, + 1.0, W( Wcol, m, n ), 1, + 1.0, W( Wcol, m%P, n ), 1 ); } } @@ -88,15 +87,15 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, for(m = 1; m < P; n++) { INSERT_TASK_dgeadd( options, - ChamNoTrans, tempnn, 1, A->nb, - 1.0, Wcol(m, n), tempnn, - 1.0, Wcol(0, n), tempnn ); + ChamNoTrans, 1, tempnn, A->nb, + 1.0, W( Wcol, m, n ), 1, + 1.0, W( Wcol, 0, n ), 1 ); } INSERT_TASK_dlange( options, - ChamMaxNorm, tempnn, 1, A->nb, - Wcol(0, n), tempnn, Welt(0, n)); + ChamMaxNorm, 1, tempnn, A->nb, + W( Wcol, 0, n), 1, W( Welt, 0, n)); } /** @@ -106,7 +105,7 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, for(n = Q; n < NT; n++) { INSERT_TASK_dlange_max( options, - Welt(0, n), Welt(0, n%Q) ); + W( Welt, 0, n), W( Welt, 0, n%Q) ); } /** @@ -116,7 +115,7 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, for(n = 1; n < Q; n++) { INSERT_TASK_dlange_max( options, - Welt(0, n), Welt(0, 0) ); + W( Welt, 0, n), W( Welt, 0, 0) ); } } @@ -153,21 +152,21 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, INSERT_TASK_ztrasm( options, ChamRowwise, uplo, diag, tempmm, tempnn, - A(m, n), ldam, Wcol(m, n) ); + A(m, n), ldam, W( Wcol, m, n) ); } else { INSERT_TASK_dzasum( options, ChamRowwise, ChamUpperLower, tempmm, tempnn, - A(m, n), ldam, Wcol(m, n) ); + A(m, n), ldam, W( Wcol, m, n) ); } if ( n >= Q ) { INSERT_TASK_dgeadd( options, ChamNoTrans, tempmm, 1, A->mb, - 1.0, Wcol(m, n ), tempmm, - 1.0, Wcol(m, n%Q), tempmm ); + 1.0, W( Wcol, m, n ), tempmm, + 1.0, W( Wcol, m, n%Q), tempmm ); } } @@ -179,14 +178,14 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, INSERT_TASK_dgeadd( options, ChamNoTrans, tempmm, 1, A->mb, - 1.0, Wcol(m, n), tempmm, - 1.0, Wcol(m, 0), tempmm ); + 1.0, W( Wcol, m, n), tempmm, + 1.0, W( Wcol, m, 0), tempmm ); } INSERT_TASK_dlange( options, ChamMaxNorm, tempmm, 1, A->nb, - Wcol(m, 0), 1, Welt(m, 0)); + W( Wcol, m, 0), 1, W( Welt, m, 0)); } /** @@ -196,7 +195,7 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, for(m = P; m < MT; m++) { INSERT_TASK_dlange_max( options, - Welt(m, 0), Welt(m%P, 0) ); + W( Welt, m, 0), W( Welt, m%P, 0) ); } /** @@ -206,7 +205,7 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, for(m = 1; m < P; m++) { INSERT_TASK_dlange_max( options, - Welt(m, 0), Welt(0, 0) ); + W( Welt, m, 0), W( Welt, 0, 0) ); } } @@ -242,19 +241,19 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ INSERT_TASK_zlantr( options, ChamMaxNorm, uplo, diag, tempmm, tempnn, A->nb, - A(m, n), ldam, Welt(m, n)); + A(m, n), ldam, W( Welt, m, n)); } else { INSERT_TASK_zlange( options, ChamMaxNorm, tempmm, tempnn, A->nb, - A(m, n), ldam, Welt(m, n)); + A(m, n), ldam, W( Welt, m, n)); } if ( n >= Q ) { INSERT_TASK_dlange_max( options, - Welt(m, n), Welt(m, n%Q) ); + W( Welt, m, n), W( Welt, m, n%Q) ); } } @@ -265,7 +264,7 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ for(n = 1; n < Q; n++) { INSERT_TASK_dlange_max( options, - Welt(m, n), Welt(m, 0) ); + W( Welt, m, n), W( Welt, m, 0) ); } } @@ -276,7 +275,7 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ for(m = P; m < MT; m++) { INSERT_TASK_dlange_max( options, - Welt(m, 0), Welt(m%P, 0) ); + W( Welt, m, 0), W( Welt, m%P, 0) ); } /** @@ -286,7 +285,7 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ for(m = 1; m < P; m++) { INSERT_TASK_dlange_max( options, - Welt(m, 0), Welt(0, 0) ); + W( Welt, m, 0), W( Welt, 0, 0) ); } } @@ -322,18 +321,18 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ INSERT_TASK_ztrssq( options, uplo, diag, tempmm, tempnn, - A(m, n), ldam, Welt(m, n) ); + A(m, n), ldam, W( Welt, m, n) ); } else { INSERT_TASK_zgessq( options, tempmm, tempnn, - A(m, n), ldam, Welt(m, n) ); + A(m, n), ldam, W( Welt, m, n) ); } if ( n >= Q ) { INSERT_TASK_dplssq( - options, Welt(m, n), Welt(m, n%Q) ); + options, W( Welt, m, n), W( Welt, m, n%Q) ); } } @@ -343,7 +342,7 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ */ for(n = 1; n < Q; n++) { INSERT_TASK_dplssq( - options, Welt(m, n), Welt(m, 0) ); + options, W( Welt, m, n), W( Welt, m, 0) ); } } @@ -353,7 +352,7 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ */ for(m = P; m < MT; m++) { INSERT_TASK_dplssq( - options, Welt(m, 0), Welt(m%P, 0) ); + options, W( Welt, m, 0), W( Welt, m%P, 0) ); } /** @@ -362,11 +361,11 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ */ for(m = 1; m < P; m++) { INSERT_TASK_dplssq( - options, Welt(m, 0), Welt(0, 0) ); + options, W( Welt, m, 0), W( Welt, 0, 0) ); } INSERT_TASK_dplssq2( - options, Welt(0, 0) ); + options, W( Welt, 0, 0) ); } /** @@ -378,13 +377,13 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia { CHAM_context_t *chamctxt; RUNTIME_option_t options; - CHAM_desc_t *Wcol = NULL; - CHAM_desc_t *Welt = NULL; + CHAM_desc_t Wcol; + CHAM_desc_t Welt; double alpha = 0.0; double beta = 0.0; int workn, workmt, worknt; - int m, n; + int m, n, wcol_init = 0; chamctxt = chameleon_context_self(); if ( sequence->status != CHAMELEON_SUCCESS ) { @@ -402,11 +401,14 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia case ChamOneNorm: RUNTIME_options_ws_alloc( &options, 1, 0 ); - CHAMELEON_Desc_Create( &Wcol, NULL, ChamRealDouble, 1, A->nb, A->nb, - workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb, A->p, A->q ); + chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, A->nb, A->nb, + workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb, A->p, A->q, + NULL, NULL, NULL ); + wcol_init = 1; - CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1, - A->p, worknt, 0, 0, A->p, worknt, A->p, A->q ); + chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, + A->p, worknt, 0, 0, A->p, worknt, A->p, A->q, + NULL, NULL, NULL ); break; @@ -416,11 +418,14 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia case ChamInfNorm: RUNTIME_options_ws_alloc( &options, A->mb, 0 ); - CHAMELEON_Desc_Create( &Wcol, NULL, ChamRealDouble, A->mb, 1, A->mb, - workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q ); + chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, A->mb, 1, A->mb, + workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q, + NULL, NULL, NULL ); + wcol_init = 1; - CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1, - workmt, A->q, 0, 0, workmt, A->q, A->p, A->q ); + chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, + workmt, A->q, 0, 0, workmt, A->q, A->p, A->q, + NULL, NULL, NULL ); break; /* @@ -430,8 +435,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia RUNTIME_options_ws_alloc( &options, 1, 0 ); alpha = 1.; - CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 2, 1, 2, - workmt*2, workn, 0, 0, workmt*2, workn, A->p, A->q ); + chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, + workmt*2, workn, 0, 0, workmt*2, workn, A->p, A->q, + NULL, NULL, NULL ); break; /* @@ -441,8 +447,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia default: RUNTIME_options_ws_alloc( &options, 1, 0 ); - CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1, - workmt, workn, 0, 0, workmt, workn, A->p, A->q ); + chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, + workmt, workn, 0, 0, workmt, workn, A->p, A->q, + NULL, NULL, NULL ); } /* Initialize workspaces */ @@ -450,44 +457,44 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia (norm == ChamOneNorm) ) { /* Initialize Wcol tile */ - for(m = 0; m < Wcol->mt; m++) { - for(n = 0; n < Wcol->nt; n++) { + for(m = 0; m < Wcol.mt; m++) { + for(n = 0; n < Wcol.nt; n++) { INSERT_TASK_dlaset( &options, - ChamUpperLower, Wcol->mb, Wcol->nb, + ChamUpperLower, Wcol.mb, Wcol.nb, alpha, beta, - Wcol(m,n), Wcol->mb ); + W( &Wcol, m, n ), Wcol.mb ); } } } - for(m = 0; m < Welt->mt; m++) { - for(n = 0; n < Welt->nt; n++) { + for(m = 0; m < Welt.mt; m++) { + for(n = 0; n < Welt.nt; n++) { INSERT_TASK_dlaset( &options, - ChamUpperLower, Welt->mb, Welt->nb, + ChamUpperLower, Welt.mb, Welt.nb, alpha, beta, - Welt(m,n), Welt->mb ); + W( &Welt, m, n ), Welt.mb ); } } switch ( norm ) { case ChamOneNorm: - chameleon_pzlange_one( uplo, diag, A, Wcol, Welt, &options ); - CHAMELEON_Desc_Flush( Wcol, sequence ); + chameleon_pzlange_one( uplo, diag, A, &Wcol, &Welt, &options ); + CHAMELEON_Desc_Flush( &Wcol, sequence ); break; case ChamInfNorm: - chameleon_pzlange_inf( uplo, diag, A, Wcol, Welt, &options ); - CHAMELEON_Desc_Flush( Wcol, sequence ); + chameleon_pzlange_inf( uplo, diag, A, &Wcol, &Welt, &options ); + CHAMELEON_Desc_Flush( &Wcol, sequence ); break; case ChamFrobeniusNorm: - chameleon_pzlange_frb( uplo, diag, A, Welt, &options ); + chameleon_pzlange_frb( uplo, diag, A, &Welt, &options ); break; case ChamMaxNorm: default: - chameleon_pzlange_max( uplo, diag, A, Welt, &options ); + chameleon_pzlange_max( uplo, diag, A, &Welt, &options ); } /** @@ -499,20 +506,20 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia INSERT_TASK_dlacpy( &options, ChamUpperLower, 1, 1, 1, - Welt(0,0), 1, Welt(m, n), 1); + W( &Welt, 0, 0 ), 1, W( &Welt, m, n ), 1); } } } - CHAMELEON_Desc_Flush( Welt, sequence ); + CHAMELEON_Desc_Flush( &Welt, sequence ); RUNTIME_sequence_wait(chamctxt, sequence); - *result = *(double *)Welt->get_blkaddr(Welt, A->myrank / A->q, A->myrank % A->q ); + *result = *(double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q ); - if ( Wcol != NULL ) { - CHAMELEON_Desc_Destroy( &Wcol ); + if ( wcol_init ) { + chameleon_desc_destroy( &Wcol ); } - CHAMELEON_Desc_Destroy( &Welt ); + chameleon_desc_destroy( &Welt ); RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, chamctxt); diff --git a/compute/pzlansy.c b/compute/pzlansy.c index 1d327383865353c2a1f3c835ea0a69c2749340e4..b81041526222324726d7cb19febdf33300694b5f 100644 --- a/compute/pzlansy.c +++ b/compute/pzlansy.c @@ -27,9 +27,8 @@ #include <math.h> #include "control/common.h" -#define A(m, n) A, (m), (n) -#define Wcol(m, n) Wcol, (m), (n) -#define Welt(m, n) Welt, (m), (n) +#define A( m, n ) A, (m), (n) +#define W( desc, m, n ) (desc), (m), (n) static inline void chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, @@ -62,18 +61,18 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, INSERT_TASK_dzasum( options, ChamRowwise, uplo, tempmm, tempnn, - A(m, n), ldam, Wcol(m, n) ); + A(m, n), ldam, W( Wcol, m, n) ); } else { INSERT_TASK_dzasum( options, ChamRowwise, ChamUpperLower, tempmm, tempnn, - A(m, n), ldam, Wcol(m, n) ); + A(m, n), ldam, W( Wcol, m, n) ); INSERT_TASK_dzasum( options, ChamColumnwise, ChamUpperLower, tempmm, tempnn, - A(m, n), ldam, Wcol(n, m) ); + A(m, n), ldam, W( Wcol, n, m) ); } } } @@ -85,8 +84,8 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, INSERT_TASK_dgeadd( options, ChamNoTrans, tempmm, 1, A->nb, - 1.0, Wcol(m, n ), tempmm, - 1.0, Wcol(m, n%Q), tempmm ); + 1.0, W( Wcol, m, n ), tempmm, + 1.0, W( Wcol, m, n%Q), tempmm ); } /** @@ -97,34 +96,34 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, INSERT_TASK_dgeadd( options, ChamNoTrans, tempmm, 1, A->mb, - 1.0, Wcol(m, n), tempmm, - 1.0, Wcol(m, 0), tempmm ); + 1.0, W( Wcol, m, n), tempmm, + 1.0, W( Wcol, m, 0), tempmm ); } INSERT_TASK_dlange( options, ChamMaxNorm, tempmm, 1, A->nb, - Wcol(m, 0), 1, Welt(m, 0)); + W( Wcol, m, 0), 1, W( Welt, m, 0)); } /** * Step 3: - * For m in 0..P-1, Welt(m, n) = max( Wcol(m..mt[P], n ) ) + * For m in 0..P-1, W( Welt, m, n) = max( Wcol(m..mt[P], n ) ) */ for(m = P; m < MT; m++) { INSERT_TASK_dlange_max( options, - Welt(m, 0), Welt(m%P, 0) ); + W( Welt, m, 0), W( Welt, m%P, 0) ); } /** * Step 4: - * For each i, Welt(i, n) = max( Welt(0..P-1, n) ) + * For each i, W( Welt, i, n) = max( W( Welt, 0..P-1, n) ) */ for(m = 1; m < P; m++) { INSERT_TASK_dlange_max( options, - Welt(m, 0), Welt(0, 0) ); + W( Welt, m, 0), W( Welt, 0, 0) ); } } @@ -159,26 +158,26 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A, INSERT_TASK_zlanhe( options, ChamMaxNorm, uplo, tempmm, A->nb, - A(m, n), ldam, Welt(m, n)); + A(m, n), ldam, W( Welt, m, n)); } else { INSERT_TASK_zlansy( options, ChamMaxNorm, uplo, tempmm, A->nb, - A(m, n), ldam, Welt(m, n)); + A(m, n), ldam, W( Welt, m, n)); } } else { INSERT_TASK_zlange( options, ChamMaxNorm, tempmm, tempnn, A->nb, - A(m, n), ldam, Welt(m, n)); + A(m, n), ldam, W( Welt, m, n)); } if ( n >= Q ) { INSERT_TASK_dlange_max( options, - Welt(m, n), Welt(m, n%Q) ); + W( Welt, m, n), W( Welt, m, n%Q) ); } } @@ -189,7 +188,7 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A, for(n = 1; n < Q; n++) { INSERT_TASK_dlange_max( options, - Welt(m, n), Welt(m, 0) ); + W( Welt, m, n), W( Welt, m, 0) ); } } @@ -200,7 +199,7 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A, for(m = P; m < MT; m++) { INSERT_TASK_dlange_max( options, - Welt(m, 0), Welt(m%P, 0) ); + W( Welt, m, 0), W( Welt, m%P, 0) ); } /** @@ -210,7 +209,7 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A, for(m = 1; m < P; m++) { INSERT_TASK_dlange_max( options, - Welt(m, 0), Welt(0, 0) ); + W( Welt, m, 0), W( Welt, 0, 0) ); } } @@ -245,21 +244,21 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo, if ( trans == ChamConjTrans) { INSERT_TASK_zhessq( options, uplo, tempmm, - A(m, n), ldam, Welt(m, n) ); + A(m, n), ldam, W( Welt, m, n) ); } else { INSERT_TASK_zsyssq( options, uplo, tempmm, - A(m, n), ldam, Welt(m, n) ); + A(m, n), ldam, W( Welt, m, n) ); } } else { INSERT_TASK_zgessq( options, tempmm, tempnn, - A(m, n), ldam, Welt(m, n) ); + A(m, n), ldam, W( Welt, m, n) ); INSERT_TASK_zgessq( options, tempmm, tempnn, - A(m, n), ldam, Welt(n, m) ); + A(m, n), ldam, W( Welt, n, m) ); } } } @@ -267,16 +266,16 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo, for(m = 0; m < MT; m++) { for(n = Q; n < NT; n++) { INSERT_TASK_dplssq( - options, Welt(m, n), Welt(m, n%Q) ); + options, W( Welt, m, n), W( Welt, m, n%Q) ); } /** * Step 2: - * For each j, W(m, j) = reduce( Welt(m, 0..Q-1) ) + * For each j, W(m, j) = reduce( W( Welt, m, 0..Q-1) ) */ for(n = 1; n < Q; n++) { INSERT_TASK_dplssq( - options, Welt(m, n), Welt(m, 0) ); + options, W( Welt, m, n), W( Welt, m, 0) ); } } @@ -286,7 +285,7 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo, */ for(m = P; m < MT; m++) { INSERT_TASK_dplssq( - options, Welt(m, 0), Welt(m%P, 0) ); + options, W( Welt, m, 0), W( Welt, m%P, 0) ); } /** @@ -295,11 +294,11 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo, */ for(m = 1; m < P; m++) { INSERT_TASK_dplssq( - options, Welt(m, 0), Welt(0, 0) ); + options, W( Welt, m, 0), W( Welt, 0, 0) ); } INSERT_TASK_dplssq2( - options, Welt(0, 0) ); + options, W( Welt, 0, 0) ); } /** @@ -311,13 +310,13 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra { CHAM_context_t *chamctxt; RUNTIME_option_t options; - CHAM_desc_t *Wcol = NULL; - CHAM_desc_t *Welt = NULL; + CHAM_desc_t Wcol; + CHAM_desc_t Welt; double alpha = 0.0; double beta = 0.0; int workn, workmt, worknt; - int m, n; + int m, n, wcol_init = 0; chamctxt = chameleon_context_self(); if ( sequence->status != CHAMELEON_SUCCESS ) { @@ -336,11 +335,14 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra case ChamInfNorm: RUNTIME_options_ws_alloc( &options, 1, 0 ); - CHAMELEON_Desc_Create( &Wcol, NULL, ChamRealDouble, A->mb, 1, A->mb, - workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q ); + chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, A->mb, 1, A->mb, + workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q, + NULL, NULL, NULL ); + wcol_init = 1; - CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1, - workmt, A->q, 0, 0, workmt, A->q, A->p, A->q ); + chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, + workmt, A->q, 0, 0, workmt, A->q, A->p, A->q, + NULL, NULL, NULL ); break; /* @@ -350,8 +352,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra RUNTIME_options_ws_alloc( &options, 1, 0 ); alpha = 1.; - CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 2, 1, 2, - workmt*2, workn, 0, 0, workmt*2, workn, A->p, A->q ); + chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, + workmt*2, workn, 0, 0, workmt*2, workn, A->p, A->q, + NULL, NULL, NULL ); break; /* @@ -361,8 +364,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra default: RUNTIME_options_ws_alloc( &options, 1, 0 ); - CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1, - workmt, workn, 0, 0, workmt, workn, A->p, A->q ); + chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, + workmt, workn, 0, 0, workmt, workn, A->p, A->q, + NULL, NULL, NULL ); } /* Initialize workspaces */ @@ -370,40 +374,40 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra (norm == ChamOneNorm) ) { /* Initialize Wcol tile */ - for(m = 0; m < Wcol->mt; m++) { - for(n = 0; n < Wcol->nt; n++) { + for(m = 0; m < Wcol.mt; m++) { + for(n = 0; n < Wcol.nt; n++) { INSERT_TASK_dlaset( &options, - ChamUpperLower, Wcol->mb, Wcol->nb, + ChamUpperLower, Wcol.mb, Wcol.nb, alpha, beta, - Wcol(m,n), Wcol->mb ); + W( &Wcol, m, n ), Wcol.mb ); } } } - for(m = 0; m < Welt->mt; m++) { - for(n = 0; n < Welt->nt; n++) { + for(m = 0; m < Welt.mt; m++) { + for(n = 0; n < Welt.nt; n++) { INSERT_TASK_dlaset( &options, - ChamUpperLower, Welt->mb, Welt->nb, + ChamUpperLower, Welt.mb, Welt.nb, alpha, beta, - Welt(m,n), Welt->mb ); + W( &Welt, m, n ), Welt.mb ); } } switch ( norm ) { case ChamOneNorm: case ChamInfNorm: - chameleon_pzlansy_inf( uplo, A, Wcol, Welt, &options ); - CHAMELEON_Desc_Flush( Wcol, sequence ); + chameleon_pzlansy_inf( uplo, A, &Wcol, &Welt, &options ); + CHAMELEON_Desc_Flush( &Wcol, sequence ); break; case ChamFrobeniusNorm: - chameleon_pzlansy_frb( trans, uplo, A, Welt, &options ); + chameleon_pzlansy_frb( trans, uplo, A, &Welt, &options ); break; case ChamMaxNorm: default: - chameleon_pzlansy_max( trans, uplo, A, Welt, &options ); + chameleon_pzlansy_max( trans, uplo, A, &Welt, &options ); } /** @@ -415,20 +419,20 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra INSERT_TASK_dlacpy( &options, ChamUpperLower, 1, 1, 1, - Welt(0,0), 1, Welt(m, n), 1); + W( &Welt, 0, 0 ), 1, W( &Welt, m, n ), 1); } } } - CHAMELEON_Desc_Flush( Welt, sequence ); + CHAMELEON_Desc_Flush( &Welt, sequence ); RUNTIME_sequence_wait(chamctxt, sequence); - *result = *(double *)Welt->get_blkaddr(Welt, A->myrank / A->q, A->myrank % A->q ); + *result = *(double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q ); - if ( Wcol != NULL ) { - CHAMELEON_Desc_Destroy( &Wcol ); + if ( wcol_init ) { + chameleon_desc_destroy( &Wcol ); } - CHAMELEON_Desc_Destroy( &Welt ); + chameleon_desc_destroy( &Welt ); RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, chamctxt); diff --git a/compute/zgelqf.c b/compute/zgelqf.c index 388fb71483bda011dd0aadfc102c29b828b739c1..8c0cc4bec038b8d787854331a87b3c8f33272a12 100644 --- a/compute/zgelqf.c +++ b/compute/zgelqf.c @@ -297,8 +297,7 @@ int CHAMELEON_zgelqf_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAMELEON_Desc_Flush( T, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgelqf_param.c b/compute/zgelqf_param.c index 2330cad7862ff14a5d611656fa5e01af978afcf1..fecf0f20f31a8e1bff2314c9da0ba9cb6967a6cb 100644 --- a/compute/zgelqf_param.c +++ b/compute/zgelqf_param.c @@ -297,8 +297,7 @@ int CHAMELEON_zgelqf_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t CHAMELEON_Desc_Flush( TT, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgelqs.c b/compute/zgelqs.c index 93db15cf40479feee90c75b7ffc41c2cc1d75636..08ec0dd3dcd37eb606db79e900464cda4686f5ad 100644 --- a/compute/zgelqs.c +++ b/compute/zgelqs.c @@ -339,8 +339,7 @@ int CHAMELEON_zgelqs_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, CHAMELEON_Desc_Flush( T, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c index 11f08cc57362e09583cd5ea264a9e9e113777810..b594dd602962c779e6450134376570ce8feadc04 100644 --- a/compute/zgelqs_param.c +++ b/compute/zgelqs_param.c @@ -346,8 +346,7 @@ int CHAMELEON_zgelqs_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t CHAMELEON_Desc_Flush( TT, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgels.c b/compute/zgels.c index 7118f3e58cfbb6e52eefba84c762adad905feb03..99d7914da27fd1cdc88463a70e07ba5fee7d4675 100644 --- a/compute/zgels.c +++ b/compute/zgels.c @@ -432,8 +432,7 @@ int CHAMELEON_zgels_Tile_Async( cham_trans_t trans, CHAM_desc_t *A, CHAMELEON_Desc_Flush( B, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgels_param.c b/compute/zgels_param.c index 7b41e4cdfc196e3a2dd9a6433222e2a0819fc050..ea23c9a4919594174e511c4a3256b61c9233ba4d 100644 --- a/compute/zgels_param.c +++ b/compute/zgels_param.c @@ -428,8 +428,7 @@ int CHAMELEON_zgels_param_Tile_Async( const libhqr_tree_t *qrtree, cham_trans_t CHAMELEON_Desc_Flush( TT, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgeqrf.c b/compute/zgeqrf.c index 8e0d642ed787504dc3e22d8cbdf38399d86bc931..f365fc5106713605e8a485b5193412494c95e51b 100644 --- a/compute/zgeqrf.c +++ b/compute/zgeqrf.c @@ -296,8 +296,7 @@ int CHAMELEON_zgeqrf_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAMELEON_Desc_Flush( T, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgeqrf_param.c b/compute/zgeqrf_param.c index 34171ce2229bd0858387acab5b27d769edab8927..36cb65d2ad8e391e8b6cd16f425b18e93ea9bf98 100644 --- a/compute/zgeqrf_param.c +++ b/compute/zgeqrf_param.c @@ -319,8 +319,7 @@ int CHAMELEON_zgeqrf_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t CHAMELEON_Desc_Flush( TT, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgeqrs.c b/compute/zgeqrs.c index f38b05dbc5e9425d4271fe1a2d66d5dcbb439db8..ee2d2bc6d43636e7da152fd1b39ed0e91ebaa803 100644 --- a/compute/zgeqrs.c +++ b/compute/zgeqrs.c @@ -334,8 +334,7 @@ int CHAMELEON_zgeqrs_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, CHAMELEON_Desc_Flush( T, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c index 79a6280f63f7b13e8cfd44377d9461cea19e741b..15a5ff0d64df8e8dc001aee59844fef3fcf34e40 100644 --- a/compute/zgeqrs_param.c +++ b/compute/zgeqrs_param.c @@ -335,8 +335,7 @@ int CHAMELEON_zgeqrs_param_Tile_Async( const libhqr_tree_t *qrtree, CHAMELEON_Desc_Flush( TT, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgesv_incpiv.c b/compute/zgesv_incpiv.c index 0e0d330f86ee53bf2bbf5a6ceff678c9b2ab6985..6dd3073f6016555191f353a3b30492edc10743a7 100644 --- a/compute/zgesv_incpiv.c +++ b/compute/zgesv_incpiv.c @@ -331,8 +331,7 @@ int CHAMELEON_zgesv_incpiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV CHAMELEON_Desc_Flush( Dptr, sequence ); CHAMELEON_Desc_Flush( B, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgesvd.c b/compute/zgesvd.c index 05f72f68c58839e99ae0fe2581602d82473c2771..a9ba03d418296d6db8f7c78c0277f58f93be690d 100644 --- a/compute/zgesvd.c +++ b/compute/zgesvd.c @@ -471,7 +471,7 @@ int CHAMELEON_zgesvd_Tile_Async( cham_job_t jobu, cham_job_t jobvt, #if defined(CHAMELEON_COPY_DIAG) { - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, ); + chameleon_zdesc_alloc_diag( &D, A->mb, A->m, A->n, A->p, A->q ); Dptr = &D; } #endif @@ -480,12 +480,12 @@ int CHAMELEON_zgesvd_Tile_Async( cham_job_t jobu, cham_job_t jobvt, sequence, request ); /* Allocate band structure */ - chameleon_zdesc_alloc_diag( descAB, - LDAB, NB, - LDAB, MINMN, - 0, 0, - LDAB, MINMN, - 1, 1 ); + chameleon_zdesc_alloc( descAB, + LDAB, NB, /* mb, nb */ + LDAB, N, /* lm, ln */ + 0, 0, /* i, j */ + LDAB, N, /* m, n */ + ); /* Convert matrix to band form */ chameleon_pztile2band( uplo, @@ -559,7 +559,7 @@ int CHAMELEON_zgesvd_Tile_Async( cham_job_t jobu, cham_job_t jobvt, chameleon_sequence_wait( chamctxt, sequence ); #endif /* !defined(CHAMELEON_SIMULATION) */ - chameleon_desc_mat_free( &descAB ); + chameleon_desc_destroy( &descAB ); subA = NULL; subT = NULL; @@ -640,8 +640,7 @@ int CHAMELEON_zgesvd_Tile_Async( cham_job_t jobu, cham_job_t jobvt, free(E); if ( Dptr ) { - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zgetrf_incpiv.c b/compute/zgetrf_incpiv.c index 92f3a6ddc2509089ff5cde9b7aebd1a135662090..f990126ebc1a351e7164c4c3cebb114aa3d4c422 100644 --- a/compute/zgetrf_incpiv.c +++ b/compute/zgetrf_incpiv.c @@ -298,8 +298,7 @@ int CHAMELEON_zgetrf_incpiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *L, int *IPI CHAMELEON_Desc_Flush( L, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zheevd.c b/compute/zheevd.c index a640fe54f9634b1da661a313f0fee0f3b8520eea..7f1a8b497ae733f1364976b1fdd5ad061b4e7b01 100644 --- a/compute/zheevd.c +++ b/compute/zheevd.c @@ -534,9 +534,8 @@ int CHAMELEON_zheevd_Tile_Async( cham_job_t jobz, cham_uplo_t uplo, free(Q2); free(V); free(E); - if (Dptr != NULL) { - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + if ( Dptr != NULL ) { + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zhetrd.c b/compute/zhetrd.c index 39deedc70ac1cd6dd467af5499ca421104e521d2..0815e1dd689c39e660a729c133efdade2487e92a 100644 --- a/compute/zhetrd.c +++ b/compute/zhetrd.c @@ -392,8 +392,7 @@ int CHAMELEON_zhetrd_Tile_Async( cham_job_t jobz, NB = descA.mb; #if defined(CHAMELEON_COPY_DIAG) { - chameleon_zdesc_alloc_diag( D, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, - 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q ); + chameleon_zdesc_alloc_diag( &D, A->mb, A->m, A->n, A->p, A->q ); Dptr = &D; } #endif @@ -404,12 +403,12 @@ int CHAMELEON_zhetrd_Tile_Async( cham_job_t jobz, LDAB = NB+1; /* Allocate band structure */ - chameleon_zdesc_alloc_diag( descAB, - LDAB, NB, /* mb, nb */ - LDAB, N, /* lm, ln */ - 0, 0, /* i, j */ - LDAB, N, /* m, n */ - 1, 1 ); + chameleon_zdesc_alloc( descAB, + LDAB, NB, /* mb, nb */ + LDAB, N, /* lm, ln */ + 0, 0, /* i, j */ + LDAB, N, /* m, n */ + ); /* Copy data into band structure */ chameleon_pztile2band( uplo, A, &descAB, @@ -432,10 +431,9 @@ int CHAMELEON_zhetrd_Tile_Async( cham_job_t jobz, } #endif /* !defined(CHAMELEON_SIMULATION) */ if (Dptr != NULL) { - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } - chameleon_desc_mat_free( &descAB ); + chameleon_desc_destroy( &descAB ); (void)D; return CHAMELEON_SUCCESS; } diff --git a/compute/ztile.c b/compute/ztile.c index 8214c4c6141373c443c0a25d979fca976084752c..d95a729b7bb430b4fe99fcee7275fe4ef2055252 100644 --- a/compute/ztile.c +++ b/compute/ztile.c @@ -77,8 +77,8 @@ int CHAMELEON_zLapack_to_Tile( CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t /* Create the B descriptor to handle the Lapack format matrix */ CHAMELEON_Desc_Create_User( &B, Af77, ChamComplexDouble, A->mb, A->nb, A->bsiz, - LDA, A->n, 0, 0, A->m, A->n, 1, 1, - chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL ); + LDA, A->n, 0, 0, A->m, A->n, 1, 1, + chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL ); /* Start the computation */ chameleon_sequence_create( chamctxt, &sequence ); @@ -154,8 +154,8 @@ int CHAMELEON_zTile_to_Lapack( CHAM_desc_t *A, CHAMELEON_Complex64_t *Af77, int /* Create the B descriptor to handle the Lapack format matrix */ CHAMELEON_Desc_Create_User( &B, Af77, ChamComplexDouble, A->mb, A->nb, A->bsiz, - LDA, A->n, 0, 0, A->m, A->n, 1, 1, - chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL ); + LDA, A->n, 0, 0, A->m, A->n, 1, 1, + chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL ); /* Start the computation */ chameleon_sequence_create( chamctxt, &sequence ); diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c index 0512caeda802c5cb0b299486b053270a7a002bc4..4be3c82932a45caa1861f42068ca2d2b53c73616 100644 --- a/compute/ztpgqrt.c +++ b/compute/ztpgqrt.c @@ -426,8 +426,7 @@ int CHAMELEON_ztpgqrt_Tile_Async( int L, CHAMELEON_Desc_Flush( Q2, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zunglq.c b/compute/zunglq.c index 1851f381a1ac51b1522a6c5ea567caeab6a1add5..41015464634f910376b21d6d3f9df91dd83d4371 100644 --- a/compute/zunglq.c +++ b/compute/zunglq.c @@ -322,8 +322,7 @@ int CHAMELEON_zunglq_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q, CHAMELEON_Desc_Flush( T, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c index a0571b4fa48ce53fccf6f1d6313ebd3b44873d1e..e6d36954211c8a931a7d8e9aa2b96a55c1953557 100644 --- a/compute/zunglq_param.c +++ b/compute/zunglq_param.c @@ -321,8 +321,7 @@ int CHAMELEON_zunglq_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t CHAMELEON_Desc_Flush( TT, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zungqr.c b/compute/zungqr.c index 4f15a5a1695b97adf7f1fde0a34aef0750af10b0..c51539616760f3c45ceecf2b1f631846fc5443fc 100644 --- a/compute/zungqr.c +++ b/compute/zungqr.c @@ -319,8 +319,7 @@ int CHAMELEON_zungqr_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q, CHAMELEON_Desc_Flush( T, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c index 0f17560c32b96be73b8cdba829afbb282e537433..5b46d66ff7626ee058fb7119d0ee344728509a3e 100644 --- a/compute/zungqr_param.c +++ b/compute/zungqr_param.c @@ -328,8 +328,7 @@ int CHAMELEON_zungqr_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t CHAMELEON_Desc_Flush( TT, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zunmlq.c b/compute/zunmlq.c index 896c5d47d34c32cf494e9b84a409eca7b155933b..f460e12e4b353fa1746f03a9bfd4b9706e8f564b 100644 --- a/compute/zunmlq.c +++ b/compute/zunmlq.c @@ -381,8 +381,7 @@ int CHAMELEON_zunmlq_Tile_Async( cham_side_t side, cham_trans_t trans, CHAMELEON_Desc_Flush( T, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c index 6ca36dff3e1872eca47a27111914dcbe465f77fa..4c0a72358b357b1c3e83253c9dc0844606e2bea4 100644 --- a/compute/zunmlq_param.c +++ b/compute/zunmlq_param.c @@ -382,8 +382,7 @@ int CHAMELEON_zunmlq_param_Tile_Async( const libhqr_tree_t *qrtree, cham_side_t CHAMELEON_Desc_Flush( TT, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zunmqr.c b/compute/zunmqr.c index 0ddbaa8537774f8640e46dab2b3b8badad3fbb0a..6271ed98eeedbcb23bb88ff909ad303fa8d70c42 100644 --- a/compute/zunmqr.c +++ b/compute/zunmqr.c @@ -384,8 +384,7 @@ int CHAMELEON_zunmqr_Tile_Async( cham_side_t side, cham_trans_t trans, CHAMELEON_Desc_Flush( T, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c index a8e6cc5920f972eb3d12a8e5804b9a521606e23b..5674ba090c9ceb0c581b383993b58a8ec0fb6573 100644 --- a/compute/zunmqr_param.c +++ b/compute/zunmqr_param.c @@ -389,8 +389,7 @@ int CHAMELEON_zunmqr_param_Tile_Async( const libhqr_tree_t *qrtree, CHAMELEON_Desc_Flush( TT, sequence ); CHAMELEON_Desc_Flush( Dptr, sequence ); chameleon_sequence_wait( chamctxt, sequence ); - RUNTIME_desc_destroy( Dptr ); - chameleon_desc_mat_free( Dptr ); + chameleon_desc_destroy( Dptr ); } (void)D; return CHAMELEON_SUCCESS; diff --git a/control/compute_z.h b/control/compute_z.h index 3682d99cc71356fd4ad5100ca7e91d0208a86356..3229f1389f86287bdd0fd355f3934c25f67a99ab 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -25,34 +25,6 @@ #ifndef _compute_z_h_ #define _compute_z_h_ -/** - * LAPACK/Tile Descriptor accesses - */ -#define ChamDescInput 1 -#define ChamDescOutput 2 -#define ChamDescInout (ChamDescInput | ChamDescOutput) - -/** - * Macro for matrix conversion / Lapack interface - */ -#define chameleon_zdesc_alloc_diag( descA, mb, nb, lm, ln, i, j, m, n, p, q) \ - descA = chameleon_desc_init_diag( \ - ChamComplexDouble, (mb), (nb), ((mb)*(nb)), \ - (m), (n), (i), (j), (m), (n), p, q); \ - chameleon_desc_mat_alloc( &(descA) ); \ - RUNTIME_desc_create( &(descA) ); - -#define chameleon_zdesc_alloc( descA, mb, nb, lm, ln, i, j, m, n, free) \ - descA = chameleon_desc_init( \ - ChamComplexDouble, (mb), (nb), ((mb)*(nb)), \ - (m), (n), (i), (j), (m), (n), 1, 1); \ - if ( chameleon_desc_mat_alloc( &(descA) ) ) { \ - chameleon_error( __func__, "chameleon_desc_mat_alloc() failed"); \ - {free;}; \ - return CHAMELEON_ERR_OUT_OF_RESOURCES; \ - } \ - RUNTIME_desc_create( &(descA) ); - /** * Declarations of internal sequential functions */ @@ -148,6 +120,41 @@ void chameleon_pzungqr_param( int genD, int K, const libhqr_tree_t *qrtree, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); + +/** + * LAPACK/Tile Descriptor accesses + */ +#define ChamDescInput 1 +#define ChamDescOutput 2 +#define ChamDescInout (ChamDescInput | ChamDescOutput) + +/** + * Macro for matrix conversion / Lapack interface + */ +static inline int +chameleon_zdesc_alloc_diag( CHAM_desc_t *descA, int nb, int m, int n, int p, int q ) { + int diag_m = chameleon_min( m, n ); + return chameleon_desc_init( descA, CHAMELEON_MAT_ALLOC_GLOBAL, + ChamComplexDouble, nb, nb, nb*nb, + diag_m, nb, 0, 0, diag_m, nb, p, q, + chameleon_getaddr_diag, + chameleon_getblkldd_ccrb, + chameleon_getrankof_2d_diag ); +} + +#define chameleon_zdesc_alloc( descA, mb, nb, lm, ln, i, j, m, n, free) \ + { \ + int rc; \ + rc = chameleon_desc_init( &(descA), CHAMELEON_MAT_ALLOC_GLOBAL, \ + ChamComplexDouble, (mb), (nb), ((mb)*(nb)), \ + (m), (n), (i), (j), (m), (n), 1, 1, \ + NULL, NULL, NULL ); \ + if ( rc != CHAMELEON_SUCCESS ) { \ + {free;} \ + return rc; \ + } \ + } + /** * @brief Internal function to convert the lapack format to tile format in * LAPACK interface calls @@ -160,35 +167,28 @@ chameleon_zlap2tile( CHAM_context_t *chamctxt, RUNTIME_sequence_t *seq, RUNTIME_request_t *req ) { /* Initialize the Lapack descriptor */ - *descAl = chameleon_desc_init_user( ChamComplexDouble, mb, nb, (mb)*(nb), - lm, ln, 0, 0, m, n, 1, 1, - chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL ); - descAl->mat = A; + chameleon_desc_init( descAl, A, ChamComplexDouble, mb, nb, (mb)*(nb), + lm, ln, 0, 0, m, n, 1, 1, + chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL ); descAl->styp = ChamCM; - /* Initialize the tile descriptor */ - *descAt = chameleon_desc_init( ChamComplexDouble, mb, nb, (mb)*(nb), - lm, ln, 0, 0, m, n, 1, 1 ); - if ( CHAMELEON_TRANSLATION == ChamOutOfPlace ) { - if ( chameleon_desc_mat_alloc( descAt ) ) { - chameleon_error( "chameleon_zlap2tile", "chameleon_desc_mat_alloc() failed"); - return CHAMELEON_ERR_OUT_OF_RESOURCES; - } - - RUNTIME_desc_create( descAl ); - RUNTIME_desc_create( descAt ); + /* Initialize the tile descriptor */ + chameleon_desc_init( descAt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, mb, nb, (mb)*(nb), + lm, ln, 0, 0, m, n, 1, 1, + chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, NULL ); if ( mode & ChamDescInput ) { chameleon_pzlacpy( uplo, descAl, descAt, seq, req ); } } else { - chameleon_fatal_error( "chameleon_zlap2tile", "INPLACE translation not supported yet"); - descAt->mat = A; + /* Initialize the tile descriptor */ + chameleon_desc_init( descAt, A, ChamComplexDouble, mb, nb, (mb)*(nb), + lm, ln, 0, 0, m, n, 1, 1, + chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, NULL ); - RUNTIME_desc_create( descAl ); - RUNTIME_desc_create( descAt ); + chameleon_fatal_error( "chameleon_zlap2tile", "INPLACE translation not supported yet"); if ( mode & ChamDescInput ) { /* CHAMELEON_zgecfi_Async( lm, ln, A, ChamCM, mb, nb, */ @@ -235,11 +235,8 @@ chameleon_ztile2lap( CHAM_context_t *chamctxt, CHAM_desc_t *descAl, CHAM_desc_t static inline void chameleon_ztile2lap_cleanup( CHAM_context_t *chamctxt, CHAM_desc_t *descAl, CHAM_desc_t *descAt ) { - if ( CHAMELEON_TRANSLATION == ChamOutOfPlace ) { - chameleon_desc_mat_free( descAt ); - } - RUNTIME_desc_destroy( descAl ); - RUNTIME_desc_destroy( descAt ); + chameleon_desc_destroy( descAl ); + chameleon_desc_destroy( descAt ); } #endif /* _compute_z_h_ */ diff --git a/control/descriptor.c b/control/descriptor.c index 7216518491ea24858167d648ab3ff69764b385ff..c27fe5749f2164bd558fedffc9d9f0ec9208552e 100644 --- a/control/descriptor.c +++ b/control/descriptor.c @@ -31,12 +31,49 @@ static int nbdesc = 0; +/** + * + */ +int chameleon_desc_mat_alloc( CHAM_desc_t *desc ) +{ + size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) + * (size_t)CHAMELEON_Element_Size(desc->dtyp); + if ((desc->mat = RUNTIME_malloc(size)) == NULL) { + chameleon_error("chameleon_desc_mat_alloc", "malloc() failed"); + return CHAMELEON_ERR_OUT_OF_RESOURCES; + } + + /* The matrix has already been registered by the Runtime alloc */ + desc->register_mat = 0; + + return CHAMELEON_SUCCESS; +} + +/** + * + */ +int chameleon_desc_mat_free( CHAM_desc_t *desc ) +{ + if ( (desc->mat != NULL) && + (desc->use_mat == 1 ) && + (desc->alloc_mat == 1 ) ) + { + size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) + * (size_t)CHAMELEON_Element_Size(desc->dtyp); + + RUNTIME_free(desc->mat, size); + desc->mat = NULL; + } + + return CHAMELEON_SUCCESS; +} + /** ****************************************************************************** * * @ingroup Descriptor * - * chameleon_desc_init_user - Internal function to create tiled matrix descriptor + * @brief Internal function to create tiled matrix descriptor * with generic function for data distribution and storage format. * ****************************************************************************** @@ -96,121 +133,132 @@ static int nbdesc = 0; * @return The descriptor with the matrix description parameters set. * */ -CHAM_desc_t chameleon_desc_init_user(cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, - int m, int n, int p, int q, - void* (*get_blkaddr)( const CHAM_desc_t*, int, int ), - int (*get_blkldd) ( const CHAM_desc_t*, int ), - int (*get_rankof) ( const CHAM_desc_t*, int, int )) +int chameleon_desc_init( CHAM_desc_t *desc, void *mat, + cham_flttype_t dtyp, int mb, int nb, int bsiz, + int lm, int ln, int i, int j, + int m, int n, int p, int q, + void* (*get_blkaddr)( const CHAM_desc_t*, int, int ), + int (*get_blkldd) ( const CHAM_desc_t*, int ), + int (*get_rankof) ( const CHAM_desc_t*, int, int ) ) { CHAM_context_t *chamctxt; - CHAM_desc_t desc; + int rc = CHAMELEON_SUCCESS; - memset( &desc, 0, sizeof(CHAM_desc_t) ); + memset( desc, 0, sizeof(CHAM_desc_t) ); chamctxt = chameleon_context_self(); if (chamctxt == NULL) { chameleon_error("CHAMELEON_Desc_Create", "CHAMELEON not initialized"); - return desc; + return CHAMELEON_ERR_NOT_INITIALIZED; } // If one of the function get_* is NULL, we switch back to the default, like in chameleon_desc_init() - desc.get_blkaddr = get_blkaddr ? get_blkaddr : chameleon_getaddr_ccrb; - desc.get_blkldd = get_blkldd ? get_blkldd : chameleon_getblkldd_ccrb; - desc.get_rankof = get_rankof ? get_rankof : chameleon_getrankof_2d; + desc->get_blkaddr = get_blkaddr ? get_blkaddr : chameleon_getaddr_ccrb; + desc->get_blkldd = get_blkldd ? get_blkldd : chameleon_getblkldd_ccrb; + desc->get_rankof = get_rankof ? get_rankof : chameleon_getrankof_2d; // Matrix properties - desc.dtyp = dtyp; + desc->dtyp = dtyp; // Should be given as parameter to follow get_blkaddr (unused) - desc.styp = ChamCCRB; - desc.mb = mb; - desc.nb = nb; - desc.bsiz = bsiz; + desc->styp = ChamCCRB; + desc->mb = mb; + desc->nb = nb; + desc->bsiz = bsiz; // Large matrix parameters - desc.lm = lm; - desc.ln = ln; + desc->lm = lm; + desc->ln = ln; // Large matrix derived parameters - desc.lmt = (lm%mb==0) ? (lm/mb) : (lm/mb+1); - desc.lnt = (ln%nb==0) ? (ln/nb) : (ln/nb+1); + desc->lmt = (lm%mb==0) ? (lm/mb) : (lm/mb+1); + desc->lnt = (ln%nb==0) ? (ln/nb) : (ln/nb+1); // Submatrix parameters - desc.i = i; - desc.j = j; - desc.m = m; - desc.n = n; + desc->i = i; + desc->j = j; + desc->m = m; + desc->n = n; // Submatrix derived parameters - desc.mt = (m == 0) ? 0 : (i+m-1)/mb - i/mb + 1; - desc.nt = (n == 0) ? 0 : (j+n-1)/nb - j/nb + 1; + desc->mt = (m == 0) ? 0 : (i+m-1)/mb - i/mb + 1; + desc->nt = (n == 0) ? 0 : (j+n-1)/nb - j/nb + 1; - desc.id = nbdesc; nbdesc++; - desc.occurences = 0; - desc.use_mat = 1; - desc.alloc_mat = 1; - desc.register_mat = (chamctxt->ncudas > 0) ? 1 : 0; - desc.ooc = 0; + desc->id = nbdesc; + nbdesc++; + desc->occurences = 0; - desc.myrank = RUNTIME_comm_rank( chamctxt ); + desc->myrank = RUNTIME_comm_rank( chamctxt ); // Grid size - desc.p = p; - desc.q = q; + desc->p = p; + desc->q = q; // Local dimensions in tiles - if ( desc.myrank < (p*q) ) { - desc.llmt = (desc.lmt + p - 1) / p; - desc.llnt = (desc.lnt + q - 1) / q; + if ( desc->myrank < (p*q) ) { + desc->llmt = (desc->lmt + p - 1) / p; + desc->llnt = (desc->lnt + q - 1) / q; // Local dimensions - if ( ((desc.lmt-1) % p) == (desc.myrank / q) ) { - desc.llm = ( desc.llmt - 1 ) * mb + ((lm%mb==0) ? mb : (lm%mb)); + if ( ((desc->lmt-1) % p) == (desc->myrank / q) ) { + desc->llm = ( desc->llmt - 1 ) * mb + ((lm%mb==0) ? mb : (lm%mb)); } else { - desc.llm = desc.llmt * mb; + desc->llm = desc->llmt * mb; } - if ( ((desc.lnt-1) % q) == (desc.myrank % q) ) { - desc.lln = ( desc.llnt - 1 ) * nb + ((ln%nb==0) ? nb : (ln%nb)); + if ( ((desc->lnt-1) % q) == (desc->myrank % q) ) { + desc->lln = ( desc->llnt - 1 ) * nb + ((ln%nb==0) ? nb : (ln%nb)); } else { - desc.lln = desc.llnt * nb; + desc->lln = desc->llnt * nb; } - desc.llm1 = (desc.llm/mb); - desc.lln1 = (desc.lln/nb); + desc->llm1 = (desc->llm/mb); + desc->lln1 = (desc->lln/nb); } else { - desc.llmt = 0; - desc.llnt = 0; - desc.llm = 0; - desc.lln = 0; - desc.llm1 = 0; - desc.lln1 = 0; + desc->llmt = 0; + desc->llnt = 0; + desc->llm = 0; + desc->lln = 0; + desc->llm1 = 0; + desc->lln1 = 0; } + /* memory of the matrix is handled by the user */ + desc->alloc_mat = 0; + /* if the user gives a pointer to the overall data (tiles) we can use it */ + desc->use_mat = 0; + /* users data can have multiple forms: let him register tiles */ + desc->register_mat = 0; + /* The matrix is alocated tile by tile with out of core */ + desc->ooc = 0; + // Matrix address - desc.mat = NULL; - desc.A21 = (size_t)(desc.llm - desc.llm%mb)*(size_t)(desc.lln - desc.lln%nb); - desc.A12 = (size_t)( desc.llm%mb)*(size_t)(desc.lln - desc.lln%nb) + desc.A21; - desc.A22 = (size_t)(desc.llm - desc.llm%mb)*(size_t)( desc.lln%nb) + desc.A12; + if ( mat == CHAMELEON_MAT_ALLOC_GLOBAL ) { + rc = chameleon_desc_mat_alloc( desc ); - return desc; -} + desc->alloc_mat = 1; + desc->use_mat = 1; + } + else if ( mat == CHAMELEON_MAT_ALLOC_TILE ) { + //chameleon_error( "chameleon_desc_init", "CHAMELEON_MAT_ALLOC_TILE is not available yet" ); + //desc->mat = NULL; + rc = chameleon_desc_mat_alloc( desc ); + desc->use_mat = 1; -/** - * Internal static descriptor initializer - */ -CHAM_desc_t chameleon_desc_init(cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, - int m, int n, int p, int q) -{ - return chameleon_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q, - chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, chameleon_getrankof_2d); -} + desc->alloc_mat = 1; + } + else if ( mat == CHAMELEON_MAT_OOC ) { + desc->mat = NULL; + desc->ooc = 1; + } + else { + /* memory of the matrix is handled by users */ + desc->mat = mat; + desc->use_mat = 1; + } -/** - * Internal static descriptor initializer for a block diagonal matrix - */ -CHAM_desc_t chameleon_desc_init_diag(cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, - int m, int n, int p, int q) -{ - return chameleon_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q, - chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, chameleon_getrankof_2d_diag); + desc->A21 = (size_t)(desc->llm - desc->llm%mb)*(size_t)(desc->lln - desc->lln%nb); + desc->A12 = (size_t)( desc->llm%mb)*(size_t)(desc->lln - desc->lln%nb) + desc->A21; + desc->A22 = (size_t)(desc->llm - desc->llm%mb)*(size_t)( desc->lln%nb) + desc->A12; + + /* Create runtime specific structure like registering data */ + RUNTIME_desc_create( desc ); + + return rc; } /** @@ -248,6 +296,12 @@ CHAM_desc_t* chameleon_desc_submatrix(CHAM_desc_t *descA, int i, int j, int m, i return descB; } +void chameleon_desc_destroy( CHAM_desc_t *desc ) +{ + RUNTIME_desc_destroy( desc ); + chameleon_desc_mat_free( desc ); +} + /** * Check for descriptor correctness */ @@ -295,42 +349,6 @@ int chameleon_desc_check(const CHAM_desc_t *desc) return CHAMELEON_SUCCESS; } -/** - * - */ -int chameleon_desc_mat_alloc( CHAM_desc_t *desc ) -{ - size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) - * (size_t)CHAMELEON_Element_Size(desc->dtyp); - if ((desc->mat = RUNTIME_malloc(size)) == NULL) { - chameleon_error("chameleon_desc_mat_alloc", "malloc() failed"); - return CHAMELEON_ERR_OUT_OF_RESOURCES; - } - - /* The matrix has already been registered by the Runtime alloc */ - desc->register_mat = 0; - - return CHAMELEON_SUCCESS; -} - -/** - * - */ -int chameleon_desc_mat_free( CHAM_desc_t *desc ) -{ - if ( (desc->mat != NULL) && - (desc->use_mat == 1 ) && - (desc->alloc_mat == 1 ) ) - { - size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) - * (size_t)CHAMELEON_Element_Size(desc->dtyp); - - RUNTIME_free(desc->mat, size); - desc->mat = NULL; - } - return CHAMELEON_SUCCESS; -} - /** ***************************************************************************** * @@ -393,63 +411,12 @@ int chameleon_desc_mat_free( CHAM_desc_t *desc ) * \retval CHAMELEON_SUCCESS successful exit * */ -int CHAMELEON_Desc_Create(CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, int m, int n, int p, int q) +int CHAMELEON_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz, + int lm, int ln, int i, int j, int m, int n, int p, int q ) { - CHAM_context_t *chamctxt; - CHAM_desc_t *desc; - int status; - - *descptr = NULL; - - chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_error("CHAMELEON_Desc_Create", "CHAMELEON not initialized"); - return CHAMELEON_ERR_NOT_INITIALIZED; - } - - /* Allocate memory and initialize the descriptor */ - desc = (CHAM_desc_t*)malloc(sizeof(CHAM_desc_t)); - if (desc == NULL) { - chameleon_error("CHAMELEON_Desc_Create", "malloc() failed"); - return CHAMELEON_ERR_OUT_OF_RESOURCES; - } - *desc = chameleon_desc_init(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q); - - if (mat == NULL) { - - size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) - * (size_t)CHAMELEON_Element_Size(desc->dtyp); - - if ((desc->mat = RUNTIME_malloc(size)) == NULL) { - chameleon_error("CHAMELEON_Desc_Create", "malloc() failed"); - free(desc); - return CHAMELEON_ERR_OUT_OF_RESOURCES; - } - desc->use_mat = 1; - desc->alloc_mat = 1; - desc->register_mat = 0; - - } else { - desc->mat = mat; - /* memory of the matrix is handled by users */ - desc->alloc_mat = 0; - desc->use_mat = 1; - desc->register_mat = 0; - } - - /* Create scheduler structure like registering data */ - RUNTIME_desc_create( desc ); - - status = chameleon_desc_check( desc ); - if (status != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_Desc_Create", "invalid descriptor"); - CHAMELEON_Desc_Destroy( &desc ); - return status; - } - - *descptr = desc; - return CHAMELEON_SUCCESS; + return CHAMELEON_Desc_Create_User( descptr, mat, dtyp, mb, nb, bsiz, + lm, ln, i, j, m, n, p, q, + NULL, NULL, NULL ); } /** @@ -507,11 +474,11 @@ int CHAMELEON_Desc_Create(CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, * \retval CHAMELEON_SUCCESS successful exit * */ -int CHAMELEON_Desc_Create_User(CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, int m, int n, int p, int q, - void* (*get_blkaddr)( const CHAM_desc_t*, int, int ), - int (*get_blkldd) ( const CHAM_desc_t*, int ), - int (*get_rankof) ( const CHAM_desc_t*, int, int )) +int CHAMELEON_Desc_Create_User( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz, + int lm, int ln, int i, int j, int m, int n, int p, int q, + void* (*get_blkaddr)( const CHAM_desc_t*, int, int ), + int (*get_blkldd) ( const CHAM_desc_t*, int ), + int (*get_rankof) ( const CHAM_desc_t*, int, int ) ) { CHAM_context_t *chamctxt; CHAM_desc_t *desc; @@ -532,22 +499,9 @@ int CHAMELEON_Desc_Create_User(CHAM_desc_t **descptr, void *mat, cham_flttype_t return CHAMELEON_ERR_OUT_OF_RESOURCES; } - *desc = chameleon_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q, - get_blkaddr, get_blkldd, get_rankof); - - /* if the user gives a pointer to the overall data (tiles) we can use it */ - desc->use_mat = (mat == NULL) ? 0 : 1; - - /* memory of the matrix is handled by the user */ - desc->alloc_mat = 0; - - /* users data can have multiple forms: let him register tiles */ - desc->register_mat = 0; - - desc->mat = mat; - - /* Create runtime specific structure like registering data */ - RUNTIME_desc_create( desc ); + chameleon_desc_init( desc, mat, dtyp, mb, nb, bsiz, + lm, ln, i, j, m, n, p, q, + get_blkaddr, get_blkldd, get_rankof ); status = chameleon_desc_check( desc ); if (status != CHAMELEON_SUCCESS) { @@ -616,47 +570,11 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i chameleon_error("CHAMELEON_Desc_Create_OOC_User", "Only StarPU supports on-demand tile allocation"); return CHAMELEON_ERR_NOT_SUPPORTED; #else - CHAM_context_t *chamctxt; - CHAM_desc_t *desc; - int status; - - *descptr = NULL; - - chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_error("CHAMELEON_Desc_Create_OOC_User", "CHAMELEON not initialized"); - return CHAMELEON_ERR_NOT_INITIALIZED; - } - - /* Allocate memory and initialize the descriptor */ - desc = (CHAM_desc_t*)malloc(sizeof(CHAM_desc_t)); - if ( desc == NULL ) { - chameleon_error("CHAMELEON_Desc_Create_OOC_User", "malloc() failed"); - return CHAMELEON_ERR_OUT_OF_RESOURCES; - } - *desc = chameleon_desc_init_user( dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q, - chameleon_getaddr_null, NULL, get_rankof ); - - /* memory of the matrix is completely handled by runtime */ - desc->use_mat = 0; - desc->alloc_mat = 0; - desc->register_mat = 0; - - desc->mat = NULL; - desc->ooc = 1; - - /* Create scheduler structure like registering data */ - RUNTIME_desc_create( desc ); - - status = chameleon_desc_check( desc ); - if (status != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_Desc_Create_OOC_User", "invalid descriptor"); - CHAMELEON_Desc_Destroy( &desc ); - return status; - } - - *descptr = desc; - return CHAMELEON_SUCCESS; + int rc; + rc = CHAMELEON_Desc_Create_User( descptr, CHAMELEON_MAT_OOC, dtyp, mb, nb, bsiz, + lm, ln, i, j, m, n, p, q, + chameleon_getaddr_null, NULL, get_rankof ); + return rc; #endif } @@ -704,9 +622,9 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i int CHAMELEON_Desc_Create_OOC(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz, int lm, int ln, int i, int j, int m, int n, int p, int q) { - return CHAMELEON_Desc_Create_OOC_User( descptr, dtyp, mb, nb, bsiz, - lm, ln, i, j, m, n, p, q, - chameleon_getrankof_2d ); + return CHAMELEON_Desc_Create_User( descptr, CHAMELEON_MAT_OOC, dtyp, mb, nb, bsiz, + lm, ln, i, j, m, n, p, q, + chameleon_getaddr_null, NULL, NULL ); } /** @@ -741,8 +659,7 @@ int CHAMELEON_Desc_Destroy(CHAM_desc_t **desc) return CHAMELEON_ERR_UNALLOCATED; } - RUNTIME_desc_destroy( *desc ); - chameleon_desc_mat_free( *desc ); + chameleon_desc_destroy( *desc ); free(*desc); *desc = NULL; return CHAMELEON_SUCCESS; diff --git a/control/descriptor.h b/control/descriptor.h index dcf688df2eb0e85b564ca3bc2b0899ed4cee2f18..c3548c966ccb254287dc524dab723b12a705ad6d 100644 --- a/control/descriptor.h +++ b/control/descriptor.h @@ -37,6 +37,7 @@ inline static void* chameleon_geteltaddr(const CHAM_desc_t *A, int m, int n, int inline static void* chameleon_getaddr_cm (const CHAM_desc_t *A, int m, int n); inline static void* chameleon_getaddr_ccrb (const CHAM_desc_t *A, int m, int n); inline static void* chameleon_getaddr_null (const CHAM_desc_t *A, int m, int n); +inline static void* chameleon_getaddr_diag (const CHAM_desc_t *A, int m, int n); inline static int chameleon_getblkldd_cm (const CHAM_desc_t *A, int m); inline static int chameleon_getblkldd_ccrb(const CHAM_desc_t *A, int m); @@ -46,21 +47,16 @@ inline static int chameleon_getblkldd_ccrb(const CHAM_desc_t *A, int m); inline static int chameleon_getrankof_2d(const CHAM_desc_t *desc, int m, int n); inline static int chameleon_getrankof_2d_diag(const CHAM_desc_t *desc, int m, int n); -CHAM_desc_t chameleon_desc_init(cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, int m, int n, int p, int q); -CHAM_desc_t chameleon_desc_init_diag(cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, int m, int n, int p, int q); -CHAM_desc_t chameleon_desc_init_user(cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, - int m, int n, int p, int q, - void* (*get_blkaddr)( const CHAM_desc_t*, int, int ), - int (*get_blkldd)( const CHAM_desc_t*, int ), - int (*get_rankof)( const CHAM_desc_t*, int, int )); -CHAM_desc_t* chameleon_desc_submatrix(CHAM_desc_t *descA, int i, int j, int m, int n); - -int chameleon_desc_check (const CHAM_desc_t *desc); -int chameleon_desc_mat_alloc(CHAM_desc_t *desc); -int chameleon_desc_mat_free (CHAM_desc_t *desc); +int chameleon_desc_init ( CHAM_desc_t *desc, void *mat, + cham_flttype_t dtyp, int mb, int nb, int bsiz, + int lm, int ln, int i, int j, + int m, int n, int p, int q, + void* (*get_blkaddr)( const CHAM_desc_t*, int, int ), + int (*get_blkldd) ( const CHAM_desc_t*, int ), + int (*get_rankof) ( const CHAM_desc_t*, int, int ) ); +CHAM_desc_t* chameleon_desc_submatrix( CHAM_desc_t *descA, int i, int j, int m, int n ); +void chameleon_desc_destroy ( CHAM_desc_t *desc ); +int chameleon_desc_check ( const CHAM_desc_t *desc ); #define BLKLDD(A, k) A->get_blkldd( A, k ) @@ -116,6 +112,15 @@ inline static void *chameleon_getaddr_cm(const CHAM_desc_t *A, int m, int n) return (void*)((intptr_t)A->mat + (offset*eltsize) ); } +/** + * Internal function to return address of block (m,n) with m,n = block indices + */ +inline static void *chameleon_getaddr_diag( const CHAM_desc_t *A, int m, int n ) +{ + assert( m == n ); + return chameleon_getaddr_ccrb( A, m, 0 ); +} + /** * Internal function to return address of block (m,n) with m,n = block indices * This version lets the runtime allocate on-demand. @@ -170,7 +175,6 @@ inline static int chameleon_getblkldd_cm(const CHAM_desc_t *A, int m) { return A->llm; } - /** * Internal function to return MPI rank of element A(m,n) with m,n = block indices */ @@ -187,7 +191,7 @@ inline static int chameleon_getrankof_2d(const CHAM_desc_t *A, int m, int n) inline static int chameleon_getrankof_2d_diag(const CHAM_desc_t *A, int m, int n) { int mm = m + A->i / A->mb; - assert( n == 0 ); + assert( m == n ); return (mm % A->p) * A->q + (mm % A->q); } diff --git a/control/workspace.c b/control/workspace.c index 8f5814c318d154382b80fb01cee0bc7c812a566f..e743e33dbb8d40461a70bb0288d1cb824d656c21 100644 --- a/control/workspace.c +++ b/control/workspace.c @@ -74,32 +74,7 @@ int chameleon_alloc_ibnb_tile(int M, int N, cham_tasktype_t func, int type, CHAM lm = IB * MT; ln = NB * NT; - /* Allocate and initialize descriptor */ - *desc = (CHAM_desc_t*)malloc(sizeof(CHAM_desc_t)); - if (*desc == NULL) { - chameleon_error("chameleon_alloc_ibnb_tile", "malloc() failed"); - return CHAMELEON_ERR_OUT_OF_RESOURCES; - } - **desc = chameleon_desc_init(type, IB, NB, IB*NB, lm, ln, 0, 0, lm, ln, p, q); - - /* Allocate matrix */ - if (chameleon_desc_mat_alloc(*desc)) { - chameleon_error("chameleon_alloc_ibnb_tile", "malloc() failed"); - free(*desc); - return CHAMELEON_ERR_OUT_OF_RESOURCES; - } - - RUNTIME_desc_create( *desc ); - - /* Check that everything is ok */ - status = chameleon_desc_check(*desc); - if (status != CHAMELEON_SUCCESS) { - chameleon_error("chameleon_alloc_ibnb_tile", "invalid descriptor"); - free(*desc); - return status; - } - - return CHAMELEON_SUCCESS; + return CHAMELEON_Desc_Create( desc, NULL, type, IB, NB, IB*NB, lm, ln, 0, 0, lm, ln, p, q ); } /** @@ -144,18 +119,7 @@ int chameleon_alloc_ipiv(int M, int N, cham_tasktype_t func, int type, CHAM_desc /* TODO: Fix the distribution for IPIV */ *IPIV = (int*)malloc( size ); - *desc = (CHAM_desc_t*)malloc(sizeof(CHAM_desc_t)); - **desc = chameleon_desc_init(type, IB, NB, IB*NB, lm, ln, 0, 0, lm, ln, p, q ); - - if ( chameleon_desc_mat_alloc(*desc) ) { - chameleon_error("chameleon_alloc_ipiv", "malloc() failed"); - free(*desc); - return CHAMELEON_ERR_OUT_OF_RESOURCES; - } - - RUNTIME_desc_create( *desc ); - - return CHAMELEON_SUCCESS; + return CHAMELEON_Desc_Create( desc, NULL, type, IB, NB, IB*NB, lm, ln, 0, 0, lm, ln, p, q ); } /** @@ -193,8 +157,7 @@ int CHAMELEON_Dealloc_Workspace(CHAM_desc_t **desc) chameleon_error("CHAMELEON_Dealloc_Worspace", "attempting to deallocate a NULL pointer"); return CHAMELEON_ERR_UNALLOCATED; } - chameleon_desc_mat_free( *desc ); - RUNTIME_desc_destroy( *desc ); + chameleon_desc_destroy( *desc ); free(*desc); *desc = NULL; diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h index 4e7e0eaa202bae01f369954d87f683204552d07e..626a02de0989c56b5eafffef070bf107c13eb690 100644 --- a/include/chameleon/constants.h +++ b/include/chameleon/constants.h @@ -208,6 +208,13 @@ typedef enum chameleon_translation_e { ChamOutOfPlace = 2, } cham_translation_t; +/** + * @brief Constant to describe how to initialize the mat pointer in descriptors + */ +#define CHAMELEON_MAT_ALLOC_GLOBAL NULL +#define CHAMELEON_MAT_ALLOC_TILE ((void*)-1) +#define CHAMELEON_MAT_OOC ((void*)-2) + /** * CHAMELEON constants - success & error codes */