diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c index 32346a6e8805d671620a8722ba2a0c7852a97815..9a6a4f3dab3778963444ab4002158c270a5bf256 100644 --- a/compute/pzgelqf.c +++ b/compute/pzgelqf.c @@ -33,7 +33,99 @@ #define D(k) D, k, k /** - * Parallel tile LQ factorization - dynamic scheduling + * Parallel tile QR factorization (reduction Householder) - dynamic scheduling + * + * @param[in] genD + * Indicate if copies of the geqrt tiles must be done to speedup + * computations in updates. genD is considered only if D is not NULL. + */ +int chameleon_pzgelqf_step( int genD, int k, int ib, + CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, + RUNTIME_option_t *options, RUNTIME_sequence_t *sequence ) +{ + int m, n; + int tempkm, tempkn, tempmm, tempnn; + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + INSERT_TASK_zgelqt( + options, + tempkm, tempkn, ib, T->nb, + A(k, k), + T(k, k)); + + if ( genD ) { + int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; + int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; + INSERT_TASK_zlacpy( + options, + ChamUpper, tempDkm, tempDkn, + A(k, k), + D(k) ); +#if defined(CHAMELEON_USE_CUDA) + INSERT_TASK_zlaset( + options, + ChamLower, tempDkm, tempDkn, + 0., 1., + D(k) ); +#endif + } + + for (m = k+1; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + INSERT_TASK_zunmlq( + options, + ChamRight, ChamConjTrans, + tempmm, tempkn, tempkn, ib, T->nb, + D(k), + T(k, k), + A(m, k)); + } + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); + + for (n = k+1; n < A->nt; n++) { + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + + RUNTIME_data_migrate( sequence, A(k, k), + A->get_rankof( A, k, n ) ); + + /* TS kernel */ + INSERT_TASK_ztplqt( + options, + tempkm, tempnn, 0, ib, T->nb, + A(k, k), + A(k, n), + T(k, n)); + for (m = k+1; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + + RUNTIME_data_migrate( sequence, A(m, k), + A->get_rankof( A, m, n ) ); + + INSERT_TASK_ztpmlqt( + options, + ChamRight, ChamConjTrans, + tempmm, tempnn, A->mb, 0, ib, T->nb, + A(k, n), + T(k, n), + A(m, k), + A(m, n)); + } + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); + } + + return 1; +} + +/** + * Parallel tile LQ factorization (reduction Householder) - dynamic scheduling + * + * @param[in] genD + * Indicate if copies of the gelqt tiles must be done to speedup + * computations in updates. genD is considered only if D is not NULL. + * */ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) @@ -43,9 +135,7 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D size_t ws_worker = 0; size_t ws_host = 0; - int k, m, n; - int tempkm, tempkn, tempmm, tempnn; - int ib, minMNT; + int k, m, ib, minMNT; chamctxt = chameleon_context_self(); if (sequence->status != CHAMELEON_SUCCESS) { @@ -61,7 +151,7 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D minMNT = A->mt; } - if ( D == NULL ) { + if ( (genD == 0) || (D == NULL) ) { D = A; genD = 0; } @@ -92,73 +182,8 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D for (k = 0; k < minMNT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - INSERT_TASK_zgelqt( - &options, - tempkm, tempkn, ib, T->nb, - A(k, k), - T(k, k)); - if ( genD ) { - int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; - int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; - INSERT_TASK_zlacpy( - &options, - ChamUpper, tempDkm, tempDkn, - A(k, k), - D(k) ); -#if defined(CHAMELEON_USE_CUDA) - INSERT_TASK_zlaset( - &options, - ChamLower, tempDkm, tempDkn, - 0., 1., - D(k) ); -#endif - } - for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - INSERT_TASK_zunmlq( - &options, - ChamRight, ChamConjTrans, - tempmm, tempkn, tempkn, ib, T->nb, - D(k), - T(k, k), - A(m, k)); - } - RUNTIME_data_flush( sequence, D(k) ); - RUNTIME_data_flush( sequence, T(k, k) ); - - for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - - RUNTIME_data_migrate( sequence, A(k, k), - A->get_rankof( A, k, n ) ); - - /* TS kernel */ - INSERT_TASK_ztplqt( - &options, - tempkm, tempnn, 0, ib, T->nb, - A(k, k), - A(k, n), - T(k, n)); - for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - - RUNTIME_data_migrate( sequence, A(m, k), - A->get_rankof( A, m, n ) ); - - INSERT_TASK_ztpmlqt( - &options, - ChamRight, ChamConjTrans, - tempmm, tempnn, A->mb, 0, ib, T->nb, - A(k, n), - T(k, n), - A(m, k), - A(m, n)); - } - RUNTIME_data_flush( sequence, A(k, n) ); - RUNTIME_data_flush( sequence, T(k, n) ); - } + chameleon_pzgelqf_step( genD, k, ib, + A, T, D, &options, sequence ); /* Restore the original location of the tiles */ for (m = k; m < A->mt; m++) { diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c index b69b84b50d7c2a0ce4359e83e9bdb475a8bf2cc1..7bc06b159f2cfaf7c8a684b0f4bdaa6dbea0b9f6 100644 --- a/compute/pzgelqf_param.c +++ b/compute/pzgelqf_param.c @@ -28,21 +28,162 @@ /** * Parallel tile LQ factorization (reduction Householder) - dynamic scheduling + * + * @param[in] genD + * Indicate if copies of the gelqt tiles must be done to speedup + * computations in updates. genD is considered only if D is not NULL. + * + * @param[in] uplo + * - ChamUpper: Classic LQ factorization of the matrix A. + * - ChamLower: LQ factorization of the TTLQT kernel. + * - ChamUpperLower: LQ factorization of the TSLQT kernel. + */ +int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib, + const libhqr_tree_t *qrtree, int *tiles, + CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D, + RUNTIME_option_t *options, RUNTIME_sequence_t *sequence ) +{ + CHAM_desc_t *T; + int m, n, i, p; + int L, nbgelqt; + int tempkmin, tempkm, tempnn, tempmm, temppn; + int node, nbtiles; + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + + /* The number of geqrt to apply */ + nbgelqt = qrtree->getnbgeqrf( qrtree, k ); + + T = TS; + for (i = 0; i < nbgelqt; i++) { + p = qrtree->getm( qrtree, k, i ); + + /* We skip the LQ factorization if this is the last diagonal tile */ + if ( (uplo == ChamLower) && (p == k) ) { + continue; + } + + temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; + tempkmin = chameleon_min(tempkm, temppn); + + INSERT_TASK_zgelqt( + options, + tempkm, temppn, ib, T->nb, + A(k, p), T(k, p)); + + if ( genD ) { + int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; + int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; + + INSERT_TASK_zlacpy( + options, + ChamUpper, tempDkm, tempDpn, + A(k, p), D(k, p) ); +#if defined(CHAMELEON_USE_CUDA) + INSERT_TASK_zlaset( + options, + ChamLower, tempDkm, tempDpn, + 0., 1., + D(k, p) ); +#endif + } + + for (m = k+1; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + INSERT_TASK_zunmlq( + options, + ChamRight, ChamConjTrans, + tempmm, temppn, tempkmin, ib, T->nb, + D(k, p), + T(k, p), + A(m, p)); + } + + if ( genD || ((k+1) < A->mt)) { + RUNTIME_data_flush( sequence, D(k, p) ); + } + RUNTIME_data_flush( sequence, T(k, p) ); + } + + /* Setting the order of the tiles */ + nbtiles = libhqr_walk_stepk( qrtree, k, tiles ); + + for (i = 0; i < nbtiles; i++) { + n = tiles[i]; + p = qrtree->currpiv( qrtree, k, n ); + + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + + if ( qrtree->gettype( qrtree, k, n ) == LIBHQR_KILLED_BY_TS ) { + /* TS kernel */ + T = TS; + L = 0; + + /* Force TT kernel if this is the last diagonal tile */ + if ( (uplo == ChamLower) && (n == k) ) { + L = tempnn; + } + } + else { + /* TT kernel */ + T = TT; + L = tempnn; + } + + node = A->get_rankof( A, k, n ); + RUNTIME_data_migrate( sequence, A(k, p), node ); + RUNTIME_data_migrate( sequence, A(k, n), node ); + + INSERT_TASK_ztplqt( + options, + tempkm, tempnn, chameleon_min(L, tempkm), ib, T->nb, + A(k, p), + A(k, n), + T(k, n)); + + for (m = k+1; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + + node = A->get_rankof( A, m, n ); + RUNTIME_data_migrate( sequence, A(m, p), node ); + RUNTIME_data_migrate( sequence, A(m, n), node ); + + INSERT_TASK_ztpmlqt( + options, + ChamRight, ChamConjTrans, + tempmm, tempnn, tempkm, L, ib, T->nb, + A(k, n), + T(k, n), + A(m, p), + A(m, n)); + } + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); + } + + return tiles[nbtiles]; +} + +/** + * Parallel tile LQ factorization (reduction Householder) - dynamic scheduling + * + * @param[in] genD + * Indicate if copies of the gelqt tiles must be done to speedup + * computations in updates. genD is considered only if D is not NULL. + * */ -void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t *A, +void chameleon_pzgelqf_param( int genD, int K, + const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { CHAM_context_t *chamctxt; RUNTIME_option_t options; - CHAM_desc_t *T; size_t ws_worker = 0; size_t ws_host = 0; - int k, m, n, i, p; - int K, L, nbgeqrt; - int tempkmin, tempkm, tempnn, tempmm, temppn; - int ib, node, nbtiles, *tiles; + int k, m; + int ib, *tiles; chamctxt = chameleon_context_self(); if (sequence->status != CHAMELEON_SUCCESS) { @@ -52,7 +193,7 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ib = CHAMELEON_IB; - if ( D == NULL ) { + if ( (genD == 0) || (D == NULL) ) { D = A; genD = 0; } @@ -82,119 +223,11 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t /* Initialisation of temporary tiles array */ tiles = (int*)calloc(qrtree->mt, sizeof(int)); - K = chameleon_min(A->mt, A->nt); - for (k = 0; k < K; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - - /* The number of geqrt to apply */ - nbgeqrt = qrtree->getnbgeqrf(qrtree, k); - - T = TS; - for (i = 0; i < nbgeqrt; i++) { - p = qrtree->getm(qrtree, k, i); - - /* /\* We skip the LQ factorization if this is the last diagonal tile *\/ */ - /* if ( (uplo == ChamLower) && (p == k) ) { */ - /* continue; */ - /* } */ - - temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; - tempkmin = chameleon_min(tempkm, temppn); - - INSERT_TASK_zgelqt( - &options, - tempkm, temppn, ib, T->nb, - A(k, p), T(k, p)); - - if ( genD ) { - int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; - int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; - - INSERT_TASK_zlacpy( - &options, - ChamUpper, tempDkm, tempDpn, - A(k, p), D(k, p) ); -#if defined(CHAMELEON_USE_CUDA) - INSERT_TASK_zlaset( - &options, - ChamLower, tempDkm, tempDpn, - 0., 1., - D(k, p) ); -#endif - } - - for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - INSERT_TASK_zunmlq( - &options, - ChamRight, ChamConjTrans, - tempmm, temppn, tempkmin, ib, T->nb, - D(k, p), - T(k, p), - A(m, p)); - } - RUNTIME_data_flush( sequence, D(k, p) ); - RUNTIME_data_flush( sequence, T(k, p) ); - } - - /* Setting the order of the tiles */ - nbtiles = libhqr_walk_stepk( qrtree, k, tiles ); - - for (i = 0; i < nbtiles; i++) { - n = tiles[i]; - p = qrtree->currpiv( qrtree, k, n ); - - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - - if ( qrtree->gettype( qrtree, k, n ) == LIBHQR_KILLED_BY_TS ) { - /* TS kernel */ - T = TS; - L = 0; - - /* /\* Force TT kernel if this is the last diagonal tile *\/ */ - /* if ( (uplo == ChamLower) && (n == k) ) { */ - /* L = tempnn; */ - /* } */ - } - else { - /* TT kernel */ - T = TT; - L = tempnn; - } - - node = A->get_rankof( A, k, n ); - RUNTIME_data_migrate( sequence, A(k, p), node ); - RUNTIME_data_migrate( sequence, A(k, n), node ); - - INSERT_TASK_ztplqt( - &options, - tempkm, tempnn, chameleon_min(L, tempkm), ib, T->nb, - A(k, p), - A(k, n), - T(k, n)); - - for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - - node = A->get_rankof( A, m, n ); - RUNTIME_data_migrate( sequence, A(m, p), node ); - RUNTIME_data_migrate( sequence, A(m, n), node ); - - INSERT_TASK_ztpmlqt( - &options, - ChamRight, ChamConjTrans, - tempmm, tempnn, tempkm, L, ib, T->nb, - A(k, n), - T(k, n), - A(m, p), - A(m, n)); - } - RUNTIME_data_flush( sequence, A(k, n) ); - RUNTIME_data_flush( sequence, T(k, n) ); - } + chameleon_pzgelqf_param_step( genD, ChamUpper, k, ib, qrtree, tiles, + A, TS, TT, D, &options, sequence ); /* Restore the original location of the tiles */ for (m = k; m < A->mt; m++) { diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c index a7637392de7071440c77ae8e17d30a41ea879605..49c270cf0fa78cb7afa4b4a77275aebcab7ba75d 100644 --- a/compute/pzgeqrf.c +++ b/compute/pzgeqrf.c @@ -32,7 +32,101 @@ #define D(k) D, k, k /** - * Parallel tile QR factorization - dynamic scheduling + * Parallel tile QR factorization (reduction Householder) - dynamic scheduling + * + * @param[in] genD + * Indicate if copies of the geqrt tiles must be done to speedup + * computations in updates. genD is considered only if D is not NULL. + */ +int chameleon_pzgeqrf_step( int genD, int k, int ib, + CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, + RUNTIME_option_t *options, RUNTIME_sequence_t *sequence ) +{ + int m, n; + int tempkm, tempkn, tempnn, tempmm; + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + + INSERT_TASK_zgeqrt( + options, + tempkm, tempkn, ib, T->nb, + A(k, k), + T(k, k)); + + if ( genD ) { + int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; + int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; + INSERT_TASK_zlacpy( + options, + ChamLower, tempDkm, tempDkn, + A(k, k), + D(k) ); +#if defined(CHAMELEON_USE_CUDA) + INSERT_TASK_zlaset( + options, + ChamUpper, tempDkm, tempDkn, + 0., 1., + D(k) ); +#endif + } + for (n = k+1; n < A->nt; n++) { + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + INSERT_TASK_zunmqr( + options, + ChamLeft, ChamConjTrans, + tempkm, tempnn, tempkm, ib, T->nb, + D(k), + T(k, k), + A(k, n)); + } + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); + + for (m = k+1; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + + RUNTIME_data_migrate( sequence, A(k, k), + A->get_rankof( A, m, k ) ); + + /* TS kernel */ + INSERT_TASK_ztpqrt( + options, + tempmm, tempkn, 0, ib, T->nb, + A(k, k), + A(m, k), + T(m, k)); + + for (n = k+1; n < A->nt; n++) { + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + + RUNTIME_data_migrate( sequence, A(k, n), + A->get_rankof( A, m, n ) ); + + /* TS kernel */ + INSERT_TASK_ztpmqrt( + options, + ChamLeft, ChamConjTrans, + tempmm, tempnn, A->nb, 0, ib, T->nb, + A(m, k), + T(m, k), + A(k, n), + A(m, n)); + } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); + } + + return 1; +} + +/** + * Parallel tile QR factorization (reduction Householder) - dynamic scheduling + * + * @param[in] genD + * Indicate if copies of the geqrt tiles must be done to speedup + * computations in updates. genD is considered only if D is not NULL. + * */ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) @@ -42,10 +136,7 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D size_t ws_worker = 0; size_t ws_host = 0; - int k, m, n; - int tempkm, tempkn, tempnn, tempmm; - int ib; - int minMNT = chameleon_min(A->mt, A->nt); + int k, n, ib, minMNT; chamctxt = chameleon_context_self(); if (sequence->status != CHAMELEON_SUCCESS) { @@ -55,7 +146,13 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D ib = CHAMELEON_IB; - if ( D == NULL ) { + if (A->m > A->n) { + minMNT = A->nt; + } else { + minMNT = A->mt; + } + + if ( (genD == 0) || (D == NULL) ) { D = A; genD = 0; } @@ -86,75 +183,8 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D for (k = 0; k < minMNT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - INSERT_TASK_zgeqrt( - &options, - tempkm, tempkn, ib, T->nb, - A(k, k), - T(k, k)); - if ( genD ) { - int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; - int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; - INSERT_TASK_zlacpy( - &options, - ChamLower, tempDkm, tempDkn, - A(k, k), - D(k) ); -#if defined(CHAMELEON_USE_CUDA) - INSERT_TASK_zlaset( - &options, - ChamUpper, tempDkm, tempDkn, - 0., 1., - D(k) ); -#endif - } - for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - INSERT_TASK_zunmqr( - &options, - ChamLeft, ChamConjTrans, - tempkm, tempnn, tempkm, ib, T->nb, - D(k), - T(k, k), - A(k, n)); - } - RUNTIME_data_flush( sequence, D(k) ); - RUNTIME_data_flush( sequence, T(k, k) ); - - for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - - RUNTIME_data_migrate( sequence, A(k, k), - A->get_rankof( A, m, k ) ); - - /* TS kernel */ - INSERT_TASK_ztpqrt( - &options, - tempmm, tempkn, 0, ib, T->nb, - A(k, k), - A(m, k), - T(m, k)); - - for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - - RUNTIME_data_migrate( sequence, A(k, n), - A->get_rankof( A, m, n ) ); - - /* TS kernel */ - INSERT_TASK_ztpmqrt( - &options, - ChamLeft, ChamConjTrans, - tempmm, tempnn, A->nb, 0, ib, T->nb, - A(m, k), - T(m, k), - A(k, n), - A(m, n)); - } - RUNTIME_data_flush( sequence, A(m, k) ); - RUNTIME_data_flush( sequence, T(m, k) ); - } + chameleon_pzgeqrf_step( genD, k, ib, + A, T, D, &options, sequence ); /* Restore the original location of the tiles */ for (n = k; n < A->nt; n++) { diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c index a4e08e061087e47896d9586013b44878c0283c9c..d2bd084d2f898bafeb3d3c53d6d5dc76d824c497 100644 --- a/compute/pzgeqrf_param.c +++ b/compute/pzgeqrf_param.c @@ -164,7 +164,6 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib, return tiles[nbtiles]; } - /** * Parallel tile QR factorization (reduction Householder) - dynamic scheduling * diff --git a/control/compute_z.h b/control/compute_z.h index d4acf6976e8eb512ff15f9095821292ecca91b27..295c64264eed47ed5a67ead3d2ff16d423175105 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -133,6 +133,16 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, CHAM_des void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzbuild( cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void* user_build_callback, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +int chameleon_pzgelqf_step( int genD, int k, int ib, + CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, + RUNTIME_option_t *options, RUNTIME_sequence_t *sequence ); +int chameleon_pzgeqrf_step( int genD, int k, int ib, + CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, + RUNTIME_option_t *options, RUNTIME_sequence_t *sequence ); +int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib, + const libhqr_tree_t *qrtree, int *tiles, + CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D, + RUNTIME_option_t *options, RUNTIME_sequence_t *sequence ); int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib, const libhqr_tree_t *qrtree, int *tiles, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D, @@ -142,7 +152,8 @@ void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D, RUNTIME_option_t *options, RUNTIME_sequence_t *sequence ); -void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D, +void chameleon_pzgelqf_param( int genD, int K, const libhqr_tree_t *qrtree, + CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgeqrf_param( int genD, int K, const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,