diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c index 1dbea93bce01b21df0e941501138fafdf8593cb7..09dbfc41eae282599a3ec0de4dc0279fe9522ab4 100644 --- a/compute/pzgelqf.c +++ b/compute/pzgelqf.c @@ -127,6 +127,9 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, T(k, k), T->mb, A(m, k), ldam); } + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); + for (n = k+1; n < A->nt; n++) { tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; @@ -156,6 +159,8 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, A(m, k), ldam, A(m, n), ldam); } + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } /* Restore the original location of the tiles */ diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c index ef2a600802ac27c2956017e405af7a2ce2baa4f9..c6c5ceeaa3e7a422700bfa9b4f0fc488df9990c6 100644 --- a/compute/pzgelqf_param.c +++ b/compute/pzgelqf_param.c @@ -135,6 +135,8 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, T(k, p), T->mb, A(m, p), ldam); } + RUNTIME_data_flush( sequence, D(k, p) ); + RUNTIME_data_flush( sequence, T(k, p) ); } /* Setting the order of the tiles */ @@ -188,6 +190,8 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, A(m, p), ldam, A(m, n), ldam); } + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } /* Restore the original location of the tiles */ diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c index 82c63ae144166c77b852674684f56c040689dc78..e8f7638fbecea65ab14a3904e2d0c0d6aa568e4c 100644 --- a/compute/pzgelqfrh.c +++ b/compute/pzgelqfrh.c @@ -127,6 +127,8 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, T(k, N), T->mb, A(m, N), ldam); } + RUNTIME_data_flush( sequence, D(k, N) ); + RUNTIME_data_flush( sequence, T(k, N) ); for (n = N+1; n < chameleon_min(N+BS, A->nt); n++) { tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; @@ -158,6 +160,8 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, A(m, N), ldam, A(m, n), ldam); } + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } } for (RD = BS; RD < A->nt-k; RD *= 2) { @@ -195,6 +199,8 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, A (m, N ), ldam, A (m, N+RD), ldam); } + RUNTIME_data_flush( sequence, A (k, N+RD) ); + RUNTIME_data_flush( sequence, T2(k, N+RD) ); } } diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c index cb06ffea49445db55210c83c04e9c2615a5d1704..38574eb128a302126c87994cc4eca8ca1c79f4e8 100644 --- a/compute/pzgeqrf.c +++ b/compute/pzgeqrf.c @@ -121,6 +121,8 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, T(k, k), T->mb, A(k, n), ldak); } + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); for (m = k+1; m < A->mt; m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; @@ -153,6 +155,8 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, A(k, n), ldak, A(m, n), ldam); } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } /* Restore the original location of the tiles */ diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c index 897c935c56f5036cc0cff2e63010c2834eaabf6c..efc866dabf22abfeb57c8aaad6a147b2749b877d 100644 --- a/compute/pzgeqrf_param.c +++ b/compute/pzgeqrf_param.c @@ -132,6 +132,8 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, T(m, k), T->mb, A(m, n), ldam); } + RUNTIME_data_flush( sequence, D(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } /* Setting the order of the tiles */ @@ -185,6 +187,8 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, A(p, n), ldap, A(m, n), ldam); } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } /* Restore the original location of the tiles */ diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c index 8263a77917d209e984d7e928ea9c0ac863951aa1..2235fceb6de0d0bef056e22c33a1db77808f46b0 100644 --- a/compute/pzgeqrfrh.c +++ b/compute/pzgeqrfrh.c @@ -125,6 +125,8 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, T(M, k), T->mb, A(M, n), ldaM); } + RUNTIME_data_flush( sequence, D(M, k) ); + RUNTIME_data_flush( sequence, T(M, k) ); for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; @@ -156,6 +158,8 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, A(M, n), ldaM, A(m, n), ldam); } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } } for (RD = BS; RD < A->mt-k; RD *= 2) { @@ -194,6 +198,8 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, A (M, n), ldaM, A (M+RD, n), ldaMRD); } + RUNTIME_data_flush( sequence, A (M+RD, k) ); + RUNTIME_data_flush( sequence, T2(M+RD, k) ); } } diff --git a/compute/pzunglq.c b/compute/pzunglq.c index f6f83a0b56ace9a31dcb87ed927951b3bf09254f..b34e519a7cfe42a023693b0f3e8ae76cb1a6a1ae 100644 --- a/compute/pzunglq.c +++ b/compute/pzunglq.c @@ -116,6 +116,8 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc Q(m, k), ldqm, Q(m, n), ldqm); } + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -146,6 +148,8 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc T(k, k), T->mb, Q(m, k), ldqm); } + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); RUNTIME_iteration_pop(morse); } diff --git a/compute/pzunglq_param.c b/compute/pzunglq_param.c index 9ca050a4db78c7682e30b832a3d07bac0cd204c2..47a83950101f8b0756b20d3e1fecd35c96e8da46 100644 --- a/compute/pzunglq_param.c +++ b/compute/pzunglq_param.c @@ -127,6 +127,8 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des Q(m, p), ldqm, Q(m, n), ldqm); } + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } T = TS; @@ -165,7 +167,10 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des T(k, p), T->mb, Q(m, p), ldqm); } + RUNTIME_data_flush( sequence, D(k, p) ); + RUNTIME_data_flush( sequence, T(k, p) ); } + RUNTIME_iteration_pop(morse); } diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c index f40c63638be5ab70244a0a6f6daff3562624edf9..adb58487874ff71f17e467024c7ee44d04aa3238 100644 --- a/compute/pzunglqrh.c +++ b/compute/pzunglqrh.c @@ -113,6 +113,9 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, Q (m, N ), ldqm, Q (m, N+RD), ldqm); } + + RUNTIME_data_flush( sequence, A (k, N+RD) ); + RUNTIME_data_flush( sequence, T2(k, N+RD) ); } } for (N = k; N < A->nt; N += BS) { @@ -140,6 +143,9 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, Q(m, N), ldqm, Q(m, n), ldqm); } + + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -171,6 +177,8 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, T(k, N), T->mb, Q(m, N), ldqm); } + RUNTIME_data_flush( sequence, D(k, N) ); + RUNTIME_data_flush( sequence, T(k, N) ); } RUNTIME_iteration_pop(morse); } diff --git a/compute/pzungqr.c b/compute/pzungqr.c index 2bd2b302f46586a33f07a16b768b6264091474c5..1c85fc7a63a8d3bec7eb71622bb252b74f5d9fd1 100644 --- a/compute/pzungqr.c +++ b/compute/pzungqr.c @@ -118,6 +118,8 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc Q(k, n), ldqk, Q(m, n), ldqm); } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } #if defined(CHAMELEON_COPY_DIAG) @@ -149,6 +151,9 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc T(k, k), T->mb, Q(k, n), ldqk); } + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); + RUNTIME_iteration_pop(morse); } diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c index fe7777d095cf4deb3e8b7df28abe77f4f251f6af..35107c789d81e8ce527a408a26d7eccb8ced70d3 100644 --- a/compute/pzungqr_param.c +++ b/compute/pzungqr_param.c @@ -133,6 +133,8 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, Q(p, n), ldqp, Q(m, n), ldqm); } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } T = TS; @@ -174,7 +176,10 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, T(m, k), T->mb, Q(m, n), ldqm); } + RUNTIME_data_flush( sequence, D(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } + RUNTIME_iteration_pop(morse); } diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c index 9b3422cd4afae779360c8a5d5ca71682bd32e6d1..4b16f52284bd6f625df8b634ba6f329a888d5d14 100644 --- a/compute/pzungqrrh.c +++ b/compute/pzungqrrh.c @@ -116,6 +116,9 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, Q (M, n), ldqM, Q (M+RD, n), ldqMRD); } + + RUNTIME_data_flush( sequence, A (M+RD, k) ); + RUNTIME_data_flush( sequence, T2(M+RD, k) ); } } for (M = k; M < A->mt; M += BS) { @@ -146,6 +149,8 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, Q(M, n), ldqM, Q(m, n), ldqm); } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } #if defined(CHAMELEON_COPY_DIAG) @@ -178,6 +183,8 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, T(M, k), T->mb, Q(M, n), ldqM); } + RUNTIME_data_flush( sequence, D(M, k) ); + RUNTIME_data_flush( sequence, T(M, k) ); } RUNTIME_iteration_pop(morse); } diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c index df686642bc6831ce37ae65a92f0be9e5befd0bea..9e0ddf3839110cdd569f2c0613e2c6472a14f8fe 100644 --- a/compute/pzunmlq.c +++ b/compute/pzunmlq.c @@ -127,6 +127,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, T(k, k), T->mb, B(k, n), ldbk); } + + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); + for (m = k+1; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); @@ -146,6 +150,9 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, B(k, n), ldbk, B(m, n), ldbm); } + + RUNTIME_data_flush( sequence, A(k, m) ); + RUNTIME_data_flush( sequence, T(k, m) ); } /* Restore the original location of the tiles */ @@ -187,6 +194,9 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, B(k, n), ldbk, B(m, n), ldbm); } + + RUNTIME_data_flush( sequence, A(k, m) ); + RUNTIME_data_flush( sequence, T(k, m) ); } #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -216,6 +226,8 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, T(k, k), T->mb, B(k, n), ldbk); } + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); RUNTIME_iteration_pop(morse); } } @@ -250,6 +262,9 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, B(m, k), ldbm, B(m, n), ldbm); } + + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -281,6 +296,9 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, B(m, k), ldbm); } + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); + RUNTIME_iteration_pop(morse); } } @@ -319,6 +337,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, T(k, k), T->mb, B(m, k), ldbm); } + + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); + for (n = k+1; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; for (m = 0; m < B->mt; m++) { @@ -338,6 +360,9 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, B(m, k), ldbm, B(m, n), ldbm); } + + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } /* Restore the original location of the tiles */ diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c index c37ea1b59b484c14a23a2b63533e72d6cc30f709..6bfbdb1e7174b033749065d3f7e90dec52ad45ee 100644 --- a/compute/pzunmlq_param.c +++ b/compute/pzunmlq_param.c @@ -127,6 +127,9 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, T(k, p), T->mb, B(p, n), ldbp); } + + RUNTIME_data_flush( sequence, D(k, p) ); + RUNTIME_data_flush( sequence, T(k, p) ); } /* Setting the order of the tiles*/ @@ -167,6 +170,8 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, B(p, n), ldbp, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(k, m) ); + RUNTIME_data_flush( sequence, T(k, m) ); } /* Restore the original location of the tiles */ @@ -226,6 +231,8 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, B(p, n), ldbp, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(k, m) ); + RUNTIME_data_flush( sequence, T(k, m) ); } T = TS; @@ -264,7 +271,11 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, T(k, p), T->mb, B(p, n), ldbp); } + + RUNTIME_data_flush( sequence, D(k, p) ); + RUNTIME_data_flush( sequence, T(k, p) ); } + RUNTIME_iteration_pop(morse); } } @@ -318,6 +329,8 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, B(m, p), ldbm, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } T = TS; @@ -356,7 +369,11 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, T(k, p), T->mb, B(m, p), ldbm); } + + RUNTIME_data_flush( sequence, D(k, p) ); + RUNTIME_data_flush( sequence, T(k, p) ); } + RUNTIME_iteration_pop(morse); } } @@ -403,7 +420,11 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, T(k, p), TS->mb, B(m, p), ldbm); } + + RUNTIME_data_flush( sequence, D(k, p) ); + RUNTIME_data_flush( sequence, T(k, p) ); } + /* Setting the order of tiles */ libhqr_walk_stepk(qrtree, k, tiles + (k+1)); @@ -443,6 +464,8 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, B(m, p), ldbm, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } RUNTIME_iteration_pop(morse); diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c index cceda501cb3b64716548cc4722de92d87b2eef69..539b991698e6d2803200522b874fa7e1d3b8b558 100644 --- a/compute/pzunmlqrh.c +++ b/compute/pzunmlqrh.c @@ -123,6 +123,9 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, T(k, N), T->mb, B(N, n), ldbN); } + RUNTIME_data_flush( sequence, D(k, N) ); + RUNTIME_data_flush( sequence, T(k, N) ); + for (m = N+1; m < chameleon_min(N+BS, A->nt); m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); @@ -143,6 +146,8 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, B(N, n), ldbN, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(k, m) ); + RUNTIME_data_flush( sequence, T(k, m) ); } } for (RD = BS; RD < A->nt-k; RD *= 2) { @@ -168,6 +173,8 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, B (N, n), ldbN, B (N+RD, n), ldbNRD); } + RUNTIME_data_flush( sequence, A (k, N+RD) ); + RUNTIME_data_flush( sequence, T2(k, N+RD) ); } } @@ -214,6 +221,8 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, B (N, n), ldbN, B (N+RD, n), ldbNRD); } + RUNTIME_data_flush( sequence, A (k, N+RD) ); + RUNTIME_data_flush( sequence, T2(k, N+RD) ); } } for (N = k; N < A->nt; N += BS) { @@ -241,6 +250,8 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, B(N, n), ldbN, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(k, m) ); + RUNTIME_data_flush( sequence, T(k, m) ); } #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -271,6 +282,8 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, T(k, N), T->mb, B(N, n), ldbN); } + RUNTIME_data_flush( sequence, D(k, N) ); + RUNTIME_data_flush( sequence, T(k, N) ); } RUNTIME_iteration_pop(morse); } @@ -311,6 +324,8 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, B (m, N ), ldbm, B (m, N+RD), ldbm); } + RUNTIME_data_flush( sequence, A (k, N+RD) ); + RUNTIME_data_flush( sequence, T2(k, N+RD) ); } } for (N = k; N < A->nt; N += BS) { @@ -337,6 +352,8 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, B(m, N), ldbm, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -368,6 +385,8 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, T(k, N), T->mb, B(m, N), ldbm); } + RUNTIME_data_flush( sequence, D(k, N) ); + RUNTIME_data_flush( sequence, T(k, N) ); } RUNTIME_iteration_pop(morse); @@ -410,6 +429,9 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, T(k, N), T->mb, B(m, N), ldbm); } + RUNTIME_data_flush( sequence, D(k, N) ); + RUNTIME_data_flush( sequence, T(k, N) ); + for (n = N+1; n < chameleon_min(N+BS, A->nt); n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; for (m = 0; m < B->mt; m++) { @@ -431,6 +453,8 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, B(m, N), ldbm, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(k, n) ); + RUNTIME_data_flush( sequence, T(k, n) ); } } for (RD = BS; RD < A->nt-k; RD *= 2) { @@ -455,6 +479,8 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, B (m, N ), ldbm, B (m, N+RD), ldbm); } + RUNTIME_data_flush( sequence, A (k, N+RD) ); + RUNTIME_data_flush( sequence, T2(k, N+RD) ); } } diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c index 3a0f3c18d97e41252869371e1555a1a197cd5308..c1897dcc1db6d8ac82d911afc274b1082f310074 100644 --- a/compute/pzunmqr.c +++ b/compute/pzunmqr.c @@ -127,6 +127,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, T(k, k), T->mb, B(k, n), ldbk); } + + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); + for (m = k+1; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldam = BLKLDD(A, m); @@ -147,6 +151,9 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, B(k, n), ldbk, B(m, n), ldbm); } + + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } /* Restore the original location of the tiles */ @@ -189,7 +196,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, B(k, n), ldbk, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } + #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, @@ -218,6 +228,8 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, T(k, k), T->mb, B(k, n), ldbk); } + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); RUNTIME_iteration_pop(morse); } } @@ -254,6 +266,9 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, B(m, k), ldbm, B(m, n), ldbm); } + + RUNTIME_data_flush( sequence, A(n, k) ); + RUNTIME_data_flush( sequence, T(n, k) ); } #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -285,6 +300,9 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, B(m, k), ldbm); } + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); + RUNTIME_iteration_pop(morse); } } @@ -323,6 +341,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, T(k, k), T->mb, B(m, k), ldbm); } + + RUNTIME_data_flush( sequence, D(k) ); + RUNTIME_data_flush( sequence, T(k, k) ); + for (n = k+1; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldan = BLKLDD(A, n); @@ -343,6 +365,9 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, B(m, k), ldbm, B(m, n), ldbm); } + + RUNTIME_data_flush( sequence, A(n, k) ); + RUNTIME_data_flush( sequence, T(n, k) ); } /* Restore the original location of the tiles */ diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c index 0ad3b1f87eee68a324af18d45bc5138fb3644144..55c2b74104127457e7977e11da0a14fcceeaa7a1 100644 --- a/compute/pzunmqr_param.c +++ b/compute/pzunmqr_param.c @@ -127,6 +127,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, T(m, k), T->mb, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, D(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } /* Setting the order of the tiles*/ @@ -168,7 +170,9 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, B(p, n), ldbp, B(m, n), ldbm); } - } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); + } /* Restore the original location of the tiles */ for (n = 0; n < B->nt; n++) { @@ -227,6 +231,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, B(p, n), ldbp, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } T = TS; @@ -266,7 +272,11 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, T(m, k), T->mb, B(m, n), ldbm); } + + RUNTIME_data_flush( sequence, D(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } + RUNTIME_iteration_pop(morse); } } @@ -320,6 +330,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, B(m, p), ldbm, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(n, k) ); + RUNTIME_data_flush( sequence, T(n, k) ); } T = TS; @@ -359,6 +371,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, T(n, k), T->mb, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, D(n, k) ); + RUNTIME_data_flush( sequence, T(n, k) ); } RUNTIME_iteration_pop(morse); } @@ -405,7 +419,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, T(n, k), T->mb, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, D(n, k) ); + RUNTIME_data_flush( sequence, T(n, k) ); } + /* Setting the order of tiles */ libhqr_walk_stepk(qrtree, k, tiles + (k+1)); @@ -446,6 +463,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, B(m, p), ldbm, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(n, k) ); + RUNTIME_data_flush( sequence, T(n, k) ); } RUNTIME_iteration_pop(morse); diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c index 46edc8be91f4ae24975b9bd2d8ec428938d3009f..c4d6e3e34367dad7d10634662935ac042d32e280 100644 --- a/compute/pzunmqrrh.c +++ b/compute/pzunmqrrh.c @@ -123,6 +123,9 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, T(M, k), T->mb, B(M, n), ldbM); } + RUNTIME_data_flush( sequence, D(M, k) ); + RUNTIME_data_flush( sequence, T(M, k) ); + for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldbm = BLKLDD(B, m); @@ -144,6 +147,8 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, B(M, n), ldbM, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } } for (RD = BS; RD < A->mt-k; RD *= 2) { @@ -169,6 +174,8 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, B (M, n), ldbM, B (M+RD, n), ldbMRD); } + RUNTIME_data_flush( sequence, A (M+RD, k) ); + RUNTIME_data_flush( sequence, T2(M+RD, k) ); } } @@ -214,6 +221,8 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, B (M, n), ldbM, B (M+RD, n), ldbMRD); } + RUNTIME_data_flush( sequence, A (M+RD, k) ); + RUNTIME_data_flush( sequence, T2(M+RD, k) ); } } for (M = k; M < A->mt; M += BS) { @@ -242,6 +251,8 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, B(M, n), ldbM, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(m, k) ); + RUNTIME_data_flush( sequence, T(m, k) ); } #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -270,6 +281,8 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, T(M, k), T->mb, B(M, n), ldbM); } + RUNTIME_data_flush( sequence, D(M, k) ); + RUNTIME_data_flush( sequence, T(M, k) ); } RUNTIME_iteration_pop(morse); } @@ -309,6 +322,8 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, B (m, M), ldbm, B (m, M+RD), ldbm); } + RUNTIME_data_flush( sequence, A (M+RD, k) ); + RUNTIME_data_flush( sequence, T2(M+RD, k) ); } } for (M = k; M < A->mt; M += BS) { @@ -337,6 +352,8 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, B(m, M), ldbm, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(n, k) ); + RUNTIME_data_flush( sequence, T(n, k) ); } #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -367,6 +384,8 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, T(M, k), T->mb, B(m, M), ldbm); } + RUNTIME_data_flush( sequence, D(M, k) ); + RUNTIME_data_flush( sequence, T(M, k) ); } RUNTIME_iteration_pop(morse); @@ -408,6 +427,9 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, T(M, k), T->mb, B(m, M), ldbm); } + RUNTIME_data_flush( sequence, D(M, k) ); + RUNTIME_data_flush( sequence, T(M, k) ); + for (n = M+1; n < chameleon_min(M+BS, A->mt); n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldan = BLKLDD(A, n); @@ -429,6 +451,8 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, B(m, M), ldbm, B(m, n), ldbm); } + RUNTIME_data_flush( sequence, A(n, k) ); + RUNTIME_data_flush( sequence, T(n, k) ); } } for (RD = BS; RD < A->mt-k; RD *= 2) { @@ -453,6 +477,8 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, B (m, M ), ldbm, B (m, M+RD), ldbm); } + RUNTIME_data_flush( sequence, A (M+RD, k) ); + RUNTIME_data_flush( sequence, T2(M+RD, k) ); } }