Commit f5514729 authored by MIJIEUX Thomas

flops counting estimation test

parent a969bab6
......@@ -154,9 +154,10 @@ private:
{
if (_last_column_factorized)
return;
_logger.notify_facto_begin();
MORSE_enum trans = Arithmetik<S>::mtrans;
namespace fps = lapacke::flops;
int64_t flops = 0;
/* STEP 1: Loop over each block in last column to
......@@ -171,7 +172,7 @@ private:
MORSE_desc_t *T = _tau[i].get();
int err = chameleon::ormqr<S>(trans, A.get(), T, C.get(), _seq.get());
flops += lapacke::flops::left_ormqr<S>(_nbRHS, _nbRHS, _nbRHS);
flops += fps::left_ormqr<S>(_nbRHS, _nbRHS, _nbRHS);
if (err != 0) {
FABULOUS_THROW(Kernel, "ormqr 'step' err="<<err);
}
......@@ -182,7 +183,7 @@ private:
MorseDesc2<S> A = get_sub_hess(k, k, 2, 1);
int err = chameleon::geqrf<S>(A.get(), tau, _seq.get());
flops += lapacke::flops::geqrf<S>(_nbRHS, _nbRHS);
flops += fps::geqrf<S>(_nbRHS, _nbRHS);
if (err != 0) {
FABULOUS_THROW(Kernel, "geqrf 'last block' err="<<err);
}
......@@ -193,12 +194,17 @@ private:
/* STEP 3: Apply Q^H generated at step 2 to last block of RHS */
MorseDesc2<S> C = get_sub_rhs(k, 0, 2, 1);
err = chameleon::ormqr<S>(trans, A.get(), tau.get(), C.get(), _seq.get());
flops += lapacke::flops::left_ormqr<S>(_nbRHS, _nbRHS, _nbRHS);
flops += fps::left_ormqr<S>(_nbRHS, _nbRHS, _nbRHS);
if (err != 0) {
FABULOUS_THROW(Kernel, "ormqr 'RHS' err="<<err);
}
_last_column_factorized = true;
FABULOUS_DEBUG( "flops estimated 1 (individual kernel estimation)="<<flops);
int64_t flops2 = (fps::geqrf<S>(_nb_vect, _nb_vect)
- fps::geqrf<S>(_nb_vect, _nb_vect-_nbRHS));
FABULOUS_DEBUG( "flops estimated 2 (global estimation)="<<flops2);
_logger.notify_facto_end(flops);
}
......@@ -288,7 +294,8 @@ public:
// compute the solution
chameleon::trsm<S>(R.get(), Lambda_tmp.get(), _seq.get());
int flops = lapacke::flops::left_trsm<S>(_nb_vect, _nbRHS);
namespace fps = lapacke::flops;
int flops = fps::left_trsm<S>(_nb_vect, _nbRHS);
MORSE_Tile_to_Lapack(Lambda_tmp.get(), _Y.get_ptr(), _Y.get_leading_dim());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment