Commit 7eaa05a3 authored by Matthias Messner's avatar Matthias Messner

some optimizations in pACA and logging of timings in M2L kernel

parent 94ec7f7f
...@@ -13,9 +13,11 @@ ...@@ -13,9 +13,11 @@
class FTreeCoordinate; class FTreeCoordinate;
/// for verbosity only!!! // for verbosity only!!!
//#define COUNT_BLOCKED_INTERACTIONS //#define COUNT_BLOCKED_INTERACTIONS
// if timings should be logged
//#define LOG_TIMINGS
/** /**
* @author Matthias Messner(matthias.messner@inria.fr) * @author Matthias Messner(matthias.messner@inria.fr)
...@@ -79,6 +81,10 @@ class FChebSymKernel ...@@ -79,6 +81,10 @@ class FChebSymKernel
} }
#ifdef LOG_TIMINGS
FTic time;
FReal t_m2l_1, t_m2l_2, t_m2l_3;
#endif
public: public:
/** /**
...@@ -95,6 +101,12 @@ public: ...@@ -95,6 +101,12 @@ public:
Loc(NULL), Mul(NULL), countExp(NULL) Loc(NULL), Mul(NULL), countExp(NULL)
{ {
this->allocateMemoryForPermutedExpansions(); this->allocateMemoryForPermutedExpansions();
#ifdef LOG_TIMINGS
t_m2l_1 = FReal(0.);
t_m2l_2 = FReal(0.);
t_m2l_3 = FReal(0.);
#endif
} }
...@@ -120,6 +132,13 @@ public: ...@@ -120,6 +132,13 @@ public:
if (Loc!=NULL) delete [] Loc; if (Loc!=NULL) delete [] Loc;
if (Mul!=NULL) delete [] Mul; if (Mul!=NULL) delete [] Mul;
if (countExp!=NULL) delete [] countExp; if (countExp!=NULL) delete [] countExp;
#ifdef LOG_TIMINGS
std::cout << "- Permutation took " << t_m2l_1 << "s"
<< "\n- GEMMT and GEMM took " << t_m2l_2 << "s"
<< "\n- Unpermutation took " << t_m2l_3 << "s"
<< std::endl;
#endif
} }
...@@ -161,6 +180,10 @@ public: ...@@ -161,6 +180,10 @@ public:
const int NumSourceCells, const int NumSourceCells,
const int TreeLevel) const int TreeLevel)
{ {
#ifdef LOG_TIMINGS
time.tic();
#endif
// permute and copy multipole expansion // permute and copy multipole expansion
memset(countExp, 0, sizeof(int) * 343); memset(countExp, 0, sizeof(int) * 343);
for (unsigned int idx=0; idx<343; ++idx) { for (unsigned int idx=0; idx<343; ++idx) {
...@@ -190,6 +213,10 @@ public: ...@@ -190,6 +213,10 @@ public:
} }
} }
#ifdef LOG_TIMINGS
t_m2l_1 += time.tacAndElapsed();
#endif
#ifdef COUNT_BLOCKED_INTERACTIONS //////////////////////////////////// #ifdef COUNT_BLOCKED_INTERACTIONS ////////////////////////////////////
unsigned int count_lidx = 0; unsigned int count_lidx = 0;
...@@ -207,6 +234,10 @@ public: ...@@ -207,6 +234,10 @@ public:
#endif /////////////////////////////////////////////////////////////// #endif ///////////////////////////////////////////////////////////////
#ifdef LOG_TIMINGS
time.tic();
#endif
// multiply (mat-mat-mul) // multiply (mat-mat-mul)
FReal Compressed [nnodes * 24]; FReal Compressed [nnodes * 24];
const FReal scale = AbstractBaseClass::MatrixKernel->getScaleFactor(AbstractBaseClass::BoxWidth, TreeLevel); const FReal scale = AbstractBaseClass::MatrixKernel->getScaleFactor(AbstractBaseClass::BoxWidth, TreeLevel);
...@@ -225,6 +256,14 @@ public: ...@@ -225,6 +256,14 @@ public:
} }
} }
#ifdef LOG_TIMINGS
t_m2l_2 += time.tacAndElapsed();
#endif
#ifdef LOG_TIMINGS
time.tic();
#endif
// permute and add contribution to local expansions // permute and add contribution to local expansions
FReal *const LocalExpansion = TargetCell->getLocal(); FReal *const LocalExpansion = TargetCell->getLocal();
memset(countExp, 0, sizeof(int) * 343); memset(countExp, 0, sizeof(int) * 343);
...@@ -254,6 +293,11 @@ public: ...@@ -254,6 +293,11 @@ public:
} }
} }
#ifdef LOG_TIMINGS
t_m2l_3 += time.tacAndElapsed();
#endif
} }
......
...@@ -158,6 +158,7 @@ void pACA(const ComputerType& Computer, ...@@ -158,6 +158,7 @@ void pACA(const ComputerType& Computer,
// initialize rank k and UV' // initialize rank k and UV'
k = 0; k = 0;
const FReal eps2 = eps * eps;
const int maxk = (nx + ny) / 2; const int maxk = (nx + ny) / 2;
U = new FReal[nx * maxk]; U = new FReal[nx * maxk];
V = new FReal[ny * maxk]; V = new FReal[ny * maxk];
...@@ -186,12 +187,14 @@ void pACA(const ComputerType& Computer, ...@@ -186,12 +187,14 @@ void pACA(const ComputerType& Computer,
// find max of residual and argmax // find max of residual and argmax
FReal maxval = 0.; FReal maxval = 0.;
for (unsigned int j=0; j<ny; ++j) for (unsigned int j=0; j<ny; ++j) {
if (c[j] && maxval < FMath::Abs(colV[j])) { const FReal abs_val = FMath::Abs(colV[j]);
maxval = FMath::Abs(colV[j]); if (c[j] && maxval < abs_val) {
maxval = abs_val;
J = j; J = j;
} }
// scale pivot v }
// find pivot and scale column of V
const FReal pivot = FReal(1.) / colV[J]; const FReal pivot = FReal(1.) / colV[J];
FBlas::scal(ny, pivot, colV); FBlas::scal(ny, pivot, colV);
...@@ -207,11 +210,13 @@ void pACA(const ComputerType& Computer, ...@@ -207,11 +210,13 @@ void pACA(const ComputerType& Computer,
// find max of residual and argmax // find max of residual and argmax
maxval = 0.; maxval = 0.;
for (unsigned int i=0; i<nx; ++i) for (unsigned int i=0; i<nx; ++i) {
if (r[i] && maxval < FMath::Abs(colU[i])) { const FReal abs_val = FMath::Abs(colU[i]);
maxval = FMath::Abs(colU[i]); if (r[i] && maxval < abs_val) {
maxval = abs_val;
I = i; I = i;
} }
}
//////////////////////////////////////////// ////////////////////////////////////////////
// increment Frobenius norm: |Sk|^2 += |uk|^2 |vk|^2 + 2 sumj ukuj vjvk // increment Frobenius norm: |Sk|^2 += |uk|^2 |vk|^2 + 2 sumj ukuj vjvk
...@@ -225,7 +230,7 @@ void pACA(const ComputerType& Computer, ...@@ -225,7 +230,7 @@ void pACA(const ComputerType& Computer,
// increment low-rank // increment low-rank
k++; k++;
} while (norm2uv > eps*eps * norm2S); } while (norm2uv > eps2 * norm2S);
delete [] r; delete [] r;
delete [] c; delete [] c;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment