Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 7eaa05a3 authored by Matthias Messner's avatar Matthias Messner
Browse files

some optimizations in pACA and logging of timings in M2L kernel

parent 94ec7f7f
Branches
Tags
No related merge requests found
...@@ -13,9 +13,11 @@ ...@@ -13,9 +13,11 @@
class FTreeCoordinate; class FTreeCoordinate;
/// for verbosity only!!! // for verbosity only!!!
//#define COUNT_BLOCKED_INTERACTIONS //#define COUNT_BLOCKED_INTERACTIONS
// if timings should be logged
//#define LOG_TIMINGS
/** /**
* @author Matthias Messner(matthias.messner@inria.fr) * @author Matthias Messner(matthias.messner@inria.fr)
...@@ -79,6 +81,10 @@ class FChebSymKernel ...@@ -79,6 +81,10 @@ class FChebSymKernel
} }
#ifdef LOG_TIMINGS
FTic time;
FReal t_m2l_1, t_m2l_2, t_m2l_3;
#endif
public: public:
/** /**
...@@ -95,6 +101,12 @@ public: ...@@ -95,6 +101,12 @@ public:
Loc(NULL), Mul(NULL), countExp(NULL) Loc(NULL), Mul(NULL), countExp(NULL)
{ {
this->allocateMemoryForPermutedExpansions(); this->allocateMemoryForPermutedExpansions();
#ifdef LOG_TIMINGS
t_m2l_1 = FReal(0.);
t_m2l_2 = FReal(0.);
t_m2l_3 = FReal(0.);
#endif
} }
...@@ -120,6 +132,13 @@ public: ...@@ -120,6 +132,13 @@ public:
if (Loc!=NULL) delete [] Loc; if (Loc!=NULL) delete [] Loc;
if (Mul!=NULL) delete [] Mul; if (Mul!=NULL) delete [] Mul;
if (countExp!=NULL) delete [] countExp; if (countExp!=NULL) delete [] countExp;
#ifdef LOG_TIMINGS
std::cout << "- Permutation took " << t_m2l_1 << "s"
<< "\n- GEMMT and GEMM took " << t_m2l_2 << "s"
<< "\n- Unpermutation took " << t_m2l_3 << "s"
<< std::endl;
#endif
} }
...@@ -161,6 +180,10 @@ public: ...@@ -161,6 +180,10 @@ public:
const int NumSourceCells, const int NumSourceCells,
const int TreeLevel) const int TreeLevel)
{ {
#ifdef LOG_TIMINGS
time.tic();
#endif
// permute and copy multipole expansion // permute and copy multipole expansion
memset(countExp, 0, sizeof(int) * 343); memset(countExp, 0, sizeof(int) * 343);
for (unsigned int idx=0; idx<343; ++idx) { for (unsigned int idx=0; idx<343; ++idx) {
...@@ -190,6 +213,10 @@ public: ...@@ -190,6 +213,10 @@ public:
} }
} }
#ifdef LOG_TIMINGS
t_m2l_1 += time.tacAndElapsed();
#endif
#ifdef COUNT_BLOCKED_INTERACTIONS //////////////////////////////////// #ifdef COUNT_BLOCKED_INTERACTIONS ////////////////////////////////////
unsigned int count_lidx = 0; unsigned int count_lidx = 0;
...@@ -207,6 +234,10 @@ public: ...@@ -207,6 +234,10 @@ public:
#endif /////////////////////////////////////////////////////////////// #endif ///////////////////////////////////////////////////////////////
#ifdef LOG_TIMINGS
time.tic();
#endif
// multiply (mat-mat-mul) // multiply (mat-mat-mul)
FReal Compressed [nnodes * 24]; FReal Compressed [nnodes * 24];
const FReal scale = AbstractBaseClass::MatrixKernel->getScaleFactor(AbstractBaseClass::BoxWidth, TreeLevel); const FReal scale = AbstractBaseClass::MatrixKernel->getScaleFactor(AbstractBaseClass::BoxWidth, TreeLevel);
...@@ -225,6 +256,14 @@ public: ...@@ -225,6 +256,14 @@ public:
} }
} }
#ifdef LOG_TIMINGS
t_m2l_2 += time.tacAndElapsed();
#endif
#ifdef LOG_TIMINGS
time.tic();
#endif
// permute and add contribution to local expansions // permute and add contribution to local expansions
FReal *const LocalExpansion = TargetCell->getLocal(); FReal *const LocalExpansion = TargetCell->getLocal();
memset(countExp, 0, sizeof(int) * 343); memset(countExp, 0, sizeof(int) * 343);
...@@ -254,6 +293,11 @@ public: ...@@ -254,6 +293,11 @@ public:
} }
} }
#ifdef LOG_TIMINGS
t_m2l_3 += time.tacAndElapsed();
#endif
} }
......
...@@ -158,6 +158,7 @@ void pACA(const ComputerType& Computer, ...@@ -158,6 +158,7 @@ void pACA(const ComputerType& Computer,
// initialize rank k and UV' // initialize rank k and UV'
k = 0; k = 0;
const FReal eps2 = eps * eps;
const int maxk = (nx + ny) / 2; const int maxk = (nx + ny) / 2;
U = new FReal[nx * maxk]; U = new FReal[nx * maxk];
V = new FReal[ny * maxk]; V = new FReal[ny * maxk];
...@@ -186,12 +187,14 @@ void pACA(const ComputerType& Computer, ...@@ -186,12 +187,14 @@ void pACA(const ComputerType& Computer,
// find max of residual and argmax // find max of residual and argmax
FReal maxval = 0.; FReal maxval = 0.;
for (unsigned int j=0; j<ny; ++j) for (unsigned int j=0; j<ny; ++j) {
if (c[j] && maxval < FMath::Abs(colV[j])) { const FReal abs_val = FMath::Abs(colV[j]);
maxval = FMath::Abs(colV[j]); if (c[j] && maxval < abs_val) {
maxval = abs_val;
J = j; J = j;
} }
// scale pivot v }
// find pivot and scale column of V
const FReal pivot = FReal(1.) / colV[J]; const FReal pivot = FReal(1.) / colV[J];
FBlas::scal(ny, pivot, colV); FBlas::scal(ny, pivot, colV);
...@@ -207,11 +210,13 @@ void pACA(const ComputerType& Computer, ...@@ -207,11 +210,13 @@ void pACA(const ComputerType& Computer,
// find max of residual and argmax // find max of residual and argmax
maxval = 0.; maxval = 0.;
for (unsigned int i=0; i<nx; ++i) for (unsigned int i=0; i<nx; ++i) {
if (r[i] && maxval < FMath::Abs(colU[i])) { const FReal abs_val = FMath::Abs(colU[i]);
maxval = FMath::Abs(colU[i]); if (r[i] && maxval < abs_val) {
maxval = abs_val;
I = i; I = i;
} }
}
//////////////////////////////////////////// ////////////////////////////////////////////
// increment Frobenius norm: |Sk|^2 += |uk|^2 |vk|^2 + 2 sumj ukuj vjvk // increment Frobenius norm: |Sk|^2 += |uk|^2 |vk|^2 + 2 sumj ukuj vjvk
...@@ -225,7 +230,7 @@ void pACA(const ComputerType& Computer, ...@@ -225,7 +230,7 @@ void pACA(const ComputerType& Computer,
// increment low-rank // increment low-rank
k++; k++;
} while (norm2uv > eps*eps * norm2S); } while (norm2uv > eps2 * norm2S);
delete [] r; delete [] r;
delete [] c; delete [] c;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment