Commit 7eaa05a3 authored by Matthias Messner's avatar Matthias Messner

some optimizations in pACA and logging of timings in M2L kernel

parent 94ec7f7f
......@@ -13,9 +13,11 @@
class FTreeCoordinate;
/// for verbosity only!!!
// for verbosity only!!!
//#define COUNT_BLOCKED_INTERACTIONS
// if timings should be logged
//#define LOG_TIMINGS
/**
* @author Matthias Messner(matthias.messner@inria.fr)
......@@ -79,6 +81,10 @@ class FChebSymKernel
}
#ifdef LOG_TIMINGS
FTic time;
FReal t_m2l_1, t_m2l_2, t_m2l_3;
#endif
public:
/**
......@@ -95,6 +101,12 @@ public:
Loc(NULL), Mul(NULL), countExp(NULL)
{
this->allocateMemoryForPermutedExpansions();
#ifdef LOG_TIMINGS
t_m2l_1 = FReal(0.);
t_m2l_2 = FReal(0.);
t_m2l_3 = FReal(0.);
#endif
}
......@@ -120,6 +132,13 @@ public:
if (Loc!=NULL) delete [] Loc;
if (Mul!=NULL) delete [] Mul;
if (countExp!=NULL) delete [] countExp;
#ifdef LOG_TIMINGS
std::cout << "- Permutation took " << t_m2l_1 << "s"
<< "\n- GEMMT and GEMM took " << t_m2l_2 << "s"
<< "\n- Unpermutation took " << t_m2l_3 << "s"
<< std::endl;
#endif
}
......@@ -161,6 +180,10 @@ public:
const int NumSourceCells,
const int TreeLevel)
{
#ifdef LOG_TIMINGS
time.tic();
#endif
// permute and copy multipole expansion
memset(countExp, 0, sizeof(int) * 343);
for (unsigned int idx=0; idx<343; ++idx) {
......@@ -190,6 +213,10 @@ public:
}
}
#ifdef LOG_TIMINGS
t_m2l_1 += time.tacAndElapsed();
#endif
#ifdef COUNT_BLOCKED_INTERACTIONS ////////////////////////////////////
unsigned int count_lidx = 0;
......@@ -207,6 +234,10 @@ public:
#endif ///////////////////////////////////////////////////////////////
#ifdef LOG_TIMINGS
time.tic();
#endif
// multiply (mat-mat-mul)
FReal Compressed [nnodes * 24];
const FReal scale = AbstractBaseClass::MatrixKernel->getScaleFactor(AbstractBaseClass::BoxWidth, TreeLevel);
......@@ -225,6 +256,14 @@ public:
}
}
#ifdef LOG_TIMINGS
t_m2l_2 += time.tacAndElapsed();
#endif
#ifdef LOG_TIMINGS
time.tic();
#endif
// permute and add contribution to local expansions
FReal *const LocalExpansion = TargetCell->getLocal();
memset(countExp, 0, sizeof(int) * 343);
......@@ -254,6 +293,11 @@ public:
}
}
#ifdef LOG_TIMINGS
t_m2l_3 += time.tacAndElapsed();
#endif
}
......
......@@ -158,6 +158,7 @@ void pACA(const ComputerType& Computer,
// initialize rank k and UV'
k = 0;
const FReal eps2 = eps * eps;
const int maxk = (nx + ny) / 2;
U = new FReal[nx * maxk];
V = new FReal[ny * maxk];
......@@ -186,12 +187,14 @@ void pACA(const ComputerType& Computer,
// find max of residual and argmax
FReal maxval = 0.;
for (unsigned int j=0; j<ny; ++j)
if (c[j] && maxval < FMath::Abs(colV[j])) {
maxval = FMath::Abs(colV[j]);
for (unsigned int j=0; j<ny; ++j) {
const FReal abs_val = FMath::Abs(colV[j]);
if (c[j] && maxval < abs_val) {
maxval = abs_val;
J = j;
}
// scale pivot v
}
// find pivot and scale column of V
const FReal pivot = FReal(1.) / colV[J];
FBlas::scal(ny, pivot, colV);
......@@ -207,11 +210,13 @@ void pACA(const ComputerType& Computer,
// find max of residual and argmax
maxval = 0.;
for (unsigned int i=0; i<nx; ++i)
if (r[i] && maxval < FMath::Abs(colU[i])) {
maxval = FMath::Abs(colU[i]);
for (unsigned int i=0; i<nx; ++i) {
const FReal abs_val = FMath::Abs(colU[i]);
if (r[i] && maxval < abs_val) {
maxval = abs_val;
I = i;
}
}
////////////////////////////////////////////
// increment Frobenius norm: |Sk|^2 += |uk|^2 |vk|^2 + 2 sumj ukuj vjvk
......@@ -225,7 +230,7 @@ void pACA(const ComputerType& Computer,
// increment low-rank
k++;
} while (norm2uv > eps*eps * norm2S);
} while (norm2uv > eps2 * norm2S);
delete [] r;
delete [] c;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment