-
BRAMAS Berenger authoredBRAMAS Berenger authored
testChebTensorProduct.cpp 16.35 KiB
// ===================================================================================
// Ce LOGICIEL "ScalFmm" est couvert par le copyright Inria 20xx-2012.
// Inria détient tous les droits de propriété sur le LOGICIEL, et souhaite que
// la communauté scientifique l'utilise afin de le tester et de l'évaluer.
// Inria donne gracieusement le droit d'utiliser ce LOGICIEL. Toute utilisation
// dans un but lucratif ou à des fins commerciales est interdite sauf autorisation
// expresse et préalable d'Inria.
// Toute utilisation hors des limites précisées ci-dessus et réalisée sans l'accord
// expresse préalable d'Inria constituerait donc le délit de contrefaçon.
// Le LOGICIEL étant un produit en cours de développement, Inria ne saurait assurer
// aucune responsabilité et notamment en aucune manière et en aucun cas, être tenu
// de répondre d'éventuels dommages directs ou indirects subits par l'utilisateur.
// Tout utilisateur du LOGICIEL s'engage à communiquer à Inria ses remarques
// relatives à l'usage du LOGICIEL
// ===================================================================================
// ==== CMAKE =====
// @FUSE_BLAS
// ================
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "../../Src/Utils/FTic.hpp"
#include "../../Src/Utils/FMath.hpp"
#include "../../Src/Utils/FBlas.hpp"
#include "../../Src/Utils/FParameters.hpp"
#include "../../Src/Containers/FVector.hpp"
#include "../../Src/Utils/FAssertable.hpp"
#include "../../Src/Utils/FPoint.hpp"
#include "../../Src/Kernels/Chebyshev/FChebTensor.hpp"
#include "../../Src/Kernels/Chebyshev/FChebInterpolator.hpp"
void applyM2M(FReal *const S, FReal *const w, const unsigned int n, FReal *const W, const unsigned int N)
{ FBlas::gemtva(n, N, FReal(1.), S, w, W); }
void applym2m(FReal *const S, FReal *const w, const unsigned int n, FReal *const W, const unsigned int N)
{ FBlas::gemtm(n, n, n*n, FReal(1.), S, n, w, n, W, n); }
void applyL2L(FReal *const S, FReal *const F, const unsigned int n, FReal *const f, const unsigned int N)
{ FBlas::gemva(N, n, FReal(1.), S, F, f); }
void applyl2l(FReal *const S, FReal *const F, const unsigned int n, FReal *const f, const unsigned int N)
{ FBlas::gemm(n, n, n*n, FReal(1.), S, n, F, n, f, n); }
/**
* In this file we show how to use octree
*/
int main(int argc, char* argv[])
{
const unsigned int ORDER = 10;
const unsigned int nnodes = TensorTraits<ORDER>::nnodes;
FPoint X[nnodes];
FChebInterpolator<ORDER> Interpolator;
{
FChebTensor<ORDER>::setRoots(FPoint(0.,0.,0.), FReal(2.), X);
FPoint x[nnodes];
// const FPoint cx(-.5, -.5, -.5);
// const FPoint cx(-.5, -.5, .5);
// const FPoint cx(-.5, .5, -.5);
// const FPoint cx(-.5, .5, .5);
// const FPoint cx( .5, -.5, -.5);
// const FPoint cx( .5, -.5, .5);
// const FPoint cx( .5, .5, -.5);
const FPoint cx( .5, .5, .5);
const FReal wx(1.);
FChebTensor<ORDER>::setRoots(cx, wx, x);
FReal w[nnodes], f[nnodes];
for (unsigned int n=0; n<nnodes; ++n) {
w[n] = f[n] = FReal(n);
// std::cout << w[n] << "\t" << X[n] << "\t" << x[n] << std::endl;
}
FReal coords[3][ORDER];
FChebTensor<ORDER>::setChebyshevRoots(cx, wx, coords);
// for (unsigned int n=0; n<ORDER; ++n) {
// std::cout << coords[0][n] << "\t"
// << coords[1][n] << "\t"
// << coords[2][n] << std::endl;
// }
FReal S[3][ORDER*ORDER];
Interpolator.assembleInterpolator(ORDER, coords[0], S[0]);
Interpolator.assembleInterpolator(ORDER, coords[1], S[1]);
Interpolator.assembleInterpolator(ORDER, coords[2], S[2]);
FReal Skron[nnodes * nnodes];
Interpolator.assembleInterpolator(nnodes, x, Skron);
FReal W0[nnodes];
for (unsigned int i=0; i<nnodes; ++i) W0[i] = FReal(0.);
applyM2M(Skron, w, nnodes, W0, nnodes);
FReal F0[nnodes];
for (unsigned int i=0; i<nnodes; ++i) F0[i] = FReal(0.);
applyL2L(Skron, f, nnodes, F0, nnodes);
unsigned int perm[3][nnodes];
for (unsigned int i=0; i<ORDER; ++i) {
for (unsigned int j=0; j<ORDER; ++j) {
for (unsigned int k=0; k<ORDER; ++k) {
const unsigned int index = k*ORDER*ORDER + j*ORDER + i;
perm[0][index] = k*ORDER*ORDER + j*ORDER + i;
perm[1][index] = i*ORDER*ORDER + k*ORDER + j;
perm[2][index] = j*ORDER*ORDER + i*ORDER + k;
}
}
}
// for (unsigned int n=0; n<nnodes; ++n)
// std::cout << perm[0][n] << "\t" << perm[1][n] << "\t" << perm[2][n] << std::endl;
FReal W[nnodes];
for (unsigned int i=0; i<nnodes; ++i) W[i] = FReal(0.);
applym2m(S[0], w, ORDER, W, ORDER);
for (unsigned int n=0; n<nnodes; ++n) w[n] = W[perm[1][n]];
applym2m(S[2], w, ORDER, W, ORDER);
for (unsigned int n=0; n<nnodes; ++n) w[perm[1][n]] = W[perm[2][n]];
applym2m(S[1], w, ORDER, W, ORDER);
for (unsigned int n=0; n<nnodes; ++n) w[perm[2][n]] = W[n];
FReal m2m_error(0.);
for (unsigned int n=0; n<nnodes; ++n) {
//std::cout << n << "\t" << w[n] << " - " << W0[n] << " = " << w[n]-W0[n] << std::endl;
m2m_error += w[n] - W0[n];
}
std::cout << "------------------------------------------"
<< "\n - M2M: ERROR = " << m2m_error << std::endl;
FReal F[nnodes];
for (unsigned int i=0; i<nnodes; ++i) F[i] = FReal(0.);
applyl2l(S[0], f, ORDER, F, ORDER);
for (unsigned int n=0; n<nnodes; ++n) f[n] = F[perm[1][n]];
applyl2l(S[2], f, ORDER, F, ORDER);
for (unsigned int n=0; n<nnodes; ++n) f[perm[1][n]] = F[perm[2][n]];
applyl2l(S[1], f, ORDER, F, ORDER);
for (unsigned int n=0; n<nnodes; ++n) f[perm[2][n]] = F[n];
FReal l2l_error(0.);
for (unsigned int n=0; n<nnodes; ++n) {
//std::cout << n << "\t" << f[n] << " - " << F0[n] << " = " << f[n]-F0[n] << std::endl;
l2l_error += f[n] - F0[n];
}
std::cout << "------------------------------------------"
<< "\n - L2L: ERROR = " << l2l_error << std::endl;
}
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
// P2M /////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
const unsigned int M = 10;
FReal points[3][M];
FReal weights[M];
FPoint lp[M];
FReal equivW[nnodes];
{ ////////////////////////////////////////////////////////
const FReal FRandMax = FReal(RAND_MAX);
for(unsigned int p=0; p<M; ++p){
points[0][p] = (FReal(rand())/FRandMax - FReal(.5)) * FReal(2.);
points[1][p] = (FReal(rand())/FRandMax - FReal(.5)) * FReal(2.);
points[2][p] = (FReal(rand())/FRandMax - FReal(.5)) * FReal(2.);
weights[p] = FReal(rand())/FRandMax;
lp[p].setX(points[0][p]);
lp[p].setY(points[1][p]);
lp[p].setZ(points[2][p]);
}
} ////////////////////////////////////////////////////////
{ // compute exact equivalent source values
for(unsigned int i=0; i<nnodes; ++i) equivW[i] = FReal(0.);
FReal Snorm[M * nnodes];
Interpolator.assembleInterpolator(M, lp, Snorm);
applyM2M(Snorm, weights, M, equivW, nnodes);
}
FReal W1;
FReal W2[3][ ORDER-1];
FReal W4[3][(ORDER-1)*(ORDER-1)];
FReal W8[ (ORDER-1)*(ORDER-1)*(ORDER-1)];
{ ////////////////////////////////////////////////////////
W1 = FReal(0.);
for(unsigned int i=0; i<ORDER-1; ++i)
W2[0][i] = W2[1][i] = W2[2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1); ++i)
W4[0][i] = W4[1][i] = W4[2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1)*(ORDER-1); ++i)
W8[i] = FReal(0.);
// loop over source particles
for (unsigned int p=0; p<M; ++p) {
FReal T_of_x[3][ORDER];
T_of_x[0][0] = FReal(1.); T_of_x[0][1] = points[0][p];
T_of_x[1][0] = FReal(1.); T_of_x[1][1] = points[1][p];
T_of_x[2][0] = FReal(1.); T_of_x[2][1] = points[2][p];
const FReal x2 = FReal(2.) * T_of_x[0][1]; // 1 flop
const FReal y2 = FReal(2.) * T_of_x[1][1]; // 1 flop
const FReal z2 = FReal(2.) * T_of_x[2][1]; // 1 flop
for (unsigned int j=2; j<ORDER; ++j) {
T_of_x[0][j] = x2 * T_of_x[0][j-1] - T_of_x[0][j-2]; // 2 flops
T_of_x[1][j] = y2 * T_of_x[1][j-1] - T_of_x[1][j-2]; // 2 flops
T_of_x[2][j] = z2 * T_of_x[2][j-1] - T_of_x[2][j-2]; // 2 flops
}
W1 += weights[p]; // 1 flop
for (unsigned int i=1; i<ORDER; ++i) {
const FReal wx = weights[p] * T_of_x[0][i]; // 1 flop
const FReal wy = weights[p] * T_of_x[1][i]; // 1 flop
const FReal wz = weights[p] * T_of_x[2][i]; // 1 flop
W2[0][i-1] += wx; // 1 flop
W2[1][i-1] += wy; // 1 flop
W2[2][i-1] += wz; // 1 flop
for (unsigned int j=1; j<ORDER; ++j) {
const FReal wxy = wx * T_of_x[1][j]; // 1 flop
const FReal wxz = wx * T_of_x[2][j]; // 1 flop
const FReal wyz = wy * T_of_x[2][j]; // 1 flop
W4[0][(j-1)*(ORDER-1) + (i-1)] += wxy; // 1 flop
W4[1][(j-1)*(ORDER-1) + (i-1)] += wxz; // 1 flop
W4[2][(j-1)*(ORDER-1) + (i-1)] += wyz; // 1 flop
for (unsigned int k=1; k<ORDER; ++k) {
const FReal wxyz = wxy * T_of_x[2][k]; // 1 flop
W8[(k-1)*(ORDER-1)*(ORDER-1) + (j-1)*(ORDER-1) + (i-1)] += wxyz; // 1 flop
} // flops: (ORDER-1) * 2
} // flops: (ORDER-1) * (6 + (ORDER-1) * 2)
} // flops: (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1) * 2))
} // flops: M * (3 + (ORDER-2) * 6 + (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1) * 2)))
} ////////////////////////////////////////////////////////
FReal F2[3][ORDER];
FReal F4[3][ORDER*ORDER];
FReal F8[ ORDER*ORDER*ORDER];
{ ////////////////////////////////////////////////////////
for(unsigned int i=0; i<ORDER; ++i)
F2[0][i] = F2[1][i] = F2[2][i] = FReal(0.);
for(unsigned int i=0; i<ORDER*ORDER; ++i)
F4[0][i] = F4[1][i] = F4[2][i] = FReal(0.);
for(unsigned int i=0; i<ORDER*ORDER*ORDER; ++i)
F8[i] = FReal(0.);
FReal T_of_y[ORDER * (ORDER-1)];
for (unsigned int o=1; o<ORDER; ++o)
for (unsigned int j=0; j<ORDER; ++j)
T_of_y[(o-1)*ORDER + j] = FReal(FChebRoots<ORDER>::T(o, FReal(FChebRoots<ORDER>::roots[j])));
for (unsigned int l=0; l<ORDER-1; ++l)
for (unsigned int i=0; i<ORDER; ++i) {
F2[0][i] += T_of_y[l*ORDER + i] * W2[0][l];
F2[1][i] += T_of_y[l*ORDER + i] * W2[1][l];
F2[2][i] += T_of_y[l*ORDER + i] * W2[2][l];
for (unsigned int m=0; m<ORDER-1; ++m)
for (unsigned int j=0; j<ORDER; ++j) {
F4[0][j*ORDER + i] += T_of_y[l*ORDER + i] * T_of_y[m*ORDER + j] * W4[0][m*(ORDER-1) + l];
F4[1][j*ORDER + i] += T_of_y[l*ORDER + i] * T_of_y[m*ORDER + j] * W4[1][m*(ORDER-1) + l];
F4[2][j*ORDER + i] += T_of_y[l*ORDER + i] * T_of_y[m*ORDER + j] * W4[2][m*(ORDER-1) + l];
for (unsigned int n=0; n<ORDER-1; ++n)
for (unsigned int k=0; k<ORDER; ++k)
F8[k*ORDER*ORDER + j*ORDER + i] +=
T_of_y[l*ORDER + i] * T_of_y[m*ORDER + j] * T_of_y[n*ORDER + k] *
W8[n*(ORDER-1)*(ORDER-1) + m*(ORDER-1) + l];
}
}
} ////////////////////////////////////////////////////////
FReal W[nnodes];
{
//for (unsigned int i=0; i<nnodes; ++i) W[i] = FReal(0.);
for (unsigned int i=0; i<ORDER; ++i) {
for (unsigned int j=0; j<ORDER; ++j) {
for (unsigned int k=0; k<ORDER; ++k) {
const unsigned int idx = k*ORDER*ORDER + j*ORDER + i;
W[idx] = (W1 +
FReal(2.) * (F2[0][i] + F2[1][j] + F2[2][k]) +
FReal(4.) * (F4[0][j*ORDER+i] + F4[1][k*ORDER+i] + F4[2][k*ORDER+j]) +
FReal(8.) * F8[idx]) / (ORDER*ORDER*ORDER);
}
}
}
}
FReal p2m_error(0.);
for (unsigned int i=0; i<nnodes; ++i) {
p2m_error += W[i] - equivW[i];
//std::cout << W[i] - equivW[i] << std::endl;
}
std::cout << "------------------------------------------"
<< "\n - P2M: ERROR = " << p2m_error << std::endl;
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
// L2P /////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
FReal exactf[M];
FReal f[M];
for(unsigned int i=0; i<M; ++i) exactf[i] = f[i] = FReal(0.);
{ // compute exact target values
FReal Snorm[M * nnodes];
Interpolator.assembleInterpolator(M, lp, Snorm);
applyL2L(Snorm, W, nnodes, exactf, M);
//for (unsigned int i=0; i<M; ++i)
// std::cout << exactf[i] << std::endl;
}
FReal f1;
{ // sum over interpolation points
FReal T_of_y[ORDER * (ORDER-1)];
for (unsigned int o=1; o<ORDER; ++o)
for (unsigned int j=0; j<ORDER; ++j)
T_of_y[(o-1)*ORDER + j] = FReal(FChebRoots<ORDER>::T(o, FReal(FChebRoots<ORDER>::roots[j])));
// set everything to zero
f1 = FReal(0.);
for(unsigned int i=0; i<ORDER-1; ++i)
W2[0][i] = W2[1][i] = W2[2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1); ++i)
W4[0][i] = W4[1][i] = W4[2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1)*(ORDER-1); ++i)
W8[i] = FReal(0.);
{
unsigned int nids[nnodes][3];
FChebTensor<ORDER>::setNodeIds(nids);
for (unsigned int idx=0; idx<nnodes; ++idx) {
f1 += W[idx];
const unsigned int i = nids[idx][0];
const unsigned int j = nids[idx][1];
const unsigned int k = nids[idx][2];
//std::cout << i << "\t" << j << "\t" << k << std::endl;
for (unsigned int l=0; l<ORDER-1; ++l) {
W2[0][l] += T_of_y[l*ORDER+i] * W[idx];
W2[1][l] += T_of_y[l*ORDER+j] * W[idx];
W2[2][l] += T_of_y[l*ORDER+k] * W[idx];
for (unsigned int m=0; m<ORDER-1; ++m) {
W4[0][m*(ORDER-1)+l] += T_of_y[l*ORDER+i] * T_of_y[m*ORDER+j] * W[idx];
W4[1][m*(ORDER-1)+l] += T_of_y[l*ORDER+i] * T_of_y[m*ORDER+k] * W[idx];
W4[2][m*(ORDER-1)+l] += T_of_y[l*ORDER+j] * T_of_y[m*ORDER+k] * W[idx];
for (unsigned int n=0; n<ORDER-1; ++n)
W8[n*(ORDER-1)*(ORDER-1) + m*(ORDER-1) + l]
+= T_of_y[l*ORDER+i]*T_of_y[m*ORDER+j]*T_of_y[n*ORDER+k] * W[idx];
}
}
}
}
}
{ // sum over targets
for (unsigned int p=0; p<M; ++p) {
FReal T_of_x[3][ORDER];
{
T_of_x[0][0] = FReal(1.); T_of_x[0][1] = points[0][p];
T_of_x[1][0] = FReal(1.); T_of_x[1][1] = points[1][p];
T_of_x[2][0] = FReal(1.); T_of_x[2][1] = points[2][p];
const FReal x2 = FReal(2.) * T_of_x[0][1]; // 1 flop
const FReal y2 = FReal(2.) * T_of_x[1][1]; // 1 flop
const FReal z2 = FReal(2.) * T_of_x[2][1]; // 1 flop
for (unsigned int j=2; j<ORDER; ++j) {
T_of_x[0][j] = x2 * T_of_x[0][j-1] - T_of_x[0][j-2]; // 2 flops
T_of_x[1][j] = y2 * T_of_x[1][j-1] - T_of_x[1][j-2]; // 2 flops
T_of_x[2][j] = z2 * T_of_x[2][j-1] - T_of_x[2][j-2]; // 2 flops
}
}
FReal f2, f4, f8;
{
f2 = f4 = f8 = FReal(0.);
for (unsigned int l=1; l<ORDER; ++l) {
f2 +=
T_of_x[0][l] * W2[0][l-1] +
T_of_x[1][l] * W2[1][l-1] +
T_of_x[2][l] * W2[2][l-1];
for (unsigned int m=1; m<ORDER; ++m) {
f4 +=
T_of_x[0][l] * T_of_x[1][m] * W4[0][(m-1)*(ORDER-1)+(l-1)] +
T_of_x[0][l] * T_of_x[2][m] * W4[1][(m-1)*(ORDER-1)+(l-1)] +
T_of_x[1][l] * T_of_x[2][m] * W4[2][(m-1)*(ORDER-1)+(l-1)];
for (unsigned int n=1; n<ORDER; ++n)
f8 +=
T_of_x[0][l]*T_of_x[1][m]*T_of_x[2][n] *
W8[(n-1)*(ORDER-1)*(ORDER-1) + (m-1)*(ORDER-1) + (l-1)];
}
}
}
f[p] = (f1 + FReal(2.)*f2 + FReal(4.)*f4 + FReal(8.)*f8) / (ORDER*ORDER*ORDER);
}
}
FReal l2p_error(0.);
for (unsigned int i=0; i<M; ++i) {
l2p_error += f[i] - exactf[i];
//std::cout << exactf[i] << "\t" << f[i] << std::endl;
}
std::cout << "------------------------------------------"
<< "\n - L2P: ERROR = " << l2p_error << std::endl;
return 0;
}
// [--END--]
//for (unsigned int l=0; l<ORDER-1; ++l)
// for (unsigned int m=0; m<ORDER-1; ++m)
// for (unsigned int n=0; n<ORDER-1; ++n)
//
// for (unsigned int i=0; i<ORDER; ++i)
// for (unsigned int j=0; j<ORDER; ++j)
// for (unsigned int k=0; k<ORDER; ++k) {
//
// const unsigned int idx = k*ORDER*ORDER + j*ORDER + i;
//
// W2[0][l] += T_of_y[l*ORDER+i] * W[idx];
// W2[1][m] += T_of_y[m*ORDER+j] * W[idx];
// W2[2][n] += T_of_y[n*ORDER+k] * W[idx];
//
// W4[0][m*(ORDER-1) + l] += T_of_y[l*ORDER+i] * T_of_y[m*ORDER+j] * W[idx];
// W4[1][n*(ORDER-1) + l] += T_of_y[l*ORDER+i] * T_of_y[n*ORDER+k] * W[idx];
// W4[2][n*(ORDER-1) + m] += T_of_y[m*ORDER+j] * T_of_y[n*ORDER+k] * W[idx];
//
// W8[n*(ORDER-1)*(ORDER-1) + m*(ORDER-1) + l]
// += T_of_y[l*ORDER+i]*T_of_y[m*ORDER+j]*T_of_y[n*ORDER+k] * W[idx];
// }