FChebInterpolator.hpp 23.9 KB
Newer Older
1 2 3 4 5 6 7 8
#ifndef FCHEBINTERPOLATOR_HPP
#define FCHEBINTERPOLATOR_HPP


#include "./FChebMapping.hpp"
#include "./FChebTensor.hpp"
#include "./FChebRoots.hpp"

9
#include "../../Utils/FBlas.hpp"
10

11 12 13 14 15 16 17 18 19


/**
 * @author Matthias Messner (matthias.matthias@inria.fr)
 * Please read the license
 */

/**
 * @class FChebInterpolator
20
 *
21
 * The class @p FChebInterpolator defines the anterpolation (M2M) and
22
 * interpolation (L2L) concerning operations.
23 24 25 26 27 28 29 30 31 32 33
 */
template <int ORDER>
class FChebInterpolator : FNoCopyable
{
  // compile time constants and types
  enum {nnodes = TensorTraits<ORDER>::nnodes};
  typedef FChebRoots< ORDER>  BasisType;
  typedef FChebTensor<ORDER> TensorType;

  FReal T_of_roots[ORDER][ORDER];
	unsigned int node_ids[nnodes][3];
34 35
	FReal* ChildParentInterpolator[8];

36 37 38 39
	// permutations (only needed in the tensor product interpolation case)
	unsigned int perm[3][nnodes];


40 41 42 43 44
	/**
	 * Initialize the child - parent - interpolator, it is basically the matrix
	 * S which is precomputed and reused for all M2M and L2L operations, ie for
	 * all non leaf inter/anterpolations.
	 */
45
	void initM2MandL2L()
46
	{
COULAUD Olivier's avatar
COULAUD Olivier committed
47
		FPoint ParentRoots[nnodes], ChildRoots[nnodes];
48
		const FReal ParentWidth(2.);
COULAUD Olivier's avatar
COULAUD Olivier committed
49
		const FPoint ParentCenter(0., 0., 0.);
50 51
		FChebTensor<ORDER>::setRoots(ParentCenter, ParentWidth, ParentRoots);

COULAUD Olivier's avatar
COULAUD Olivier committed
52
		FPoint ChildCenter;
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
		const FReal ChildWidth(1.);
		
		// loop: child cells
		for (unsigned int child=0; child<8; ++child) {

			// allocate memory
			ChildParentInterpolator[child] = new FReal [nnodes * nnodes];

			// set child info
			FChebTensor<ORDER>::setRelativeChildCenter(child, ChildCenter);
			FChebTensor<ORDER>::setRoots(ChildCenter, ChildWidth, ChildRoots);

			// assemble child - parent - interpolator
			assembleInterpolator(nnodes, ChildRoots, ChildParentInterpolator[child]);
		}
	}

70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
	/**
	 * Initialize the child - parent - interpolator, it is basically the matrix
	 * S which is precomputed and reused for all M2M and L2L operations, ie for
	 * all non leaf inter/anterpolations.
	 */
	void initTensorM2MandL2L()
	{
		FPoint ParentRoots[nnodes];
		FReal ChildCoords[3][ORDER];
		const FReal ParentWidth(2.);
		const FPoint ParentCenter(0., 0., 0.);
		FChebTensor<ORDER>::setRoots(ParentCenter, ParentWidth, ParentRoots);

		FPoint ChildCenter;
		const FReal ChildWidth(1.);
		
		// loop: child cells
		for (unsigned int child=0; child<8; ++child) {

			// set child info
			FChebTensor<ORDER>::setRelativeChildCenter(child, ChildCenter);
			FChebTensor<ORDER>::setChebyshevRoots(ChildCenter, ChildWidth, ChildCoords);

			// allocate memory
			ChildParentInterpolator[child] = new FReal [3 * ORDER*ORDER];
			assembleInterpolator(ORDER, ChildCoords[0], ChildParentInterpolator[child]);
			assembleInterpolator(ORDER, ChildCoords[1], ChildParentInterpolator[child] + 1 * ORDER*ORDER);
			assembleInterpolator(ORDER, ChildCoords[2], ChildParentInterpolator[child] + 2 * ORDER*ORDER);
		}


		// init permutations
		for (unsigned int i=0; i<ORDER; ++i) {
			for (unsigned int j=0; j<ORDER; ++j) {
				for (unsigned int k=0; k<ORDER; ++k) {
					const unsigned int index = k*ORDER*ORDER + j*ORDER + i;
					perm[0][index] = k*ORDER*ORDER + j*ORDER + i;
					perm[1][index] = i*ORDER*ORDER + k*ORDER + j;
					perm[2][index] = j*ORDER*ORDER + i*ORDER + k;
				}
			}
		}
		
	}

115 116 117 118


public:
	/**
119
	 * Constructor: Initialize the Chebyshev polynomials at the Chebyshev
120 121 122 123 124 125 126
	 * roots/interpolation point
	 */
	explicit FChebInterpolator()
	{
		// initialize chebyshev polynomials of root nodes: T_o(x_j)
    for (unsigned int o=1; o<ORDER; ++o)
      for (unsigned int j=0; j<ORDER; ++j)
messner's avatar
messner committed
127
        T_of_roots[o][j] = FReal(BasisType::T(o, FReal(BasisType::roots[j])));
128 129 130

		// initialize root node ids
		TensorType::setNodeIds(node_ids);
131 132 133

		// initialize interpolation operator for non M2M and L2L (non leaf
		// operations)
134 135 136 137

		//this -> initM2MandL2L();

		this -> initTensorM2MandL2L();
138 139 140 141 142 143 144 145 146 147
	}

	
	/**
	 * Destructor: Delete dynamically allocated memory for M2M and L2L operator
	 */
	~FChebInterpolator()
	{
		for (unsigned int child=0; child<8; ++child)
			delete [] ChildParentInterpolator[child];
148 149 150
	}


151 152 153 154 155 156 157 158 159 160
	/**
	 * Assembles the interpolator \f$S_\ell\f$ of size \f$N\times
	 * \ell^3\f$. Here local points is meant as points whose global coordinates
	 * have already been mapped to the reference interval [-1,1].
	 *
	 * @param[in] NumberOfLocalPoints
	 * @param[in] LocalPoints
	 * @param[out] Interpolator
	 */
	void assembleInterpolator(const unsigned int NumberOfLocalPoints,
COULAUD Olivier's avatar
COULAUD Olivier committed
161
				  const FPoint *const LocalPoints,
162
				  FReal *const Interpolator) const
COULAUD Olivier's avatar
COULAUD Olivier committed
163
	{
164 165
		// values of chebyshev polynomials of source particle: T_o(x_i)
		FReal T_of_x[ORDER][3];
166 167 168 169 170 171
		FReal c0, c1, c2 ;
		c0 = FReal(0.0) ;
		c1 = FReal(1.) / ORDER;
		c2 = FReal(2.) *c1 ;
		  // loop: local points (mapped in [-1,1])
		  for (unsigned int m=0; m<NumberOfLocalPoints; ++m) {
172 173 174 175 176 177 178 179 180 181 182 183
			// evaluate chebyshev polynomials at local points
			for (unsigned int o=1; o<ORDER; ++o) {
				T_of_x[o][0] = BasisType::T(o, LocalPoints[m].getX());
				T_of_x[o][1] = BasisType::T(o, LocalPoints[m].getY());
				T_of_x[o][2] = BasisType::T(o, LocalPoints[m].getZ());
			}
			
			// assemble interpolator
			for (unsigned int n=0; n<nnodes; ++n) {
				Interpolator[n*nnodes + m] = FReal(1.);
				for (unsigned int d=0; d<3; ++d) {
					const unsigned int j = node_ids[n][d];
184 185 186 187 188
					// FReal S_d = FReal(1.) / ORDER;
					// for (unsigned int o=1; o<ORDER; ++o)
					// 	S_d += FReal(2.) / ORDER * T_of_x[o][d] * T_of_roots[o][j];
					// Interpolator[n*nnodes + m] *= S_d;
					FReal S_d = c0 ;
189
					for (unsigned int o=1; o<ORDER; ++o)
190 191
						S_d +=  T_of_x[o][d] * T_of_roots[o][j];
					S_d = c1 + c2*S_d ; 
192 193 194 195 196 197 198 199
					Interpolator[n*nnodes + m] *= S_d;
				}
			}
			
		}
		
	}

200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226

	void assembleInterpolator(const unsigned int M, const FReal *const x, FReal *const S) const
	{
		// values of chebyshev polynomials of source particle: T_o(x_i)
		FReal T_of_x[ORDER];

		// loop: local points (mapped in [-1,1])
		for (unsigned int m=0; m<M; ++m) {
			// evaluate chebyshev polynomials at local points
			for (unsigned int o=1; o<ORDER; ++o)
				T_of_x[o] = BasisType::T(o, x[m]);
			
			for (unsigned int n=0; n<ORDER; ++n) {
				S[n*M + m] = FReal(1.) / ORDER;
				for (unsigned int o=1; o<ORDER; ++o)
					S[n*M + m] += FReal(2.) / ORDER * T_of_x[o] * T_of_roots[o][n];
			}
			
		}
		
	}






227 228
	
	/**
229 230
	 * Particle to moment: application of \f$S_\ell(y,\bar y_n)\f$
	 * (anterpolation, it is the transposed interpolation)
231 232
	 */
	template <class ContainerClass>
COULAUD Olivier's avatar
COULAUD Olivier committed
233
	void applyP2M(const FPoint& center,
234 235
								const FReal width,
								FReal *const multipoleExpansion,
236
								const ContainerClass *const sourceParticles) const;
237 238 239 240


	
	/**
241
	 * Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ (interpolation)
242 243
	 */
	template <class ContainerClass>
COULAUD Olivier's avatar
COULAUD Olivier committed
244
	void applyL2P(const FPoint& center,
245 246
								const FReal width,
								const FReal *const localExpansion,
247
								ContainerClass *const localParticles) const;
248

249 250 251 252 253

	/**
	 * Local to particle operation: application of \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation)
	 */
	template <class ContainerClass>
COULAUD Olivier's avatar
COULAUD Olivier committed
254
	void applyL2PGradient(const FPoint& center,
255 256
												const FReal width,
												const FReal *const localExpansion,
257
												ContainerClass *const localParticles) const;
258

259 260 261 262 263
	/**
	 * Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ and
	 * \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation)
	 */
	template <class ContainerClass>
COULAUD Olivier's avatar
COULAUD Olivier committed
264
	void applyL2PTotal(const FPoint& center,
265 266 267 268
										 const FReal width,
										 const FReal *const localExpansion,
										 ContainerClass *const localParticles) const;
	
269
	
270
	/*
271 272 273 274 275 276
	void applyM2M(const unsigned int ChildIndex,
								const FReal *const ChildExpansion,
								FReal *const ParentExpansion) const
	{
		FBlas::gemtva(nnodes, nnodes, FReal(1.),
									ChildParentInterpolator[ChildIndex],
messner's avatar
messner committed
277
									const_cast<FReal*>(ChildExpansion), ParentExpansion);
278
	}
279

280 281 282 283 284 285
	void applyL2L(const unsigned int ChildIndex,
								const FReal *const ParentExpansion,
								FReal *const ChildExpansion) const
	{
		FBlas::gemva(nnodes, nnodes, FReal(1.),
								 ChildParentInterpolator[ChildIndex],
messner's avatar
messner committed
286
								 const_cast<FReal*>(ParentExpansion), ChildExpansion);
287
	}
288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
	*/
	

	
	void applyM2M(const unsigned int ChildIndex,
								const FReal *const ChildExpansion,
								FReal *const ParentExpansion) const
	{
		FReal Exp[nnodes], PermExp[nnodes];
		FBlas::gemtm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
								 ChildParentInterpolator[ChildIndex], ORDER,
								 const_cast<FReal*>(ChildExpansion), ORDER, PermExp, ORDER);
		
		for (unsigned int n=0; n<nnodes; ++n)	Exp[n] = PermExp[perm[1][n]];
		FBlas::gemtm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
								 ChildParentInterpolator[ChildIndex] + 2 * ORDER*ORDER, ORDER,
								 Exp, ORDER, PermExp, ORDER);

		for (unsigned int n=0; n<nnodes; ++n)	Exp[perm[1][n]] = PermExp[perm[2][n]];
		FBlas::gemtm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
								 ChildParentInterpolator[ChildIndex] + 1 * ORDER*ORDER, ORDER,
								 Exp, ORDER, PermExp, ORDER);

		for (unsigned int n=0; n<nnodes; ++n)	ParentExpansion[perm[2][n]] += PermExp[n];
	}


	void applyL2L(const unsigned int ChildIndex,
								const FReal *const ParentExpansion,
								FReal *const ChildExpansion) const
	{
		FReal Exp[nnodes], PermExp[nnodes];
		FBlas::gemm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
								ChildParentInterpolator[ChildIndex], ORDER,
								const_cast<FReal*>(ParentExpansion), ORDER, PermExp, ORDER);
		
		for (unsigned int n=0; n<nnodes; ++n)	Exp[n] = PermExp[perm[1][n]];
		FBlas::gemm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
								ChildParentInterpolator[ChildIndex] + 2 * ORDER*ORDER, ORDER,
								Exp, ORDER, PermExp, ORDER);
		
		for (unsigned int n=0; n<nnodes; ++n)	Exp[perm[1][n]] = PermExp[perm[2][n]];
		FBlas::gemm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
								ChildParentInterpolator[ChildIndex] + 1 * ORDER*ORDER, ORDER,
								Exp, ORDER, PermExp, ORDER);

		for (unsigned int n=0; n<nnodes; ++n)	ChildExpansion[perm[2][n]] += PermExp[n];
	}
	
337 338
	
};
339 340 341



342 343 344 345 346 347 348 349 350 351




/**
 * Particle to moment: application of \f$S_\ell(y,\bar y_n)\f$
 * (anterpolation, it is the transposed interpolation)
 */
template <int ORDER>
template <class ContainerClass>
COULAUD Olivier's avatar
COULAUD Olivier committed
352
inline void FChebInterpolator<ORDER>::applyP2M(const FPoint& center,
353 354 355 356 357 358 359 360 361
																							 const FReal width,
																							 FReal *const multipoleExpansion,
																							 const ContainerClass *const sourceParticles) const
{
	// set all multipole expansions to zero
	FBlas::setzero(nnodes, multipoleExpansion);

	// allocate stuff
	const map_glob_loc map(center, width);
COULAUD Olivier's avatar
COULAUD Olivier committed
362
	FPoint localPosition;
363
	FReal T_of_x[ORDER][3];
364
	FReal S[3], c1;
365 366
	//
	FReal xpx,ypy,zpz ;
367
	c1 = FReal(8.) / nnodes ; // 1 flop
368 369 370 371 372
	// loop over source particles
	typename ContainerClass::ConstBasicIterator iter(*sourceParticles);
	while(iter.hasNotFinished()){

		// map global position to [-1,1]
373
		map(iter.data().getPosition(), localPosition); // 15 flops
374 375 376 377 378

		// evaluate chebyshev polynomials of source particle: T_o(x_i)
		T_of_x[0][0] = FReal(1.);	T_of_x[1][0] = localPosition.getX();
		T_of_x[0][1] = FReal(1.);	T_of_x[1][1] = localPosition.getY();
		T_of_x[0][2] = FReal(1.);	T_of_x[1][2] = localPosition.getZ();
379 380 381
		xpx = FReal(2.) * localPosition.getX() ; // 1 flop
		ypy = FReal(2.) * localPosition.getY() ; // 1 flop
		zpz = FReal(2.) * localPosition.getZ() ; // 1 flop
382

383
		for (unsigned int o=2; o<ORDER; ++o) {
384 385 386
			T_of_x[o][0] = xpx * T_of_x[o-1][0] - T_of_x[o-2][0]; // 2 flops
			T_of_x[o][1] = ypy * T_of_x[o-1][1] - T_of_x[o-2][1];	// 2 flops
			T_of_x[o][2] = zpz * T_of_x[o-1][2] - T_of_x[o-2][2]; // 2 flops
387 388 389 390 391 392
		}
		
		// anterpolate
		const FReal sourceValue = iter.data().getPhysicalValue();
		for (unsigned int n=0; n<nnodes; ++n) {
			const unsigned int j[3] = {node_ids[n][0], node_ids[n][1], node_ids[n][2]};
393 394 395
			S[0] = FReal(0.5) + T_of_x[1][0] * T_of_roots[1][j[0]]; // 2 flops 
			S[1] = FReal(0.5) + T_of_x[1][1] * T_of_roots[1][j[1]]; // 2 flops
			S[2] = FReal(0.5) + T_of_x[1][2] * T_of_roots[1][j[2]]; // 2 flops
396
			for (unsigned int o=2; o<ORDER; ++o) {
397 398 399
				S[0] += T_of_x[o][0] * T_of_roots[o][j[0]]; // 2 flops
				S[1] += T_of_x[o][1] * T_of_roots[o][j[1]]; // 2 flops
				S[2] += T_of_x[o][2] * T_of_roots[o][j[2]]; // 2 flops
400 401
			}
			// gather contributions
402
			multipoleExpansion[n]	+= c1 *	S[0] * S[1] * S[2] *	sourceValue; // 4 flops
403 404 405 406 407 408 409 410 411 412 413 414
		}
		// increment source iterator
		iter.gotoNext();
	}
}


/**
 * Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ (interpolation)
 */
template <int ORDER>
template <class ContainerClass>
COULAUD Olivier's avatar
COULAUD Olivier committed
415
inline void FChebInterpolator<ORDER>::applyL2P(const FPoint& center,
416 417 418 419 420 421
																							 const FReal width,
																							 const FReal *const localExpansion,
																							 ContainerClass *const localParticles) const
{
	// allocate stuff
	const map_glob_loc map(center, width);
COULAUD Olivier's avatar
COULAUD Olivier committed
422
	FPoint localPosition;
423
	FReal T_of_x[ORDER][3];
424 425 426 427
	FReal xpx,ypy,zpz ;
	FReal S[3],c1;
	//
	c1 = FReal(8.) / nnodes ;
428 429 430 431 432 433 434 435 436 437
	typename ContainerClass::BasicIterator iter(*localParticles);
	while(iter.hasNotFinished()){
			
		// map global position to [-1,1]
		map(iter.data().getPosition(), localPosition);

		// evaluate chebyshev polynomials of source particle: T_o(x_i)
		T_of_x[0][0] = FReal(1.);	T_of_x[1][0] = localPosition.getX();
		T_of_x[0][1] = FReal(1.);	T_of_x[1][1] = localPosition.getY();
		T_of_x[0][2] = FReal(1.);	T_of_x[1][2] = localPosition.getZ();
438 439 440
		xpx = FReal(2.) * localPosition.getX() ;
		ypy = FReal(2.) * localPosition.getY() ;
		zpz = FReal(2.) * localPosition.getZ() ;
441
		for (unsigned int o=2; o<ORDER; ++o) {
442 443 444
			T_of_x[o][0] = xpx * T_of_x[o-1][0] - T_of_x[o-2][0];
			T_of_x[o][1] = ypy * T_of_x[o-1][1] - T_of_x[o-2][1];
			T_of_x[o][2] = zpz * T_of_x[o-1][2] - T_of_x[o-2][2];
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
		}

		// interpolate and increment target value
		FReal targetValue = iter.data().getPotential();
		for (unsigned int n=0; n<nnodes; ++n) {
			const unsigned int j[3] = {node_ids[n][0], node_ids[n][1], node_ids[n][2]};
			S[0] = T_of_x[1][0] * T_of_roots[1][j[0]];
			S[1] = T_of_x[1][1] * T_of_roots[1][j[1]];
			S[2] = T_of_x[1][2] * T_of_roots[1][j[2]];
			for (unsigned int o=2; o<ORDER; ++o) {
				S[0] += T_of_x[o][0] * T_of_roots[o][j[0]];
				S[1] += T_of_x[o][1] * T_of_roots[o][j[1]];
				S[2] += T_of_x[o][2] * T_of_roots[o][j[2]];
			}
			// gather contributions
460 461 462 463 464 465 466 467
			// S[0] *= FReal(2.); S[0] += FReal(1.);
			// S[1] *= FReal(2.); S[1] += FReal(1.);
			// S[2] *= FReal(2.); S[2] += FReal(1.);
			// targetValue	+= S[0] * S[1] * S[2] * localExpansion[n];
			S[0] += FReal(0.5);
			S[1] += FReal(0.5);
			S[2] += FReal(0.5);
			//
468
			targetValue	+= S[0] * S[1] * S[2] * localExpansion[n];
469
		}
470
		// scale
471 472
		//		targetValue /= nnodes;
		targetValue *= c1;
473

474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490
		// set potential
		iter.data().setPotential(targetValue);
		// increment target iterator
		iter.gotoNext();
	}
}






/**
 * Local to particle operation: application of \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation)
 */
template <int ORDER>
template <class ContainerClass>
COULAUD Olivier's avatar
COULAUD Olivier committed
491
inline void FChebInterpolator<ORDER>::applyL2PGradient(const FPoint& center,
492 493 494 495 496 497
																											 const FReal width,
																											 const FReal *const localExpansion,
																											 ContainerClass *const localParticles) const
{
	// setup local to global mapping
	const map_glob_loc map(center, width);
COULAUD Olivier's avatar
COULAUD Olivier committed
498
	FPoint Jacobian;
499 500
	map.computeJacobian(Jacobian);
	const FReal jacobian[3] = {Jacobian.getX(), Jacobian.getY(), Jacobian.getZ()}; 
COULAUD Olivier's avatar
COULAUD Olivier committed
501
	FPoint localPosition;
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557
	FReal T_of_x[ORDER][3];
	FReal U_of_x[ORDER][3];
	FReal P[3];

	typename ContainerClass::BasicIterator iter(*localParticles);
	while(iter.hasNotFinished()){
			
		// map global position to [-1,1]
		map(iter.data().getPosition(), localPosition);
			
		// evaluate chebyshev polynomials of source particle
		// T_0(x_i) and T_1(x_i)
		T_of_x[0][0] = FReal(1.);	T_of_x[1][0] = localPosition.getX();
		T_of_x[0][1] = FReal(1.);	T_of_x[1][1] = localPosition.getY();
		T_of_x[0][2] = FReal(1.);	T_of_x[1][2] = localPosition.getZ();
		// U_0(x_i) and U_1(x_i)
		U_of_x[0][0] = FReal(1.);	U_of_x[1][0] = localPosition.getX() * FReal(2.);
		U_of_x[0][1] = FReal(1.);	U_of_x[1][1] = localPosition.getY() * FReal(2.);
		U_of_x[0][2] = FReal(1.);	U_of_x[1][2] = localPosition.getZ() * FReal(2.);
		for (unsigned int o=2; o<ORDER; ++o) {
			// T_o(x_i)
			T_of_x[o][0] = FReal(2.)*localPosition.getX()*T_of_x[o-1][0] - T_of_x[o-2][0];
			T_of_x[o][1] = FReal(2.)*localPosition.getY()*T_of_x[o-1][1] - T_of_x[o-2][1];
			T_of_x[o][2] = FReal(2.)*localPosition.getZ()*T_of_x[o-1][2] - T_of_x[o-2][2];
			// U_o(x_i)
			U_of_x[o][0] = FReal(2.)*localPosition.getX()*U_of_x[o-1][0] - U_of_x[o-2][0];
			U_of_x[o][1] = FReal(2.)*localPosition.getY()*U_of_x[o-1][1] - U_of_x[o-2][1];
			U_of_x[o][2] = FReal(2.)*localPosition.getZ()*U_of_x[o-1][2] - U_of_x[o-2][2];
		}

		// scale, because dT_o/dx = oU_{o-1}
		for (unsigned int o=2; o<ORDER; ++o) {
			U_of_x[o-1][0] *= FReal(o);
			U_of_x[o-1][1] *= FReal(o);
			U_of_x[o-1][2] *= FReal(o);
		}

		// apply P and increment forces
		FReal forces[3] = {FReal(0.), FReal(0.), FReal(0.)};
		for (unsigned int n=0; n<nnodes; ++n) {
			
			// tensor indices of chebyshev nodes
			const unsigned int j[3] = {node_ids[n][0], node_ids[n][1], node_ids[n][2]};

			// f0 component //////////////////////////////////////
			P[0] = U_of_x[0][0] * T_of_roots[1][j[0]];
			P[1] = T_of_x[1][1] * T_of_roots[1][j[1]];
			P[2] = T_of_x[1][2] * T_of_roots[1][j[2]];
			for (unsigned int o=2; o<ORDER; ++o) {
				P[0] += U_of_x[o-1][0] * T_of_roots[o][j[0]];
				P[1] += T_of_x[o  ][1] * T_of_roots[o][j[1]];
				P[2] += T_of_x[o  ][2] * T_of_roots[o][j[2]];
			}
			P[0] *= FReal(2.);
			P[1] *= FReal(2.); P[1] += FReal(1.);
			P[2] *= FReal(2.); P[2] += FReal(1.);
558
			forces[0]	+= P[0] * P[1] * P[2] * localExpansion[n];
559 560 561 562 563 564 565 566 567 568 569 570 571

			// f1 component //////////////////////////////////////
			P[0] = T_of_x[1][0] * T_of_roots[1][j[0]];
			P[1] = U_of_x[0][1] * T_of_roots[1][j[1]];
			P[2] = T_of_x[1][2] * T_of_roots[1][j[2]];
			for (unsigned int o=2; o<ORDER; ++o) {
				P[0] += T_of_x[o  ][0] * T_of_roots[o][j[0]];
				P[1] += U_of_x[o-1][1] * T_of_roots[o][j[1]];
				P[2] += T_of_x[o  ][2] * T_of_roots[o][j[2]];
			}
			P[0] *= FReal(2.); P[0] += FReal(1.);
			P[1] *= FReal(2.); 
			P[2] *= FReal(2.); P[2] += FReal(1.);
572
			forces[1]	+= P[0] * P[1] * P[2] * localExpansion[n];
573 574 575 576 577 578 579 580 581 582 583 584 585

			// f2 component //////////////////////////////////////
			P[0] = T_of_x[1][0] * T_of_roots[1][j[0]];
			P[1] = T_of_x[1][1] * T_of_roots[1][j[1]];
			P[2] = U_of_x[0][2] * T_of_roots[1][j[2]];
			for (unsigned int o=2; o<ORDER; ++o) {
				P[0] += T_of_x[o  ][0] * T_of_roots[o][j[0]];
				P[1] += T_of_x[o  ][1] * T_of_roots[o][j[1]];
				P[2] += U_of_x[o-1][2] * T_of_roots[o][j[2]];
			}
			P[0] *= FReal(2.); P[0] += FReal(1.);
			P[1] *= FReal(2.); P[1] += FReal(1.);
			P[2] *= FReal(2.);
586
			forces[2]	+= P[0] * P[1] * P[2] * localExpansion[n];
587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604
		}

		// scale forces
		forces[0] *= jacobian[0] / nnodes;
		forces[1] *= jacobian[1] / nnodes;
		forces[2] *= jacobian[2] / nnodes;

		// set computed forces
		iter.data().incForces(forces[0] * iter.data().getPhysicalValue(),
													forces[1] * iter.data().getPhysicalValue(),
													forces[2] * iter.data().getPhysicalValue());

		// increment iterator
		iter.gotoNext();
	}
}


605 606 607 608 609 610 611

/**
 * Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ and
 * \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation)
 */
template <int ORDER>
template <class ContainerClass>
COULAUD Olivier's avatar
COULAUD Olivier committed
612
inline void FChebInterpolator<ORDER>::applyL2PTotal(const FPoint& center,
613 614 615 616 617 618
																										const FReal width,
																										const FReal *const localExpansion,
																										ContainerClass *const localParticles) const
{
	// setup local to global mapping
	const map_glob_loc map(center, width);
COULAUD Olivier's avatar
COULAUD Olivier committed
619
	FPoint Jacobian;
620
	map.computeJacobian(Jacobian); // 6 flops
621
	const FReal jacobian[3] = {Jacobian.getX(), Jacobian.getY(), Jacobian.getZ()}; 
COULAUD Olivier's avatar
COULAUD Olivier committed
622
	FPoint localPosition;
623 624
	FReal T_of_x[ORDER][3];
	FReal U_of_x[ORDER][3];
625
	FReal P[6];
626 627
	//
	FReal xpx,ypy,zpz ;
628
	FReal c1 = FReal(8.0) / nnodes; // 1 flop
629
	//
630 631 632 633
	typename ContainerClass::BasicIterator iter(*localParticles);
	while(iter.hasNotFinished()){
			
		// map global position to [-1,1]
634
		map(iter.data().getPosition(), localPosition); // 15 flops
635 636 637
			
		// evaluate chebyshev polynomials of source particle
		// T_0(x_i) and T_1(x_i)
638 639 640
		xpx = FReal(2.) * localPosition.getX(); // 1 flop
		ypy = FReal(2.) * localPosition.getY(); // 1 flop
		zpz = FReal(2.) * localPosition.getZ(); // 1 flop
641
		//
642 643 644 645
		T_of_x[0][0] = FReal(1.);	T_of_x[1][0] = localPosition.getX();
		T_of_x[0][1] = FReal(1.);	T_of_x[1][1] = localPosition.getY();
		T_of_x[0][2] = FReal(1.);	T_of_x[1][2] = localPosition.getZ();
		// U_0(x_i) and U_1(x_i)
646 647 648 649 650 651
		// U_of_x[0][0] = FReal(1.);	U_of_x[1][0] = localPosition.getX() * FReal(2.);
		// U_of_x[0][1] = FReal(1.);	U_of_x[1][1] = localPosition.getY() * FReal(2.);
		// U_of_x[0][2] = FReal(1.);	U_of_x[1][2] = localPosition.getZ() * FReal(2.);
		U_of_x[0][0] = FReal(1.);	U_of_x[1][0] = xpx;
		U_of_x[0][1] = FReal(1.);	U_of_x[1][1] = ypy;
		U_of_x[0][2] = FReal(1.);	U_of_x[1][2] = zpz;
652 653
		for (unsigned int o=2; o<ORDER; ++o) {
			// T_o(x_i)
654 655 656 657 658 659 660
			// T_of_x[o][0] = FReal(2.)*localPosition.getX()*T_of_x[o-1][0] - T_of_x[o-2][0];
			// T_of_x[o][1] = FReal(2.)*localPosition.getY()*T_of_x[o-1][1] - T_of_x[o-2][1];
			// T_of_x[o][2] = FReal(2.)*localPosition.getZ()*T_of_x[o-1][2] - T_of_x[o-2][2];
			// // U_o(x_i)
			// U_of_x[o][0] = FReal(2.)*localPosition.getX()*U_of_x[o-1][0] - U_of_x[o-2][0];
			// U_of_x[o][1] = FReal(2.)*localPosition.getY()*U_of_x[o-1][1] - U_of_x[o-2][1];
			// U_of_x[o][2] = FReal(2.)*localPosition.getZ()*U_of_x[o-1][2] - U_of_x[o-2][2];
661 662 663
			T_of_x[o][0] = xpx * T_of_x[o-1][0] - T_of_x[o-2][0]; // 2 flops 
			T_of_x[o][1] = ypy * T_of_x[o-1][1] - T_of_x[o-2][1]; // 2 flops
			T_of_x[o][2] = zpz * T_of_x[o-1][2] - T_of_x[o-2][2]; // 2 flops
664
			// U_o(x_i)
665 666 667
			U_of_x[o][0] = xpx * U_of_x[o-1][0] - U_of_x[o-2][0]; // 2 flops
			U_of_x[o][1] = ypy * U_of_x[o-1][1] - U_of_x[o-2][1]; // 2 flops
			U_of_x[o][2] = zpz * U_of_x[o-1][2] - U_of_x[o-2][2]; // 2 flops
668 669 670 671
		}

		// scale, because dT_o/dx = oU_{o-1}
		for (unsigned int o=2; o<ORDER; ++o) {
672 673 674
			U_of_x[o-1][0] *= FReal(o); // 1 flops
			U_of_x[o-1][1] *= FReal(o); // 1 flops
			U_of_x[o-1][2] *= FReal(o); // 1 flops
675 676 677 678 679
		}

		// apply P and increment forces
		FReal potential = FReal(0.);
		FReal forces[3] = {FReal(0.), FReal(0.), FReal(0.)};
680 681 682
		//
		// Optimization:
		//   Here we compute 1/2 S and 1/2 P  rather S and F like in the paper
683
		for (unsigned int n=0; n<nnodes; ++n) {
684 685 686
		  
		  // tensor indices of chebyshev nodes
		  const unsigned int j[3] = {node_ids[n][0], node_ids[n][1], node_ids[n][2]};
COULAUD Olivier's avatar
COULAUD Olivier committed
687
		  //
688 689 690 691 692 693
		  P[0] = FReal(0.5) + T_of_x[1][0] * T_of_roots[1][j[0]]; // 2 flops 
		  P[1] = FReal(0.5) + T_of_x[1][1] * T_of_roots[1][j[1]]; // 2 flops
		  P[2] = FReal(0.5) + T_of_x[1][2] * T_of_roots[1][j[2]]; // 2 flops
		  P[3] = U_of_x[0][0] * T_of_roots[1][j[0]]; // 1 flop
		  P[4] = U_of_x[0][1] * T_of_roots[1][j[1]]; // 1 flop
		  P[5] = U_of_x[0][2] * T_of_roots[1][j[2]]; // 1 flop
694
		  for (unsigned int o=2; o<ORDER; ++o) {
695 696 697 698 699 700
		    P[0] += T_of_x[o  ][0] * T_of_roots[o][j[0]]; // 2 flop
		    P[1] += T_of_x[o  ][1] * T_of_roots[o][j[1]]; // 2 flop
		    P[2] += T_of_x[o  ][2] * T_of_roots[o][j[2]]; // 2 flop
		    P[3] += U_of_x[o-1][0] * T_of_roots[o][j[0]]; // 2 flop
		    P[4] += U_of_x[o-1][1] * T_of_roots[o][j[1]]; // 2 flop
		    P[5] += U_of_x[o-1][2] * T_of_roots[o][j[2]]; // 2 flop
701 702
		  }
		  //
703 704 705 706
		  potential	+= P[0] * P[1] * P[2] * localExpansion[n]; // 4 flops
		  forces[0]	+= P[3] * P[1] * P[2] * localExpansion[n]; // 4 flops
		  forces[1]	+= P[0] * P[4] * P[2] * localExpansion[n]; // 4 flops
		  forces[2]	+= P[0] * P[1] * P[5] * localExpansion[n]; // 4 flops
707
		}
708
		//
709 710 711 712
		potential *= c1 ; // 1 flop
		forces[0] *= jacobian[0] *c1; // 2 flops 
		forces[1] *= jacobian[1] *c1; // 2 flops
		forces[2] *= jacobian[2] *c1; // 2 flops
713
		// set computed potential
714 715
		iter.data().incPotential(potential); // 1 flop
		
716 717
		// set computed forces
		iter.data().incForces(forces[0] * iter.data().getPhysicalValue(),
718 719
													forces[1] * iter.data().getPhysicalValue(),
													forces[2] * iter.data().getPhysicalValue()); // 6 flops
720 721 722 723 724 725 726

		// increment iterator
		iter.gotoNext();
	}
}


727
#endif