diff --git a/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp b/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp
index 05debe8fbd1df59a8c296eb1444c4481e7aa3260..f3943bdf7e251fe9f0e887c83bfbe8357e72ff3c 100644
--- a/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp
+++ b/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp
@@ -114,38 +114,80 @@ public:
 	{
 		// loop: target particles
 		typename ContainerClass::BasicIterator iTargets(*TargetParticles);
-		while (iTargets.hasNotFinished()) {
-			ParticleClass& Target = iTargets.data();
-
-			{ // loop: source particles (target leaf cell == source leaf cell)
-				typename ContainerClass::ConstBasicIterator iSources(*SourceParticles);
-				while (iSources.hasNotFinished()) {
-					const ParticleClass& Source = iSources.data();
-					// only if target and source are not identical
-					if (&Target != &Source)	this->directInteraction(Target, Source);
-					// progress sources
-					iSources.gotoNext();
+
+		if (TargetParticles != SourceParticles) {
+
+			while (iTargets.hasNotFinished()) {
+				ParticleClass& Target = iTargets.data();
+				
+				{ // loop: source particles (target leaf cell == source leaf cell)
+					typename ContainerClass::ConstBasicIterator iSources(*SourceParticles);
+					while (iSources.hasNotFinished()) {
+						const ParticleClass& Source = iSources.data();
+						// only if target and source are not identical
+						this->directInteraction(Target, Source);
+						// progress sources
+						iSources.gotoNext();
+					}
+				}
+				
+				{ // loop: source particles (target leaf cell != source leaf cell)
+					for (unsigned int idx=0; idx<27; ++idx) {
+						if (NeighborSourceParticles[idx]) {
+							typename ContainerClass::ConstBasicIterator	iSources(*NeighborSourceParticles[idx]);
+							while (iSources.hasNotFinished()) {
+								const ParticleClass& Source = iSources.data();
+								// target and source cannot be identical
+								this->directInteraction(Target, Source);
+								// progress sources
+								iSources.gotoNext();
+							}
+						}
+					}
 				}
+		
+				// progress targets
+				iTargets.gotoNext();
 			}
-			
-			{ // loop: source particles (target leaf cell != source leaf cell)
-				for (unsigned int idx=0; idx<27; ++idx) {
-					if (NeighborSourceParticles[idx]) {
-						typename ContainerClass::ConstBasicIterator	iSources(*NeighborSourceParticles[idx]);
-						while (iSources.hasNotFinished()) {
-							const ParticleClass& Source = iSources.data();
-							// target and source cannot be identical
-							this->directInteraction(Target, Source);
-							// progress sources
-							iSources.gotoNext();
+
+		} else {
+
+			while (iTargets.hasNotFinished()) {
+				ParticleClass& Target = iTargets.data();
+				
+				{ // loop: source particles  (target leaf cell == source leaf cell)
+					typename ContainerClass::BasicIterator iSources = iTargets;
+					iSources.gotoNext();
+					while (iSources.hasNotFinished()) {
+						ParticleClass& Source = iSources.data();
+						// only if target and source are not identical
+						this->directInteractionMutual(Target, Source);
+						// progress sources
+						iSources.gotoNext();
+					}
+				}
+				
+				{ // loop: source particles (target leaf cell != source leaf cell)
+					for (unsigned int idx=0; idx<=13; ++idx) {
+						if (NeighborSourceParticles[idx]) {
+							typename ContainerClass::BasicIterator iSources(*NeighborSourceParticles[idx]);
+							while (iSources.hasNotFinished()) {
+								ParticleClass& Source = iSources.data();
+								// target and source cannot be identical
+								this->directInteractionMutual(Target, Source);
+								// progress sources
+								iSources.gotoNext();
+							}
 						}
 					}
 				}
+		
+				// progress targets
+				iTargets.gotoNext();
 			}
-			
-			// progress targets
-			iTargets.gotoNext();
+
 		}
+
 	}
 
 
@@ -164,6 +206,21 @@ private:
 		Target.incForces(force.getX(), force.getY(), force.getZ());
 	}
 
+	void directInteractionMutual(ParticleClass& Target, ParticleClass& Source) const
+	{
+		const FReal one_over_r = MatrixKernel->evaluate(Target.getPosition(), Source.getPosition());
+		const FReal wt = Target.getPhysicalValue();
+		const FReal ws = Source.getPhysicalValue();
+		// potential
+		Target.incPotential(one_over_r * ws);
+		Source.incPotential(one_over_r * wt);
+		// force
+		F3DPosition force(Source.getPosition() - Target.getPosition());
+		force *= ((ws*wt) * (one_over_r*one_over_r*one_over_r));
+		Target.incForces(  force.getX(),    force.getY(),    force.getZ());
+		Source.incForces((-force.getX()), (-force.getY()), (-force.getZ()));
+	}
+
 };
 
 
diff --git a/Src/Kernels/Chebyshev/FChebInterpolator.hpp b/Src/Kernels/Chebyshev/FChebInterpolator.hpp
index a2e48175da75d92c0099ca5b113df9e7a76e50ad..e9670c22d3ca66a75fab87820ea109fa06edc2d8 100644
--- a/Src/Kernels/Chebyshev/FChebInterpolator.hpp
+++ b/Src/Kernels/Chebyshev/FChebInterpolator.hpp
@@ -169,6 +169,16 @@ public:
 												const FReal *const localExpansion,
 												ContainerClass *const localParticles) const;
 
+	/**
+	 * Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ and
+	 * \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation)
+	 */
+	template <class ContainerClass>
+	void applyL2PTotal(const F3DPosition& center,
+										 const FReal width,
+										 const FReal *const localExpansion,
+										 ContainerClass *const localParticles) const;
+	
 	
 	void applyM2M(const unsigned int ChildIndex,
 								const FReal *const ChildExpansion,
@@ -305,8 +315,11 @@ inline void FChebInterpolator<ORDER>::applyL2P(const F3DPosition& center,
 			S[0] *= FReal(2.); S[0] += FReal(1.);
 			S[1] *= FReal(2.); S[1] += FReal(1.);
 			S[2] *= FReal(2.); S[2] += FReal(1.);
-			targetValue	+= FReal(1.) / nnodes *	S[0] * S[1] * S[2] * localExpansion[n];
+			targetValue	+= S[0] * S[1] * S[2] * localExpansion[n];
 		}
+		// scale
+		targetValue /= nnodes;
+
 		// set potential
 		iter.data().setPotential(targetValue);
 		// increment target iterator
@@ -438,4 +451,136 @@ inline void FChebInterpolator<ORDER>::applyL2PGradient(const F3DPosition& center
 }
 
 
+
+/**
+ * Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ and
+ * \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation)
+ */
+template <int ORDER>
+template <class ContainerClass>
+inline void FChebInterpolator<ORDER>::applyL2PTotal(const F3DPosition& center,
+																										const FReal width,
+																										const FReal *const localExpansion,
+																										ContainerClass *const localParticles) const
+{
+	// setup local to global mapping
+	const map_glob_loc map(center, width);
+	F3DPosition Jacobian;
+	map.computeJacobian(Jacobian);
+	const FReal jacobian[3] = {Jacobian.getX(), Jacobian.getY(), Jacobian.getZ()}; 
+	F3DPosition localPosition;
+	FReal T_of_x[ORDER][3];
+	FReal U_of_x[ORDER][3];
+	FReal P[3];
+
+	typename ContainerClass::BasicIterator iter(*localParticles);
+	while(iter.hasNotFinished()){
+			
+		// map global position to [-1,1]
+		map(iter.data().getPosition(), localPosition);
+			
+		// evaluate chebyshev polynomials of source particle
+		// T_0(x_i) and T_1(x_i)
+		T_of_x[0][0] = FReal(1.);	T_of_x[1][0] = localPosition.getX();
+		T_of_x[0][1] = FReal(1.);	T_of_x[1][1] = localPosition.getY();
+		T_of_x[0][2] = FReal(1.);	T_of_x[1][2] = localPosition.getZ();
+		// U_0(x_i) and U_1(x_i)
+		U_of_x[0][0] = FReal(1.);	U_of_x[1][0] = localPosition.getX() * FReal(2.);
+		U_of_x[0][1] = FReal(1.);	U_of_x[1][1] = localPosition.getY() * FReal(2.);
+		U_of_x[0][2] = FReal(1.);	U_of_x[1][2] = localPosition.getZ() * FReal(2.);
+		for (unsigned int o=2; o<ORDER; ++o) {
+			// T_o(x_i)
+			T_of_x[o][0] = FReal(2.)*localPosition.getX()*T_of_x[o-1][0] - T_of_x[o-2][0];
+			T_of_x[o][1] = FReal(2.)*localPosition.getY()*T_of_x[o-1][1] - T_of_x[o-2][1];
+			T_of_x[o][2] = FReal(2.)*localPosition.getZ()*T_of_x[o-1][2] - T_of_x[o-2][2];
+			// U_o(x_i)
+			U_of_x[o][0] = FReal(2.)*localPosition.getX()*U_of_x[o-1][0] - U_of_x[o-2][0];
+			U_of_x[o][1] = FReal(2.)*localPosition.getY()*U_of_x[o-1][1] - U_of_x[o-2][1];
+			U_of_x[o][2] = FReal(2.)*localPosition.getZ()*U_of_x[o-1][2] - U_of_x[o-2][2];
+		}
+
+		// scale, because dT_o/dx = oU_{o-1}
+		for (unsigned int o=2; o<ORDER; ++o) {
+			U_of_x[o-1][0] *= FReal(o);
+			U_of_x[o-1][1] *= FReal(o);
+			U_of_x[o-1][2] *= FReal(o);
+		}
+
+		// apply P and increment forces
+		FReal potential = FReal(0.);
+		FReal forces[3] = {FReal(0.), FReal(0.), FReal(0.)};
+		for (unsigned int n=0; n<nnodes; ++n) {
+			
+			// tensor indices of chebyshev nodes
+			const unsigned int j[3] = {node_ids[n][0], node_ids[n][1], node_ids[n][2]};
+
+			// f0 component //////////////////////////////////////
+			P[0] = U_of_x[0][0] * T_of_roots[1][j[0]];
+			P[1] = T_of_x[1][1] * T_of_roots[1][j[1]];
+			P[2] = T_of_x[1][2] * T_of_roots[1][j[2]];
+			// for potential
+			FReal S0 = T_of_x[1][0] * T_of_roots[1][j[0]];
+			for (unsigned int o=2; o<ORDER; ++o) {
+				P[0] += U_of_x[o-1][0] * T_of_roots[o][j[0]];
+				P[1] += T_of_x[o  ][1] * T_of_roots[o][j[1]];
+				P[2] += T_of_x[o  ][2] * T_of_roots[o][j[2]];
+				// for potential
+				S0   += T_of_x[o][0] * T_of_roots[o][j[0]];
+			}
+			P[0] *= FReal(2.);
+			P[1] *= FReal(2.); P[1] += FReal(1.);
+			P[2] *= FReal(2.); P[2] += FReal(1.);
+			forces[0]	+= P[0] * P[1] * P[2] * localExpansion[n];
+			// for potential
+			S0 *= FReal(2.); S0 += FReal(1.);
+			potential	+= FReal(1.) / nnodes *	S0 * P[1] * P[2] * localExpansion[n];
+
+			// f1 component //////////////////////////////////////
+			P[0] = T_of_x[1][0] * T_of_roots[1][j[0]];
+			P[1] = U_of_x[0][1] * T_of_roots[1][j[1]];
+			P[2] = T_of_x[1][2] * T_of_roots[1][j[2]];
+			for (unsigned int o=2; o<ORDER; ++o) {
+				P[0] += T_of_x[o  ][0] * T_of_roots[o][j[0]];
+				P[1] += U_of_x[o-1][1] * T_of_roots[o][j[1]];
+				P[2] += T_of_x[o  ][2] * T_of_roots[o][j[2]];
+			}
+			P[0] *= FReal(2.); P[0] += FReal(1.);
+			P[1] *= FReal(2.); 
+			P[2] *= FReal(2.); P[2] += FReal(1.);
+			forces[1]	+= P[0] * P[1] * P[2] * localExpansion[n];
+
+			// f2 component //////////////////////////////////////
+			P[0] = T_of_x[1][0] * T_of_roots[1][j[0]];
+			P[1] = T_of_x[1][1] * T_of_roots[1][j[1]];
+			P[2] = U_of_x[0][2] * T_of_roots[1][j[2]];
+			for (unsigned int o=2; o<ORDER; ++o) {
+				P[0] += T_of_x[o  ][0] * T_of_roots[o][j[0]];
+				P[1] += T_of_x[o  ][1] * T_of_roots[o][j[1]];
+				P[2] += U_of_x[o-1][2] * T_of_roots[o][j[2]];
+			}
+			P[0] *= FReal(2.); P[0] += FReal(1.);
+			P[1] *= FReal(2.); P[1] += FReal(1.);
+			P[2] *= FReal(2.);
+			forces[2]	+= P[0] * P[1] * P[2] * localExpansion[n];
+		}
+
+		// scale forces
+		forces[0] *= jacobian[0] / nnodes;
+		forces[1] *= jacobian[1] / nnodes;
+		forces[2] *= jacobian[2] / nnodes;
+
+		// set computed potential
+		iter.data().incPotential(potential);
+
+		// set computed forces
+		iter.data().incForces(forces[0] * iter.data().getPhysicalValue(),
+													forces[1] * iter.data().getPhysicalValue(),
+													forces[2] * iter.data().getPhysicalValue());
+
+		// increment iterator
+		iter.gotoNext();
+	}
+}
+
+
 #endif
diff --git a/Src/Kernels/Chebyshev/FChebKernel.hpp b/Src/Kernels/Chebyshev/FChebKernel.hpp
index 40a9a656a591f7d77e016d4166e2a1b45ceedd7d..7feceabc75af6ef0e5307308ff34a7f279dc13f5 100644
--- a/Src/Kernels/Chebyshev/FChebKernel.hpp
+++ b/Src/Kernels/Chebyshev/FChebKernel.hpp
@@ -165,13 +165,20 @@ public:
 		M2LHandler->applyU(LeafCell->getLocal() + AbstractBaseClass::nnodes,
 											 const_cast<CellClass*>(LeafCell)->getLocal());
 		
-		// 2.a) apply Sx
 		const F3DPosition LeafCellCenter(getLeafCellCenter(LeafCell->getCoordinate()));
-		AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter,
-																							AbstractBaseClass::BoxWidthLeaf,
-																							LeafCell->getLocal(),
-																							TargetParticles);
-		// 2.b) apply Px (grad Sx)
+
+		//// 2.a) apply Sx
+		//AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter,
+		//																					AbstractBaseClass::BoxWidthLeaf,
+		//																					LeafCell->getLocal(),
+		//																					TargetParticles);
+		//// 2.b) apply Px (grad Sx)
+		//AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter,
+		//																									AbstractBaseClass::BoxWidthLeaf,
+		//																									LeafCell->getLocal(),
+		//																									TargetParticles);
+
+		// 2.c) apply Sx and Px (grad Sx)
 		AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter,
 																											AbstractBaseClass::BoxWidthLeaf,
 																											LeafCell->getLocal(),
diff --git a/Src/Kernels/Chebyshev/FChebSymKernel.hpp b/Src/Kernels/Chebyshev/FChebSymKernel.hpp
index d2af4e0c0e32c7651cad651a88e80984eaf295b3..59a5a05e07cfbea90c935a8738fe046767c85f4f 100644
--- a/Src/Kernels/Chebyshev/FChebSymKernel.hpp
+++ b/Src/Kernels/Chebyshev/FChebSymKernel.hpp
@@ -313,17 +313,24 @@ public:
 	void L2P(const CellClass* const LeafCell,
 					 ContainerClass* const TargetParticles)
 	{
-		// a) apply Sx
 		const F3DPosition LeafCellCenter(getLeafCellCenter(LeafCell->getCoordinate()));
-		AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter,
-																							AbstractBaseClass::BoxWidthLeaf,
-																							LeafCell->getLocal(),
-																							TargetParticles);
-		// b) apply Px (grad Sx)
-		AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter,
-																											AbstractBaseClass::BoxWidthLeaf,
-																											LeafCell->getLocal(),
-																											TargetParticles);
+
+//		// a) apply Sx
+//		AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter,
+//																							AbstractBaseClass::BoxWidthLeaf,
+//																							LeafCell->getLocal(),
+//																							TargetParticles);
+//		// b) apply Px (grad Sx)
+//		AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter,
+//																											AbstractBaseClass::BoxWidthLeaf,
+//																											LeafCell->getLocal(),
+//																											TargetParticles);
+
+		// c) apply Sx and Px (grad Sx)
+		AbstractBaseClass::Interpolator->applyL2PTotal(LeafCellCenter,
+																									 AbstractBaseClass::BoxWidthLeaf,
+																									 LeafCell->getLocal(),
+																									 TargetParticles);
 	}
 
 
diff --git a/Tests/Kernels/testCompareKernels.cpp b/Tests/Kernels/testCompareKernels.cpp
index a6e5f90546612fc97d32818ce192f05202b2cd94..f71229e9f7fe9828035a04366c4d4b0ee5aec92e 100644
--- a/Tests/Kernels/testCompareKernels.cpp
+++ b/Tests/Kernels/testCompareKernels.cpp
@@ -190,8 +190,8 @@ int main(int argc, char* argv[])
     {	// begin Chebyshef kernel
 
         // accuracy
-        const unsigned int ORDER = 5;
-        const FReal epsilon = FReal(1e-5);
+        const unsigned int ORDER = 7;
+        const FReal epsilon = FReal(1e-7);
 
         FReal* p1;  p1  = NULL;
         FReal* f1;  p1  = NULL;
diff --git a/Tests/Utils/testChebSxUCBSy.cpp b/Tests/Utils/testChebSxUCBSy.cpp
index a7235fe01eeee428d4fb499f9d04e2b8750ec27d..38a6ca4e47d08fbe4db1cb30c2b00dc9efe9f6e0 100644
--- a/Tests/Utils/testChebSxUCBSy.cpp
+++ b/Tests/Utils/testChebSxUCBSy.cpp
@@ -26,6 +26,7 @@
 
 #include "../../Src/Utils/FTic.hpp"
 #include "../../Src/Utils/FMath.hpp"
+#include "../../Src/Utils/FParameters.hpp"
 
 #include "../../Src/Containers/FVector.hpp"
 
@@ -136,7 +137,7 @@ int main(int argc, char* argv[])
 	////////////////////////////////////////////////////////////////////
 	// approximative computation
 	const unsigned int ORDER = 5;
-	const FReal epsilon = FReal(atof(argv[1]));
+	const FReal epsilon = FParameters::getValue(argc, argv, "-eps", 1e-5);
 	const unsigned int nnodes = TensorTraits<ORDER>::nnodes;
 	typedef FChebInterpolator<ORDER> InterpolatorClass;
 	InterpolatorClass S;
@@ -181,7 +182,7 @@ int main(int argc, char* argv[])
 	*/
 
 	// Interpolate f_i = \sum_m^L S(x_i,\bar x_m) * F_m
-	S.applyL2P(cx, width, F, X.getTargets());
+	S.applyL2PTotal(cx, width, F, X.getTargets());
 
 	time.tac();
 	std::cout << "Done in " << time.elapsed() << "sec." << std::endl;