Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
ScalFMM
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
5
Issues
5
List
Boards
Labels
Service Desk
Milestones
Operations
Operations
Incidents
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
solverstack
ScalFMM
Commits
0f163780
Commit
0f163780
authored
May 23, 2012
by
Matthias Messner
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
merged individual files from starpu_dev to master branch
parent
95b3fe18
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
937 additions
and
369 deletions
+937
-369
Src/Containers/FTreeCoordinate.hpp
Src/Containers/FTreeCoordinate.hpp
+6
-0
Src/Kernels/Chebyshev/FAbstractChebKernel.hpp
Src/Kernels/Chebyshev/FAbstractChebKernel.hpp
+7
-5
Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp
Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp
+70
-24
Src/Kernels/Chebyshev/FChebInterpolator.hpp
Src/Kernels/Chebyshev/FChebInterpolator.hpp
+32
-25
Src/Kernels/Chebyshev/FChebSymKernel.hpp
Src/Kernels/Chebyshev/FChebSymKernel.hpp
+15
-143
Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp
Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp
+270
-0
Src/Kernels/Chebyshev/FChebSymmetries.hpp
Src/Kernels/Chebyshev/FChebSymmetries.hpp
+4
-33
Src/Kernels/Spherical/FAbstractSphericalKernel.hpp
Src/Kernels/Spherical/FAbstractSphericalKernel.hpp
+11
-35
Src/Kernels/Spherical/FHarmonic.hpp
Src/Kernels/Spherical/FHarmonic.hpp
+15
-41
Src/ScalFmmConfig.h.cmake
Src/ScalFmmConfig.h.cmake
+12
-0
Src/Utils/FBlas.hpp
Src/Utils/FBlas.hpp
+4
-4
Src/Utils/FComplexe.hpp
Src/Utils/FComplexe.hpp
+0
-13
Src/Utils/FMath.hpp
Src/Utils/FMath.hpp
+17
-1
Src/Utils/FPoint.hpp
Src/Utils/FPoint.hpp
+1
-17
Src/Utils/FSpherical.hpp
Src/Utils/FSpherical.hpp
+0
-24
Tests/Kernels/testCountM2LChebFlop.cpp
Tests/Kernels/testCountM2LChebFlop.cpp
+373
-0
Tests/Kernels/testFlopsChebAlgorithm.cpp
Tests/Kernels/testFlopsChebAlgorithm.cpp
+6
-4
Tests/Utils/testChebBinarySymM2L.cpp
Tests/Utils/testChebBinarySymM2L.cpp
+94
-0
No files found.
Src/Containers/FTreeCoordinate.hpp
View file @
0f163780
...
...
@@ -48,6 +48,12 @@ public:
data
[
2
]
=
inZ
;
}
explicit
FTreeCoordinate
(
const
int
inPosition
[
3
])
{
data
[
0
]
=
inPosition
[
0
];
data
[
1
]
=
inPosition
[
1
];
data
[
2
]
=
inPosition
[
2
];
}
/**
* Copy constructor
* @param other the source class to copy
...
...
Src/Kernels/Chebyshev/FAbstractChebKernel.hpp
View file @
0f163780
...
...
@@ -60,7 +60,6 @@ protected:
BoxCorner
.
getZ
()
+
(
FReal
(
Coordinate
.
getZ
())
+
FReal
(
.5
))
*
BoxWidthLeaf
);
}
public:
/**
* The constructor initializes all constant attributes and it reads the
...
...
@@ -80,6 +79,9 @@ public:
/* empty */
}
const
InterpolatorClass
*
const
getPtrToInterpolator
()
const
{
return
Interpolator
.
getPtr
();
}
virtual
void
P2M
(
CellClass
*
const
LeafCell
,
const
ContainerClass
*
const
SourceParticles
)
=
0
;
...
...
@@ -193,12 +195,12 @@ public:
private:
void
directInteraction
(
ParticleClass
&
Target
,
const
ParticleClass
&
Source
)
const
//
21
overall flops
void
directInteraction
(
ParticleClass
&
Target
,
const
ParticleClass
&
Source
)
const
//
34
overall flops
{
FPoint
xy
(
Source
.
getPosition
()
-
Target
.
getPosition
());
// 3 flops
const
FReal
one_over_r
=
FReal
(
1.
)
/
FMath
::
Sqrt
(
xy
.
getX
()
*
xy
.
getX
()
+
xy
.
getY
()
*
xy
.
getY
()
+
xy
.
getZ
()
*
xy
.
getZ
());
// 1 + 1
+ 1 + 5 = 8
flops
xy
.
getZ
()
*
xy
.
getZ
());
// 1 + 1
5 + 5 = 21
flops
const
FReal
wt
=
Target
.
getPhysicalValue
();
const
FReal
ws
=
Source
.
getPhysicalValue
();
// potential
...
...
@@ -208,12 +210,12 @@ private:
Target
.
incForces
(
xy
.
getX
(),
xy
.
getY
(),
xy
.
getZ
());
// 3 flops
}
void
directInteractionMutual
(
ParticleClass
&
Target
,
ParticleClass
&
Source
)
const
//
26
overall flops
void
directInteractionMutual
(
ParticleClass
&
Target
,
ParticleClass
&
Source
)
const
//
39
overall flops
{
FPoint
xy
(
Source
.
getPosition
()
-
Target
.
getPosition
());
// 3 flops
const
FReal
one_over_r
=
FReal
(
1.
)
/
FMath
::
Sqrt
(
xy
.
getX
()
*
xy
.
getX
()
+
xy
.
getY
()
*
xy
.
getY
()
+
xy
.
getZ
()
*
xy
.
getZ
());
// 1 + 1
+ 1 + 5 = 8
flops
xy
.
getZ
()
*
xy
.
getZ
());
// 1 + 1
5 + 5 = 21
flops
const
FReal
wt
=
Target
.
getPhysicalValue
();
const
FReal
ws
=
Source
.
getPhysicalValue
();
// potential
...
...
Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp
View file @
0f163780
...
...
@@ -37,7 +37,7 @@ class FChebFlopsSymKernel
:
public
FAbstractKernels
<
ParticleClass
,
CellClass
,
ContainerClass
>
{
enum
{
nnodes
=
TensorTraits
<
ORDER
>::
nnodes
};
public:
/// Handler to deal with all symmetries: Stores permutation indices and
/// vectors to reduce 343 different interactions to 16 only.
struct
SymmetryHandler
;
...
...
@@ -46,31 +46,41 @@ class FChebFlopsSymKernel
const
FSmartPointer
<
MatrixKernelClass
,
FSmartPointerMemory
>
MatrixKernel
;
const
FSmartPointer
<
SymmetryHandler
,
FSmartPointerMemory
>
SymHandler
;
/// tree height
const
unsigned
int
inTreeHeight
;
// count permuted local and multipole expansions
unsigned
int
*
countExp
;
unsigned
long
long
flopsP2M
,
flopsM2M
,
flopsM2L
,
flopsL2L
,
flopsL2P
,
flopsP2P
;
unsigned
long
long
*
flopsPerLevelM2M
,
*
flopsPerLevelM2L
,
*
flopsPerLevelL2L
;
// start flop counters
unsigned
int
countFlopsP2MorL2P
(
const
unsigned
int
N
)
const
{
return
1
+
N
*
(
nnodes
*
(
6
*
ORDER
-
2
)
+
6
*
ORDER
+
6
);
}
//{ return N*nnodes*(15*ORDER-19) + ORDER*(3*ORDER-6) + 2; }
//{ return N * (nnodes*(24*ORDER-26) - 1); }
unsigned
int
countFlopsM2MorL2L
()
const
{
return
nnodes
*
(
nnodes
*
2
-
1
);
}
//unsigned int countFlopsL2PGradient(const unsigned int N) const
//{ return 3 * N * (nnodes*(17*ORDER-21) - 1); }
unsigned
int
countFlopsL2PTotal
(
const
unsigned
int
N
)
const
{
return
7
+
N
*
(
nnodes
*
(
12
*
ORDER
-
1
)
+
15
*
ORDER
+
2
);
}
//{ return N * (nnodes*2 -1 ) + countFlopsL2PGradient(N); }
{
return
3
*
nnodes
*
(
2
*
ORDER
-
1
);
}
unsigned
int
countFlopsM2L
(
const
unsigned
int
nexp
,
const
unsigned
int
rank
)
const
{
return
nexp
*
(
4
*
nnodes
*
rank
-
rank
-
nnodes
);
}
unsigned
int
countFlopsP2P
()
const
{
return
21
;
}
{
return
34
;
}
unsigned
int
countFlopsP2Pmutual
()
const
{
return
26
;
}
{
return
39
;
}
unsigned
int
countFlopsP2M
(
const
unsigned
int
N
)
const
{
const
unsigned
first
=
N
*
(
18
+
(
ORDER
-
2
)
*
6
+
(
ORDER
-
1
)
*
(
6
+
(
ORDER
-
1
)
*
(
6
+
(
ORDER
-
1
)
*
2
)));
const
unsigned
W2
=
3
*
ORDER
*
(
2
*
(
ORDER
-
1
)
-
1
);
const
unsigned
W4
=
3
*
(
ORDER
*
(
ORDER
-
1
)
*
(
2
*
(
ORDER
-
1
)
-
1
)
+
ORDER
*
ORDER
*
(
2
*
(
ORDER
-
1
)
-
1
));
const
unsigned
W8
=
3
*
(
2
*
(
ORDER
-
1
)
-
1
)
*
(
ORDER
*
(
ORDER
-
1
)
*
(
ORDER
-
1
)
+
ORDER
*
ORDER
*
(
ORDER
-
1
)
+
nnodes
);
return
first
+
W2
+
W4
+
W8
+
nnodes
*
11
;
}
unsigned
int
countFlopsL2PTotal
(
const
unsigned
int
N
)
const
{
const
unsigned
W0
=
nnodes
;
const
unsigned
W2
=
3
*
(
ORDER
-
1
)
*
ORDER
*
ORDER
*
2
*
ORDER
;
const
unsigned
W4
=
3
*
ORDER
*
(
ORDER
-
1
)
*
(
ORDER
-
1
)
*
2
*
ORDER
;
const
unsigned
W8
=
(
ORDER
-
1
)
*
(
ORDER
-
1
)
*
(
ORDER
-
1
)
*
(
2
*
ORDER
-
1
);
const
unsigned
second
=
N
*
(
38
+
(
ORDER
-
2
)
*
15
+
(
ORDER
-
1
)
*
((
ORDER
-
1
)
*
(
27
+
(
ORDER
-
1
)
*
16
)))
+
6
;
return
W0
+
W2
+
W4
+
W8
+
second
;
}
// end flop counters
public:
...
...
@@ -79,14 +89,22 @@ public:
* precomputed and compressed M2L operators from a binary file (an
* runtime_error is thrown if the required file is not valid).
*/
FChebFlopsSymKernel
(
const
int
inTreeHeight
,
FChebFlopsSymKernel
(
const
int
_
inTreeHeight
,
const
FPoint
&
inBoxCenter
,
const
FReal
inBoxWidth
,
const
FReal
Epsilon
)
:
MatrixKernel
(
new
MatrixKernelClass
()),
SymHandler
(
new
SymmetryHandler
(
MatrixKernel
.
getPtr
(),
Epsilon
)),
flopsP2M
(
0
),
flopsM2M
(
0
),
flopsM2L
(
0
),
flopsL2L
(
0
),
flopsL2P
(
0
),
flopsP2P
(
0
)
{
countExp
=
new
unsigned
int
[
343
];
}
SymHandler
(
new
SymmetryHandler
(
MatrixKernel
.
getPtr
(),
Epsilon
)),
inTreeHeight
(
_inTreeHeight
),
flopsP2M
(
0
),
flopsM2M
(
0
),
flopsM2L
(
0
),
flopsL2L
(
0
),
flopsL2P
(
0
),
flopsP2P
(
0
),
flopsPerLevelM2M
(
NULL
),
flopsPerLevelM2L
(
NULL
),
flopsPerLevelL2L
(
NULL
)
{
countExp
=
new
unsigned
int
[
343
];
flopsPerLevelM2M
=
new
unsigned
long
long
[
inTreeHeight
];
flopsPerLevelM2L
=
new
unsigned
long
long
[
inTreeHeight
];
flopsPerLevelL2L
=
new
unsigned
long
long
[
inTreeHeight
];
for
(
unsigned
int
level
=
0
;
level
<
inTreeHeight
;
++
level
)
flopsPerLevelM2M
[
level
]
=
flopsPerLevelM2L
[
level
]
=
flopsPerLevelL2L
[
level
]
=
0
;
}
...
...
@@ -110,26 +128,48 @@ public:
<<
"
\n
- Flops for L2L = "
<<
flopsL2L
<<
"
\n
- Flops for L2P = "
<<
flopsL2P
<<
"
\n
- Flops for P2P = "
<<
flopsP2P
<<
"
\n
- Overall Flops = "
<<
flopsP2M
+
flopsM2M
+
flopsM2L
+
flopsL2L
+
flopsL2P
+
flopsP2P
<<
"
\n
==================================================
\n
"
<<
std
::
endl
;
std
::
cout
<<
"
\n
=================================================="
<<
"
\n
- Flops for M2M"
<<
std
::
endl
;
for
(
unsigned
int
level
=
0
;
level
<
inTreeHeight
;
++
level
)
std
::
cout
<<
" |- at level "
<<
level
<<
" flops = "
<<
flopsPerLevelM2M
[
level
]
<<
std
::
endl
;
std
::
cout
<<
"=================================================="
<<
"
\n
- Flops for M2L"
<<
std
::
endl
;
for
(
unsigned
int
level
=
0
;
level
<
inTreeHeight
;
++
level
)
std
::
cout
<<
" |- at level "
<<
level
<<
" flops = "
<<
flopsPerLevelM2L
[
level
]
<<
std
::
endl
;
std
::
cout
<<
"=================================================="
<<
"
\n
- Flops for L2L"
<<
std
::
endl
;
for
(
unsigned
int
level
=
0
;
level
<
inTreeHeight
;
++
level
)
std
::
cout
<<
" |- at level "
<<
level
<<
" flops = "
<<
flopsPerLevelL2L
[
level
]
<<
std
::
endl
;
std
::
cout
<<
"=================================================="
<<
std
::
endl
;
if
(
flopsPerLevelM2M
)
delete
[]
flopsPerLevelM2M
;
if
(
flopsPerLevelM2L
)
delete
[]
flopsPerLevelM2L
;
if
(
flopsPerLevelL2L
)
delete
[]
flopsPerLevelL2L
;
}
void
P2M
(
CellClass
*
const
/* not needed */
,
const
ContainerClass
*
const
SourceParticles
)
{
flopsP2M
+=
countFlopsP2M
orL2P
(
SourceParticles
->
getSize
());
flopsP2M
+=
countFlopsP2M
(
SourceParticles
->
getSize
());
}
void
M2M
(
CellClass
*
const
FRestrict
/* not needed */
,
const
CellClass
*
const
FRestrict
*
const
FRestrict
ChildCells
,
const
int
/* not needed */
)
const
int
TreeLevel
)
{
unsigned
int
flops
=
0
;
for
(
unsigned
int
ChildIndex
=
0
;
ChildIndex
<
8
;
++
ChildIndex
)
if
(
ChildCells
[
ChildIndex
])
flopsM2M
+=
countFlopsM2MorL2L
();
if
(
ChildCells
[
ChildIndex
])
flops
+=
countFlopsM2MorL2L
();
flopsM2M
+=
flops
;
flopsPerLevelM2M
[
TreeLevel
]
+=
flops
;
}
...
...
@@ -138,24 +178,30 @@ public:
void
M2L
(
CellClass
*
const
FRestrict
/* not needed */
,
const
CellClass
*
SourceCells
[
343
],
const
int
/* not needed */
,
const
int
/* not needed*/
)
const
int
TreeLevel
)
{
unsigned
int
flops
=
0
;
// count how ofter each of the 16 interactions is used
memset
(
countExp
,
0
,
sizeof
(
int
)
*
343
);
for
(
unsigned
int
idx
=
0
;
idx
<
343
;
++
idx
)
if
(
SourceCells
[
idx
])
countExp
[
SymHandler
->
pindices
[
idx
]]
++
;
// multiply (mat-mat-mul)
for
(
unsigned
int
pidx
=
0
;
pidx
<
343
;
++
pidx
)
if
(
countExp
[
pidx
])
flopsM2L
+=
countFlopsM2L
(
countExp
[
pidx
],
SymHandler
->
LowRank
[
pidx
])
+
countExp
[
pidx
]
*
nnodes
;
if
(
countExp
[
pidx
])
flops
+=
countFlopsM2L
(
countExp
[
pidx
],
SymHandler
->
LowRank
[
pidx
])
+
countExp
[
pidx
]
*
nnodes
;
flopsM2L
+=
flops
;
flopsPerLevelM2L
[
TreeLevel
]
+=
flops
;
}
void
L2L
(
const
CellClass
*
const
FRestrict
/* not needed */
,
CellClass
*
FRestrict
*
const
FRestrict
ChildCells
,
const
int
/* not needed */
)
const
int
TreeLevel
)
{
unsigned
int
flops
=
0
;
for
(
unsigned
int
ChildIndex
=
0
;
ChildIndex
<
8
;
++
ChildIndex
)
if
(
ChildCells
[
ChildIndex
])
flopsL2L
+=
countFlopsM2MorL2L
()
+
nnodes
;
if
(
ChildCells
[
ChildIndex
])
flops
+=
countFlopsM2MorL2L
()
+
nnodes
;
flopsL2L
+=
flops
;
flopsPerLevelL2L
[
TreeLevel
]
+=
flops
;
}
...
...
Src/Kernels/Chebyshev/FChebInterpolator.hpp
View file @
0f163780
...
...
@@ -412,8 +412,12 @@ public:
const
FReal
*
const
*
const
getChildParentInterpolator
()
const
{
return
ChildParentInterpolator
;
}
const
unsigned
int
*
const
getPermutationsM2ML2L
(
unsigned
int
i
)
const
{
return
perm
[
i
];
}
...
...
@@ -487,16 +491,19 @@ public:
FReal
*
const
ParentExpansion
)
const
{
FReal
Exp
[
nnodes
],
PermExp
[
nnodes
];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas
::
gemtm
(
ORDER
,
ORDER
,
ORDER
*
ORDER
,
FReal
(
1.
),
ChildParentInterpolator
[
ChildIndex
],
ORDER
,
const_cast
<
FReal
*>
(
ChildExpansion
),
ORDER
,
PermExp
,
ORDER
);
for
(
unsigned
int
n
=
0
;
n
<
nnodes
;
++
n
)
Exp
[
n
]
=
PermExp
[
perm
[
1
][
n
]];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas
::
gemtm
(
ORDER
,
ORDER
,
ORDER
*
ORDER
,
FReal
(
1.
),
ChildParentInterpolator
[
ChildIndex
]
+
2
*
ORDER
*
ORDER
,
ORDER
,
Exp
,
ORDER
,
PermExp
,
ORDER
);
for
(
unsigned
int
n
=
0
;
n
<
nnodes
;
++
n
)
Exp
[
perm
[
1
][
n
]]
=
PermExp
[
perm
[
2
][
n
]];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas
::
gemtm
(
ORDER
,
ORDER
,
ORDER
*
ORDER
,
FReal
(
1.
),
ChildParentInterpolator
[
ChildIndex
]
+
1
*
ORDER
*
ORDER
,
ORDER
,
Exp
,
ORDER
,
PermExp
,
ORDER
);
...
...
@@ -510,24 +517,26 @@ public:
FReal
*
const
ChildExpansion
)
const
{
FReal
Exp
[
nnodes
],
PermExp
[
nnodes
];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas
::
gemm
(
ORDER
,
ORDER
,
ORDER
*
ORDER
,
FReal
(
1.
),
ChildParentInterpolator
[
ChildIndex
],
ORDER
,
const_cast
<
FReal
*>
(
ParentExpansion
),
ORDER
,
PermExp
,
ORDER
);
for
(
unsigned
int
n
=
0
;
n
<
nnodes
;
++
n
)
Exp
[
n
]
=
PermExp
[
perm
[
1
][
n
]];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas
::
gemm
(
ORDER
,
ORDER
,
ORDER
*
ORDER
,
FReal
(
1.
),
ChildParentInterpolator
[
ChildIndex
]
+
2
*
ORDER
*
ORDER
,
ORDER
,
Exp
,
ORDER
,
PermExp
,
ORDER
);
for
(
unsigned
int
n
=
0
;
n
<
nnodes
;
++
n
)
Exp
[
perm
[
1
][
n
]]
=
PermExp
[
perm
[
2
][
n
]];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas
::
gemm
(
ORDER
,
ORDER
,
ORDER
*
ORDER
,
FReal
(
1.
),
ChildParentInterpolator
[
ChildIndex
]
+
1
*
ORDER
*
ORDER
,
ORDER
,
Exp
,
ORDER
,
PermExp
,
ORDER
);
for
(
unsigned
int
n
=
0
;
n
<
nnodes
;
++
n
)
ChildExpansion
[
perm
[
2
][
n
]]
+=
PermExp
[
n
];
}
// total flops count: 3 * ORDER*ORDER*ORDER * (2*ORDER-1)
};
...
...
@@ -1124,40 +1133,40 @@ inline void FChebInterpolator<ORDER>::applyL2PTotal(const FPoint& center,
// IMPORTANT: NOT CHANGE ORDER OF COMPUTATIONS!!! ////////////////
//////////////////////////////////////////////////////////////////
// W2[0] /////////////////
// W2[0] /////////////////
(ORDER-1)*ORDER*ORDER * 2*ORDER
FBlas
::
gemtm
(
ORDER
,
ORDER
-
1
,
ORDER
*
ORDER
,
FReal
(
1.
),
const_cast
<
FReal
*>
(
T
),
ORDER
,
const_cast
<
FReal
*>
(
localExpansion
),
ORDER
,
F2
,
ORDER
-
1
);
for
(
unsigned
int
l
=
0
;
l
<
ORDER
-
1
;
++
l
)
{
W2
[
0
][
l
]
=
F2
[
l
];
for
(
unsigned
int
j
=
1
;
j
<
ORDER
*
ORDER
;
++
j
)
W2
[
0
][
l
]
+=
F2
[
j
*
(
ORDER
-
1
)
+
l
];
}
// W4[0] /////////////////
// W4[0] /////////////////
ORDER*(ORDER-1)*(ORDER-1) + 2*ORDER
perm5
.
permute
(
F2
,
F4
);
FBlas
::
gemtm
(
ORDER
,
ORDER
-
1
,
ORDER
*
(
ORDER
-
1
),
FReal
(
1.
),
const_cast
<
FReal
*>
(
T
),
ORDER
,
F4
,
ORDER
,
G4
,
ORDER
-
1
);
for
(
unsigned
int
l
=
0
;
l
<
ORDER
-
1
;
++
l
)
for
(
unsigned
int
m
=
0
;
m
<
ORDER
-
1
;
++
m
)
{
W4
[
0
][
m
*
(
ORDER
-
1
)
+
l
]
=
G4
[
l
*
ORDER
*
(
ORDER
-
1
)
+
m
];
for
(
unsigned
int
k
=
1
;
k
<
ORDER
;
++
k
)
W4
[
0
][
m
*
(
ORDER
-
1
)
+
l
]
+=
G4
[
l
*
ORDER
*
(
ORDER
-
1
)
+
k
*
(
ORDER
-
1
)
+
m
];
}
// W8 ////////////////////
// W8 ////////////////////
(ORDER-1)*(ORDER-1)*(ORDER-1) * (2*ORDER-1)
perm8
.
permute
(
G4
,
G8
);
FReal
F8
[(
ORDER
-
1
)
*
(
ORDER
-
1
)
*
(
ORDER
-
1
)];
FBlas
::
gemtm
(
ORDER
,
ORDER
-
1
,
(
ORDER
-
1
)
*
(
ORDER
-
1
),
FReal
(
1.
),
const_cast
<
FReal
*>
(
T
),
ORDER
,
G8
,
ORDER
,
F8
,
ORDER
-
1
);
perm9
.
permute
(
F8
,
W8
);
// W4[1] /////////////////
// W4[1] /////////////////
ORDER*(ORDER-1)*(ORDER-1) + 2*ORDER
perm6
.
permute
(
F2
,
F4
);
FBlas
::
gemtm
(
ORDER
,
ORDER
-
1
,
(
ORDER
-
1
)
*
ORDER
,
FReal
(
1.
),
const_cast
<
FReal
*>
(
T
),
ORDER
,
F4
,
ORDER
,
G4
,
ORDER
-
1
);
for
(
unsigned
int
l
=
0
;
l
<
ORDER
-
1
;
++
l
)
for
(
unsigned
int
n
=
0
;
n
<
ORDER
-
1
;
++
n
)
{
W4
[
1
][
n
*
(
ORDER
-
1
)
+
l
]
=
G4
[
l
*
(
ORDER
-
1
)
+
n
];
for
(
unsigned
int
j
=
1
;
j
<
ORDER
;
++
j
)
W4
[
1
][
n
*
(
ORDER
-
1
)
+
l
]
+=
G4
[
j
*
(
ORDER
-
1
)
*
(
ORDER
-
1
)
+
l
*
(
ORDER
-
1
)
+
n
];
}
// W2[1] /////////////////
// W2[1] /////////////////
(ORDER-1)*ORDER*ORDER * 2*ORDER
perm3
.
permute
(
localExpansion
,
lE
);
FBlas
::
gemtm
(
ORDER
,
ORDER
-
1
,
ORDER
*
ORDER
,
FReal
(
1.
),
const_cast
<
FReal
*>
(
T
),
ORDER
,
lE
,
ORDER
,
F2
,
ORDER
-
1
);
for
(
unsigned
int
i
=
0
;
i
<
ORDER
-
1
;
++
i
)
{
W2
[
1
][
i
]
=
F2
[
i
];
for
(
unsigned
int
j
=
1
;
j
<
ORDER
*
ORDER
;
++
j
)
W2
[
1
][
i
]
+=
F2
[
j
*
(
ORDER
-
1
)
+
i
];
}
// W4[2] /////////////////
// W4[2] /////////////////
ORDER*(ORDER-1)*(ORDER-1) + 2*ORDER
perm7
.
permute
(
F2
,
F4
);
FBlas
::
gemtm
(
ORDER
,
ORDER
-
1
,
(
ORDER
-
1
)
*
ORDER
,
FReal
(
1.
),
const_cast
<
FReal
*>
(
T
),
ORDER
,
F4
,
ORDER
,
G4
,
ORDER
-
1
);
for
(
unsigned
int
m
=
0
;
m
<
ORDER
-
1
;
++
m
)
for
(
unsigned
int
n
=
0
;
n
<
ORDER
-
1
;
++
n
)
{
W4
[
2
][
n
*
(
ORDER
-
1
)
+
m
]
=
G4
[
m
*
ORDER
*
(
ORDER
-
1
)
+
n
];
for
(
unsigned
int
i
=
1
;
i
<
ORDER
;
++
i
)
W4
[
2
][
n
*
(
ORDER
-
1
)
+
m
]
+=
G4
[
m
*
ORDER
*
(
ORDER
-
1
)
+
i
*
(
ORDER
-
1
)
+
n
];
}
// W2[2] /////////////////
// W2[2] /////////////////
(ORDER-1)*ORDER*ORDER * 2*ORDER
perm4
.
permute
(
localExpansion
,
lE
);
FBlas
::
gemtm
(
ORDER
,
ORDER
-
1
,
ORDER
*
ORDER
,
FReal
(
1.
),
const_cast
<
FReal
*>
(
T
),
ORDER
,
lE
,
ORDER
,
F2
,
ORDER
-
1
);
for
(
unsigned
int
i
=
0
;
i
<
ORDER
-
1
;
++
i
)
{
W2
[
2
][
i
]
=
F2
[
i
];
...
...
@@ -1198,15 +1207,13 @@ inline void FChebInterpolator<ORDER>::applyL2PTotal(const FPoint& center,
U_of_x
[
1
][
j
]
=
y2
*
U_of_x
[
1
][
j
-
1
]
-
U_of_x
[
1
][
j
-
2
];
// 2 flops
U_of_x
[
2
][
j
]
=
z2
*
U_of_x
[
2
][
j
-
1
]
-
U_of_x
[
2
][
j
-
2
];
// 2 flops
}
// scale, because dT_j/dx = jU_{j-1}
for
(
unsigned
int
j
=
2
;
j
<
ORDER
;
++
j
)
{
U_of_x
[
0
][
j
-
1
]
*=
FReal
(
j
);
// 1 flops
U_of_x
[
1
][
j
-
1
]
*=
FReal
(
j
);
// 1 flops
U_of_x
[
2
][
j
-
1
]
*=
FReal
(
j
);
// 1 flops
}
}
}
// 3 + (ORDER-2)*15
// apply P and increment forces
FReal
potential
=
FReal
(
0.
);
...
...
@@ -1233,19 +1240,19 @@ inline void FChebInterpolator<ORDER>::applyL2PTotal(const FPoint& center,
f4
[
3
]
+=
T_of_x
[
0
][
l
]
*
U_of_x
[
2
][
m
-
1
]
*
w4
[
1
]
+
T_of_x
[
1
][
l
]
*
U_of_x
[
2
][
m
-
1
]
*
w4
[
2
];
// 6 flops
for
(
unsigned
int
n
=
1
;
n
<
ORDER
;
++
n
)
{
const
FReal
w8
=
W8
[(
n
-
1
)
*
(
ORDER
-
1
)
*
(
ORDER
-
1
)
+
(
m
-
1
)
*
(
ORDER
-
1
)
+
(
l
-
1
)];
f8
[
0
]
+=
T_of_x
[
0
][
l
]
*
T_of_x
[
1
][
m
]
*
T_of_x
[
2
][
n
]
*
w8
;
f8
[
1
]
+=
U_of_x
[
0
][
l
-
1
]
*
T_of_x
[
1
][
m
]
*
T_of_x
[
2
][
n
]
*
w8
;
f8
[
2
]
+=
T_of_x
[
0
][
l
]
*
U_of_x
[
1
][
m
-
1
]
*
T_of_x
[
2
][
n
]
*
w8
;
f8
[
3
]
+=
T_of_x
[
0
][
l
]
*
T_of_x
[
1
][
m
]
*
U_of_x
[
2
][
n
-
1
]
*
w8
;
}
//
ORDER * 4
flops
}
//
ORDER * (9 + ORDER * 4
) flops
}
//
ORDER * (ORDER * (9 + ORDER * 4
)) flops
f8
[
0
]
+=
T_of_x
[
0
][
l
]
*
T_of_x
[
1
][
m
]
*
T_of_x
[
2
][
n
]
*
w8
;
// 4 flops
f8
[
1
]
+=
U_of_x
[
0
][
l
-
1
]
*
T_of_x
[
1
][
m
]
*
T_of_x
[
2
][
n
]
*
w8
;
// 4 flops
f8
[
2
]
+=
T_of_x
[
0
][
l
]
*
U_of_x
[
1
][
m
-
1
]
*
T_of_x
[
2
][
n
]
*
w8
;
// 4 flops
f8
[
3
]
+=
T_of_x
[
0
][
l
]
*
T_of_x
[
1
][
m
]
*
U_of_x
[
2
][
n
-
1
]
*
w8
;
// 4 flops
}
//
(ORDER-1) * 16
flops
}
//
(ORDER-1) * (27 + (ORDER-1) * 16
) flops
}
//
(ORDER-1) * ((ORDER-1) * (27 + (ORDER-1) * 16
)) flops
}
potential
=
(
f1
+
FReal
(
2.
)
*
f2
[
0
]
+
FReal
(
4.
)
*
f4
[
0
]
+
FReal
(
8.
)
*
f8
[
0
])
/
nnodes
;
// 7 flops
forces
[
0
]
=
(
FReal
(
2.
)
*
f2
[
1
]
+
FReal
(
4.
)
*
f4
[
1
]
+
FReal
(
8.
)
*
f8
[
1
])
*
jacobian
[
0
]
/
nnodes
;
//
6
flops
forces
[
1
]
=
(
FReal
(
2.
)
*
f2
[
2
]
+
FReal
(
4.
)
*
f4
[
2
]
+
FReal
(
8.
)
*
f8
[
2
])
*
jacobian
[
1
]
/
nnodes
;
//
6
flops
forces
[
2
]
=
(
FReal
(
2.
)
*
f2
[
3
]
+
FReal
(
4.
)
*
f4
[
3
]
+
FReal
(
8.
)
*
f8
[
3
])
*
jacobian
[
2
]
/
nnodes
;
//
6
flops
}
//
7 + ORDER * (ORDER * (9 + ORDER * 4
)) flops
forces
[
0
]
=
(
FReal
(
2.
)
*
f2
[
1
]
+
FReal
(
4.
)
*
f4
[
1
]
+
FReal
(
8.
)
*
f8
[
1
])
*
jacobian
[
0
]
/
nnodes
;
//
7
flops
forces
[
1
]
=
(
FReal
(
2.
)
*
f2
[
2
]
+
FReal
(
4.
)
*
f4
[
2
]
+
FReal
(
8.
)
*
f8
[
2
])
*
jacobian
[
1
]
/
nnodes
;
//
7
flops
forces
[
2
]
=
(
FReal
(
2.
)
*
f2
[
3
]
+
FReal
(
4.
)
*
f4
[
3
]
+
FReal
(
8.
)
*
f8
[
3
])
*
jacobian
[
2
]
/
nnodes
;
//
7
flops
}
//
28 + (ORDER-1) * ((ORDER-1) * (27 + (ORDER-1) * 16
)) flops
// set computed potential
iter
.
data
().
incPotential
(
potential
);
// 1 flop
...
...
@@ -1257,7 +1264,7 @@ inline void FChebInterpolator<ORDER>::applyL2PTotal(const FPoint& center,
// increment target iterator
iter
.
gotoNext
();
}
// N * (
7 + ORDER * (ORDER * (9 + ORDER * 4)))
flops
}
// N * (
38 + (ORDER-2)*15 + (ORDER-1)*((ORDER-1) * (27 + (ORDER-1) * 16))) + 6
flops
}
...
...
Src/Kernels/Chebyshev/FChebSymKernel.hpp
View file @
0f163780
...
...
@@ -8,7 +8,7 @@
#include "./FAbstractChebKernel.hpp"
#include "./FChebInterpolator.hpp"
#include "./FChebSym
metries
.hpp"
#include "./FChebSym
M2LHandler
.hpp"
class
FTreeCoordinate
;
...
...
@@ -35,16 +35,12 @@ template <class ParticleClass, class CellClass, class ContainerClass, class Matr
class
FChebSymKernel
:
public
FAbstractChebKernel
<
ParticleClass
,
CellClass
,
ContainerClass
,
MatrixKernelClass
,
ORDER
>
{
typedef
FAbstractChebKernel
<
ParticleClass
,
CellClass
,
ContainerClass
,
MatrixKernelClass
,
ORDER
>
AbstractBase
Class
;
typedef
FAbstractChebKernel
<
ParticleClass
,
CellClass
,
ContainerClass
,
MatrixKernelClass
,
ORDER
>
AbstractBaseClass
;
typedef
SymmetryHandler
<
ORDER
>
SymmetryHandler
Class
;
enum
{
nnodes
=
AbstractBaseClass
::
nnodes
};
/// Handler to deal with all symmetries: Stores permutation indices and
/// vectors to reduce 343 different interactions to 16 only.
struct
SymmetryHandler
;
/// Needed for handling all symmetries
const
FSmartPointer
<
SymmetryHandler
,
FSmartPointerMemory
>
SymHandler
;
const
FSmartPointer
<
SymmetryHandler
Class
,
FSmartPointerMemory
>
SymHandler
;
// permuted local and multipole expansions
FReal
**
Loc
;
...
...
@@ -74,8 +70,8 @@ class FChebSymKernel
for
(
int
k
=
0
;
k
<=
j
;
++
k
)
{
const
unsigned
int
idx
=
(
i
+
3
)
*
7
*
7
+
(
j
+
3
)
*
7
+
(
k
+
3
);
assert
(
Mul
[
idx
]
==
NULL
||
Loc
[
idx
]
==
NULL
);
Mul
[
idx
]
=
new
FReal
[
30
*
nnodes
];
Loc
[
idx
]
=
new
FReal
[
30
*
nnodes
];
Mul
[
idx
]
=
new
FReal
[
24
*
nnodes
];
Loc
[
idx
]
=
new
FReal
[
24
*
nnodes
];
}
}
...
...
@@ -92,7 +88,7 @@ public:
const
FReal
inBoxWidth
,
const
FReal
Epsilon
)
:
AbstractBaseClass
(
inTreeHeight
,
inBoxCenter
,
inBoxWidth
),
SymHandler
(
new
SymmetryHandler
(
AbstractBaseClass
::
MatrixKernel
.
getPtr
(),
Epsilon
)),
SymHandler
(
new
SymmetryHandler
Class
(
AbstractBaseClass
::
MatrixKernel
.
getPtr
(),
Epsilon
)),
Loc
(
NULL
),
Mul
(
NULL
),
countExp
(
NULL
)
{
this
->
allocateMemoryForPermutedExpansions
();
...
...
@@ -124,6 +120,10 @@ public:
}
const
SymmetryHandlerClass
*
const
getPtrToSymHandler
()
const
{
return
SymHandler
.
getPtr
();
}
void
P2M
(
CellClass
*
const
LeafCell
,
const
ContainerClass
*
const
SourceParticles
)
...
...
@@ -174,15 +174,17 @@ public:
}
// multiply (mat-mat-mul)
FReal
Compressed
[
nnodes
*
30
];
FReal
Compressed
[
nnodes
*
24
];
const
FReal
CellWidth
(
AbstractBaseClass
::
BoxWidth
/
FReal
(
FMath
::
pow
(
2
,
TreeLevel
)));
const
FReal
scale
(
AbstractBaseClass
::
MatrixKernel
->
getScaleFactor
(
CellWidth
));
for
(
unsigned
int
pidx
=
0
;
pidx
<
343
;
++
pidx
)
{
const
unsigned
int
count
=
countExp
[
pidx
];
if
(
count
)
{
const
unsigned
int
rank
=
SymHandler
->
LowRank
[
pidx
];
// rank * count * (2*nnodes-1) flops
FBlas
::
gemtm
(
nnodes
,
rank
,
count
,
FReal
(
1.
),
SymHandler
->
K
[
pidx
]
+
rank
*
nnodes
,
nnodes
,
Mul
[
pidx
],
nnodes
,
Compressed
,
rank
);
// nnodes *count * (2*rank-1) flops
FBlas
::
gemm
(
nnodes
,
rank
,
count
,
scale
,
SymHandler
->
K
[
pidx
],
nnodes
,
Compressed
,
rank
,
Loc
[
pidx
],
nnodes
);
}
...
...
@@ -197,6 +199,7 @@ public:
const
unsigned
int
count
=
(
countExp
[
pidx
])
++
;
const
FReal
*
const
loc
=
Loc
[
pidx
]
+
count
*
nnodes
;
const
unsigned
int
*
const
pvec
=
SymHandler
->
pvectors
[
idx
];
// nnodes flops
for
(
unsigned
int
n
=
0
;
n
<
nnodes
;
++
n
)
LocalExpansion
[
n
]
+=
loc
[
pvec
[
n
]];
}
...
...
@@ -345,137 +348,6 @@ public:
/**
* Handler to deal with all symmetries: Stores permutation indices and vectors
* to reduce 343 different interactions to 16 only.
*/
template
<
class
ParticleClass
,
class
CellClass
,
class
ContainerClass
,
class
MatrixKernelClass
,
int
ORDER
>
struct
FChebSymKernel
<
ParticleClass
,
CellClass
,
ContainerClass
,
MatrixKernelClass
,
ORDER
>
::
SymmetryHandler
{
// M2L operators
FReal
*
K
[
343
];
int
LowRank
[
343
];
// permutation vectors and permutated indices
unsigned
int
pvectors
[
343
][
nnodes
];
unsigned
int
pindices
[
343
];
/** Constructor */
SymmetryHandler
(
const
MatrixKernelClass
*
const
MatrixKernel
,
const
double
Epsilon
)
{
// init all 343 item to zero, because effectively only 16 exist
for
(
unsigned
int
t
=
0
;
t
<
343
;
++
t
)
{
K
[
t
]
=
NULL
;
LowRank
[
t
]
=
0
;
}
// set permutation vector and indices
const
FChebSymmetries
<
ORDER
>
Symmetries
;
for
(
int
i
=-
3
;
i
<=
3
;
++
i
)
for
(
int
j
=-
3
;
j
<=
3
;
++
j
)
for
(
int
k
=-
3
;
k
<=
3
;
++
k
)
if
(
abs
(
i
)
>
1
||
abs
(
j
)
>
1
||
abs
(
k
)
>
1
)
{
const
unsigned
int
idx
=
((
i
+
3
)
*
7
+
(
j
+
3
))
*
7
+
(
k
+
3
);
pindices
[
idx
]
=
Symmetries
.
getPermutationArrayAndIndex
(
i
,
j
,
k
,
pvectors
[
idx
]);
}
// precompute 16 M2L operators
this
->
precomputeSVD
(
MatrixKernel
,
Epsilon
);
}
/** Destructor */
~
SymmetryHandler
()
{
for
(
unsigned
int
t
=
0
;
t
<
343
;
++
t
)
if
(
K
[
t
]
!=
NULL
)
delete
[]
K
[
t
];
}
private:
void
precomputeSVD
(
const
MatrixKernelClass
*
const
MatrixKernel
,
const
double
Epsilon
)
{
// interpolation points of source (Y) and target (X) cell
FPoint
X
[
nnodes
],
Y
[
nnodes
];
// set roots of target cell (X)
FChebTensor
<
ORDER
>::
setRoots
(
FPoint
(
0.
,
0.
,
0.
),
FReal
(
2.
),
X
);
// temporary matrix
FReal
*
U
=
new
FReal
[
nnodes
*
nnodes
];
// needed for the SVD
const
unsigned
int
LWORK
=
2
*
(
3
*
nnodes
+
nnodes
);
FReal
*
const
WORK
=
new
FReal
[
LWORK
];
FReal
*
const
VT
=
new
FReal
[
nnodes
*
nnodes
];
FReal
*
const
S
=
new
FReal
[
nnodes
];
unsigned
int
counter
=
0
;
for
(
int
i
=
2
;
i
<=
3
;
++
i
)
{
for
(
int
j
=
0
;
j
<=
i
;
++
j
)
{
for
(
int
k
=
0
;
k
<=
j
;
++
k
)
{
// assemble matrix
const
FPoint
cy
(
FReal
(
2.
*
i
),
FReal
(
2.
*
j
),
FReal
(
2.
*
k
));
FChebTensor
<
ORDER
>::
setRoots
(
cy
,
FReal
(
2.
),
Y
);
for
(
unsigned
int
n
=
0
;
n
<
nnodes
;
++
n
)
for
(
unsigned
int
m
=
0
;
m
<
nnodes
;
++
m
)
U
[
n
*
nnodes
+
m
]
=
MatrixKernel
->
evaluate
(
X
[
m
],
Y
[
n
]);
// applying weights ////////////////////////////////////////
FReal
weights
[
nnodes
];
FChebTensor
<
ORDER
>::
setRootOfWeights
(
weights
);
for
(
unsigned
int
n
=
0
;
n
<
nnodes
;
++
n
)
{
FBlas
::
scal
(
nnodes
,
weights
[
n
],
U
+
n
,
nnodes
);
// scale rows
FBlas
::
scal
(
nnodes
,
weights
[
n
],
U
+
n
*
nnodes
);
// scale cols
}
//////////////////////////////////////////////////////////
// truncated singular value decomposition of matrix
const
unsigned
int
info
=
FBlas
::
gesvd
(
nnodes
,
nnodes
,
U
,
S
,
VT
,
nnodes
,
LWORK
,
WORK
);
if
(
info
!=
0
)
throw
std
::
runtime_error
(
"SVD did not converge with "
+
info
);
const
unsigned
int
rank
=
getRank
<
ORDER
>
(
S
,
Epsilon
);