FSphericalBlockBlasKernel.hpp 12.7 KB
Newer Older
1
// See LICENCE file at project root
2 3 4 5 6
#ifndef FSPHERICALBLOCKBLASKERNEL_HPP
#define FSPHERICALBLOCKBLASKERNEL_HPP

#include "FAbstractSphericalKernel.hpp"

7 8 9
#include "../../Utils/FMemUtils.hpp"
#include "../../Utils/FBlas.hpp"
#include "../../Containers/FVector.hpp"
10 11 12 13 14

/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* This class is a spherical harmonic kernels using block blas
*/
15 16
template< class FReal, class CellClass, class ContainerClass>
class FSphericalBlockBlasKernel : public FAbstractSphericalKernel<FReal, CellClass,ContainerClass> {
17
protected:
18
    typedef FAbstractSphericalKernel<FReal, CellClass,ContainerClass> Parent;
19

berenger-bramas's avatar
berenger-bramas committed
20 21
    /** A interaction properties */
    struct ComputationPair {
22 23 24
        const FComplex<FReal>* FRestrict pole;
        FComplex<FReal>* FRestrict local;
        explicit ComputationPair(const FComplex<FReal>* const inPole = 0, FComplex<FReal>*const inLocal = 0)
berenger-bramas's avatar
berenger-bramas committed
25 26 27
            : pole(inPole), local(inLocal) {}
    };

28 29 30 31
    const int FF_MATRIX_ROW_DIM;     //< The blas matrix number of rows
    const int FF_MATRIX_COLUMN_DIM;  //< The blas matrix number of columns
    const int FF_MATRIX_SIZE;        //< The blas matrix size

berenger-bramas's avatar
berenger-bramas committed
32 33
    const int BlockSize;             //< The size of a block

34 35 36
    FComplex<FReal>*const multipoleMatrix;                //< To copy all the multipole vectors
    FComplex<FReal>*const localMatrix;                    //< To save all the local vectors result
    FSmartPointer<FComplex<FReal>**> preM2LTransitions;    //< The pre-computation for the M2L based on the level and the 189 possibilities
37

berenger-bramas's avatar
berenger-bramas committed
38 39 40
    FVector<ComputationPair> interactions[343];     //< All the current interaction


41 42
    /** To access te precomputed M2L transfer matrixes */
    int indexM2LTransition(const int idxX,const int idxY,const int idxZ) const {
43
        return (( ( ((idxX+3) * 7) + (idxY+3))*7 ) + (idxZ+3));
44 45
    }

46

47 48
    /** Alloc and init pre-vectors*/
    void allocAndInit(){
49
        FHarmonic<FReal> blasHarmonic(Parent::devP * 2);
50

51
        // Matrix to fill and then transposed
52
        FComplex<FReal>*const workMatrix = new FComplex<FReal>[FF_MATRIX_SIZE];
53

54 55
        // M2L transfer, there is a maximum of 3 neighbors in each direction,
        // so 6 in each dimension
56
        FReal treeWidthAtLevel = Parent::boxWidth;
57 58
        preM2LTransitions = new FComplex<FReal>**[Parent::treeHeight];
        memset(preM2LTransitions.getPtr(), 0, sizeof(FComplex<FReal>**) * (Parent::treeHeight));
59

60
        for(int idxLevel = 0 ; idxLevel < Parent::treeHeight ; ++idxLevel ){
61 62
            preM2LTransitions[idxLevel] = new FComplex<FReal>*[(7 * 7 * 7)];
            memset(preM2LTransitions[idxLevel], 0, sizeof(FComplex<FReal>*) * (7*7*7));
63 64 65 66 67

            for(int idxX = -3 ; idxX <= 3 ; ++idxX ){
                for(int idxY = -3 ; idxY <= 3 ; ++idxY ){
                    for(int idxZ = -3 ; idxZ <= 3 ; ++idxZ ){
                        if(FMath::Abs(idxX) > 1 || FMath::Abs(idxY) > 1 || FMath::Abs(idxZ) > 1){
68
                            // Compute harmonic
69 70
                            const FPoint<FReal> relativePos( FReal(-idxX) * treeWidthAtLevel , FReal(-idxY) * treeWidthAtLevel , FReal(-idxZ) * treeWidthAtLevel );
                            blasHarmonic.computeOuter(FSpherical<FReal>(relativePos));
71

72
                            // Reset Matrix
73 74
                            FMemUtils::setall<FComplex<FReal>>(workMatrix, FComplex<FReal>(), FF_MATRIX_SIZE);
                            FComplex<FReal>* FRestrict fillTransfer = workMatrix;
75 76 77 78 79 80 81 82

                            for(int M = 0 ; M <= Parent::devP ; ++M){
                                for (int m = 0 ;  m <= M ; ++m){
                                    for (int N = 0 ; N <= Parent::devP ; ++N){
                                        for (int n = 0 ; n <= 2*N ;  ++n, ++fillTransfer){
                                            const int k = N-n-m;
                                            if (k < 0){
                                                const FReal pow_of_minus_1 = FReal((k&1) ? -1 : 1);
COULAUD Olivier's avatar
COULAUD Olivier committed
83 84
                                                fillTransfer->setReal( pow_of_minus_1 * blasHarmonic.result()[blasHarmonic.getPreExpRedirJ(M+N)-k].real());
                                                fillTransfer->setImag((-pow_of_minus_1) * blasHarmonic.result()[blasHarmonic.getPreExpRedirJ(M+N)-k].imag());
85 86 87 88 89 90 91 92 93
                                            }
                                            else{
                                                (*fillTransfer) = blasHarmonic.result()[blasHarmonic.getPreExpRedirJ(M+N)+k];
                                            }

                                        }
                                    }
                                }
                            }
94 95

                            // Transpose and copy result
96
                            FComplex<FReal>*const matrix = new FComplex<FReal>[FF_MATRIX_SIZE];
97 98 99 100 101
                            for(int idxRow = 0 ; idxRow < FF_MATRIX_ROW_DIM ; ++idxRow){
                                for(int idxCol = 0 ; idxCol < FF_MATRIX_COLUMN_DIM ; ++idxCol){
                                    matrix[idxCol * FF_MATRIX_ROW_DIM + idxRow] = workMatrix[idxCol + idxRow * FF_MATRIX_COLUMN_DIM];
                                }
                            }
102
                            preM2LTransitions[idxLevel][indexM2LTransition(idxX,idxY,idxZ)] = matrix;
103 104 105 106 107 108
                        }
                    }
                }
            }
            treeWidthAtLevel /= 2;
        }
109 110 111

        // Clean
        delete[] workMatrix;
112 113 114 115 116 117 118 119 120 121
    }


public:
    /** Constructor
      * @param inDevP the polynomial degree
      * @param inThreeHeight the height of the tree
      * @param inBoxWidth the size of the simulation box
      * @param inPeriodicLevel the number of level upper to 0 that will be requiried
      */
122
    FSphericalBlockBlasKernel(const int inDevP, const int inTreeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, const int inBlockSize = 512)
123
        : Parent(inDevP, inTreeHeight, inBoxWidth, inBoxCenter),
124 125
          FF_MATRIX_ROW_DIM(Parent::harmonic.getExpSize()), FF_MATRIX_COLUMN_DIM(Parent::harmonic.getNExpSize()),
          FF_MATRIX_SIZE(FF_MATRIX_ROW_DIM * FF_MATRIX_COLUMN_DIM),
berenger-bramas's avatar
berenger-bramas committed
126
          BlockSize(inBlockSize),
127 128
          multipoleMatrix(new FComplex<FReal>[inBlockSize * FF_MATRIX_COLUMN_DIM]),
          localMatrix(new FComplex<FReal>[inBlockSize * FF_MATRIX_ROW_DIM]),
129
          preM2LTransitions(nullptr){
130 131 132 133 134 135 136 137
        allocAndInit();
    }

    /** Copy constructor */
    FSphericalBlockBlasKernel(const FSphericalBlockBlasKernel& other)
        : Parent(other),
          FF_MATRIX_ROW_DIM(other.FF_MATRIX_ROW_DIM), FF_MATRIX_COLUMN_DIM(other.FF_MATRIX_COLUMN_DIM),
          FF_MATRIX_SIZE(other.FF_MATRIX_SIZE),
berenger-bramas's avatar
berenger-bramas committed
138
          BlockSize(other.BlockSize),
139 140
          multipoleMatrix(new FComplex<FReal>[other.BlockSize * FF_MATRIX_COLUMN_DIM]),
          localMatrix(new FComplex<FReal>[other.BlockSize * FF_MATRIX_ROW_DIM]),
141 142
          preM2LTransitions(other.preM2LTransitions) {

143 144 145 146
    }

    /** Destructor */
    ~FSphericalBlockBlasKernel(){
berenger-bramas's avatar
berenger-bramas committed
147 148
        delete[] multipoleMatrix;
        delete[] localMatrix;
149
        if(preM2LTransitions.isLast()){
150
            for(int idxLevel = 0 ; idxLevel < Parent::treeHeight ; ++idxLevel ){
151 152 153
                for(int idxX = -3 ; idxX <= 3 ; ++idxX ){
                    for(int idxY = -3 ; idxY <= 3 ; ++idxY ){
                        for(int idxZ = -3 ; idxZ <= 3 ; ++idxZ ){
154
                            delete[] preM2LTransitions[idxLevel][indexM2LTransition(idxX,idxY,idxZ)];
155 156 157 158
                        }
                    }
                }
            }
159
            FMemUtils::DeleteAllArray(preM2LTransitions.getPtr(), Parent::treeHeight);
160
        }
161 162 163
    }

    /** M2L with a cell and all the existing neighbors */
164 165 166 167 168 169 170 171
    template<class SymbolicData>
    void M2L(typename CellClass::local_expansion_t * const FRestrict TargetExpansion,
             const SymbolicData* const TargetSymb,
             const typename CellClass::multipole_t * const FRestrict SourceMultipoles[],
             const SymbolicData* const FRestrict /*SourceSymbs*/[],
             const int neighborPositions[],
             const int inSize)
    {
172
        // For all neighbors compute M2L
173 174
        for(int idxExistingNeigh = 0 ; idxExistingNeigh < inSize ; ++idxExistingNeigh){
            const int idxNeigh = neighborPositions[idxExistingNeigh];
175 176 177
            interactions[idxNeigh].push(
                ComputationPair(SourceMultipoles[idxExistingNeigh]->get(),
                                TargetExpansion->get()));
berenger-bramas's avatar
berenger-bramas committed
178

179
            if( interactions[idxNeigh].getSize() == BlockSize){
180
                multipoleToLocal( idxNeigh, static_cast<int>(TargetSymb->getLevel()));
181
            }
182 183
        }
    }
184
    /** the needFinishedM2LEvent method is needed   */
185
    constexpr static bool NeedFinishedM2LEvent(){
186 187
   	 	 return true ;
    }
berenger-bramas's avatar
berenger-bramas committed
188
    /** Do we have some computation to do in the buffers */
Quentin Khan's avatar
Quentin Khan committed
189
    void finishedLevelM2L(const int inLevel) override {
berenger-bramas's avatar
berenger-bramas committed
190 191 192 193 194 195
        for(int idxNeigh = 0 ; idxNeigh < 343 ; ++idxNeigh){
            if( interactions[idxNeigh].getSize() ){
                multipoleToLocal( idxNeigh, inLevel);
            }
        }
    }
196 197 198 199

    /** preExpNExp
      * @param exp an exponent vector to create an computable vector
      */
200
    void preExpNExp(FComplex<FReal>* const exp) const {
201 202 203 204 205 206 207 208 209 210 211 212 213 214
        for(int j = Parent::devP; j>= 0 ; --j){
            // Position in 'exp':  (j*(j+1)*0.5) + k
            // Position in 'nexp':  j*(j+1)      + k
            const int j_j1       = j*(j+1);
            const int j_j1_div_2 = int(j_j1 * 0.5);

            // Positive (or null) orders:
            for(int k = j ; k >= 0; --k){
                exp[j_j1 + k] = exp[j_j1_div_2 + k];
            }

            // Negative orders:
            FReal minus_1_pow_k = FReal( j&1 ? -1 : 1);
            for(int k = -j ; k < 0 ; ++k ){
COULAUD Olivier's avatar
COULAUD Olivier committed
215 216
                exp[j_j1 + k].setReal(minus_1_pow_k * exp[j_j1 + (-k)].real());
                exp[j_j1 + k].setImag((-minus_1_pow_k) * exp[j_j1 + (-k)].imag());
217 218 219 220 221 222 223
                minus_1_pow_k = -minus_1_pow_k;
            }
        }
    }

    /** M2L
    *We compute the conversion of multipole_exp_src in *p_center_of_exp_src to
224
    *a local expansion in *p_center_of_exp_target, and add the result to local_expansion_target.
225 226 227
    *
    *O_n^l (with n=0..P, l=-n..n) being the former multipole expansion terms
    *(whose center is *p_center_of_multipole_exp_src) we have for the new local
228
    *expansion terms (whose center is *p_center_of_local_expansion_target):
229 230 231 232 233 234 235 236 237 238
    *
    *L_j^k = sum{n=0..+}
    *sum{l=-n..n}
    *O_n^l Outer_{j+n}^{-k-l}(rho, alpha, beta)
    *
    *where (rho, alpha, beta) are the spherical coordinates of the vector :
    *p_center_of_local_exp_src - *p_center_of_multipole_exp_target
    *
    *Remark: here we have always j+n >= |-k-l|
    *
239
    * const FComplex<FReal>*const M2LTransfer = preM2LTransitions[inLevel][interactionIndex];
berenger-bramas's avatar
berenger-bramas committed
240 241
     *   for(int idxK = 0 ; idxK < interactions[interactionIndex].getSize() ; ++idxK){
     *       for(int idxRow = 0 ; idxRow < FF_MATRIX_ROW_DIM ; ++idxRow){
242
     *           FComplex<FReal> compute;
berenger-bramas's avatar
berenger-bramas committed
243 244 245 246 247 248
     *           for(int idxCol = 0 ; idxCol < FF_MATRIX_COLUMN_DIM ; ++idxCol){
     *               compute.addMul(M2LTransfer[idxCol * FF_MATRIX_ROW_DIM + idxRow], multipoleMatrix[idxCol + idxK * FF_MATRIX_COLUMN_DIM]);
     *           }
     *           localMatrix[FF_MATRIX_ROW_DIM * idxK + idxRow] = compute;
     *       }
     *   }
249
    */
berenger-bramas's avatar
berenger-bramas committed
250 251 252
    void multipoleToLocal(const int interactionIndex, const int inLevel){
        for(int idxInter = 0 ; idxInter < interactions[interactionIndex].getSize() ; ++idxInter){
            // Copy original vector and compute exp2nexp
253
            FMemUtils::copyall<FComplex<FReal>>(&multipoleMatrix[idxInter * FF_MATRIX_COLUMN_DIM],
berenger-bramas's avatar
berenger-bramas committed
254
                                          interactions[interactionIndex][idxInter].pole, FF_MATRIX_COLUMN_DIM);
berenger-bramas's avatar
berenger-bramas committed
255

berenger-bramas's avatar
berenger-bramas committed
256 257 258
            // Get a computable vector
            preExpNExp(&multipoleMatrix[idxInter * FF_MATRIX_COLUMN_DIM]);
        }
259

berenger-bramas's avatar
berenger-bramas committed
260 261 262 263 264
        const FReal one[2] = {1.0 , 0.0};

        FBlas::c_gemm(
                    FF_MATRIX_ROW_DIM,
                    FF_MATRIX_COLUMN_DIM,
265
                    int(interactions[interactionIndex].getSize()),
berenger-bramas's avatar
berenger-bramas committed
266
                    one,
267
                    FComplex<FReal>::ToFReal(preM2LTransitions[inLevel][interactionIndex]),
berenger-bramas's avatar
berenger-bramas committed
268
                    FF_MATRIX_ROW_DIM,
269
                    FComplex<FReal>::ToFReal(multipoleMatrix),
berenger-bramas's avatar
berenger-bramas committed
270
                    FF_MATRIX_COLUMN_DIM,
271
                    FComplex<FReal>::ToFReal(localMatrix),
berenger-bramas's avatar
berenger-bramas committed
272
                    FF_MATRIX_ROW_DIM);
273

berenger-bramas's avatar
berenger-bramas committed
274
        for(int idxInter = 0 ; idxInter < interactions[interactionIndex].getSize() ; ++idxInter){
275
            FMemUtils::addall<FComplex<FReal>>(interactions[interactionIndex][idxInter].local, &localMatrix[idxInter * FF_MATRIX_ROW_DIM],  FF_MATRIX_ROW_DIM);
berenger-bramas's avatar
berenger-bramas committed
276 277 278
        }

        interactions[interactionIndex].clear();
279 280 281 282 283 284
    }
};



#endif // FSPHERICALBLOCKBLASKERNEL_HPP