Attention une mise à jour du service Gitlab va être effectuée le mardi 18 janvier (et non lundi 17 comme annoncé précédemment) entre 18h00 et 18h30. Cette mise à jour va générer une interruption du service dont nous ne maîtrisons pas complètement la durée mais qui ne devrait pas excéder quelques minutes.

FUnifM2LHandler.hpp 18.7 KB
Newer Older
1
// ===================================================================================
2
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner
3
4
5
6
7
8
9
10
11
12
13
14
15
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.  
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info". 
// "http://www.gnu.org/licenses".
// ===================================================================================
16
17
18
// Keep in private GIT
// @SCALFMM_PRIVATE

19
20
21
22
23
24
25
26
27
28
#ifndef FUNIFM2LHANDLER_HPP
#define FUNIFM2LHANDLER_HPP

#include <numeric>
#include <stdexcept>
#include <string>
#include <sstream>
#include <fstream>
#include <typeinfo>

29
30
31
#include "Utils/FBlas.hpp"
#include "Utils/FTic.hpp"
#include "Utils/FDft.hpp"
32

33
#include "Utils/FComplex.hpp"
34
35


36
#include "FUnifTensor.hpp"
37

38
39
/*!  Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space. 
PB: Compute() does not belong to the M2LHandler like it does in the Chebyshev kernel. This allows much nicer specialization of the M2LHandler class with respect to the homogeneity of the kernel of interaction like in the ChebyshevSym kernel.*/
40
template < class FReal,int ORDER, typename MatrixKernelClass>
41
static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, FComplex<FReal>* &FC, const int SeparationCriterion = 1)
42
{
43
44
45
46
47
    // allocate memory and store compressed M2L operators
    if (FC) throw std::runtime_error("M2L operators are already set");
    // dimensions of operators
    const unsigned int order = ORDER;
    const unsigned int nnodes = TensorTraits<ORDER>::nnodes;
48
    const unsigned int ninteractions = 316+26*(SeparationCriterion<1 ? 1 : 0) + 1*(SeparationCriterion<0 ? 1 : 0);
49
    typedef FUnifTensor<FReal,ORDER> TensorType;
50
51

    // interpolation points of source (Y) and target (X) cell
52
    FPoint<FReal> X[nnodes], Y[nnodes];
53
    // set roots of target cell (X)
54
    TensorType::setRoots(FPoint<FReal>(0.,0.,0.), CellWidth, X);
55

56
    // allocate memory and compute 316 m2l operators (342 if separation equals 0, 343 if separation equals -1)
57
    FReal    *_C;
58
    FComplex<FReal> *_FC;
59
60
61
62

    // reduce storage from nnodes^2=order^6 to (2order-1)^3
    const unsigned int rc = (2*order-1)*(2*order-1)*(2*order-1);
    _C = new FReal [rc];
63
    _FC = new FComplex<FReal> [rc * ninteractions];
64
65
66
67
68
69

    // initialize root node ids pairs
    unsigned int node_ids_pairs[rc][2];
    TensorType::setNodeIdsPairs(node_ids_pairs);
    // init Discrete Fourier Transformator
    const int dimfft = 1; // unidim FFT since fully circulant embedding
70
    FFftw<FReal,FComplex<FReal>,dimfft> Dft(rc);
71
72
73
74
75
76
77
78
    // get first column of K via permutation
    unsigned int perm[rc];
    TensorType::setStoragePermutation(perm);

    unsigned int counter = 0;
    for (int i=-3; i<=3; ++i) {
        for (int j=-3; j<=3; ++j) {
            for (int k=-3; k<=3; ++k) {
79
                if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) {
80
                    // set roots of source cell (Y)
81
                    const FPoint<FReal> cy(CellWidth*FReal(i), CellWidth*FReal(j), CellWidth*FReal(k));
82
                    FUnifTensor<FReal,order>::setRoots(cy, CellWidth, Y);
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
                    // evaluate m2l operator
                    unsigned int ido=0;
                    for(unsigned int l=0; l<2*order-1; ++l)
                        for(unsigned int m=0; m<2*order-1; ++m)
                            for(unsigned int n=0; n<2*order-1; ++n){   
                    
                                // store value at current position in C
                                // use permutation if DFT is used because 
                                // the storage of the first column is required
                                // i.e. C[0] C[rc-1] C[rc-2] .. C[1] < WRONG!
                                // i.e. C[rc-1] C[0] C[1] .. C[rc-2] < RIGHT!
                                //                _C[counter*rc + ido]
                                _C[perm[ido]]
                                  = MatrixKernel->evaluate(X[node_ids_pairs[ido][0]], 
                                                           Y[node_ids_pairs[ido][1]]);
                                ido++;
                            }

                    // Apply Discrete Fourier Transformation
                    Dft.applyDFT(_C,_FC+counter*rc);

                    // increment interaction counter
                    counter++;
                }
            }
108
        }
109
110
    }
    if (counter != ninteractions)
111
        throw std::runtime_error("Number of interactions must correspond to " + std::to_string(ninteractions));
112
113
114
115
116
117
118
119
120

    // Free _C
    delete [] _C;

    // store FC
    counter = 0;
    // reduce storage if real valued kernel
    const unsigned int opt_rc = rc/2+1;
    // allocate M2L
121
    FC = new FComplex<FReal>[343 * opt_rc];
122
123
124
125
126

    for (int i=-3; i<=3; ++i)
        for (int j=-3; j<=3; ++j)
            for (int k=-3; k<=3; ++k) {
                const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
127
                if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) {
128
129
130
131
132
133
                    FBlas::c_copy(opt_rc, reinterpret_cast<FReal*>(_FC + counter*rc), 
                                  reinterpret_cast<FReal*>(FC + idx*opt_rc));
                    counter++;
                } else { 
                    FBlas::c_setzero(opt_rc, reinterpret_cast<FReal*>(FC + idx*opt_rc));
                }
134
135
      }

136
    if (counter != ninteractions)
137
        throw std::runtime_error("Number of interactions must correspond to " + std::to_string(ninteractions));
138
    delete [] _FC;      
139
140
141
}


142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158


/**
 * @author Pierre Blanchard (pierre.blanchard@inria.fr)
 * @class FUnifM2LHandler
 * Please read the license
 *
 * This class precomputes and efficiently stores the M2L operators
 * \f$[K_1,\dots,K_{316}]\f$ for all (\f$7^3-3^3 = 316\f$ possible interacting
 * cells in the far-field) interactions for the Lagrange interpolation
 * approach. The resulting Lagrange operators have a Circulant Toeplitz 
 * structure and can be applied efficiently in Fourier Space. Hence, the
 * originally \f$K_t\f$ of size \f$\ell^3\times\ell^3\f$ times \f$316\f$ for
 * all interactions is reduced to \f$316\f$ \f$C_t\f$, each of size \f$2\ell-1\f$.
 *
 * @tparam ORDER interpolation order \f$\ell\f$
 */
159
template < class FReal, int ORDER, KERNEL_FUNCTION_TYPE TYPE> class FUnifM2LHandler;
160
161

/*! Specialization for homogeneous kernel functions */
162
163
template < class FReal, int ORDER>
class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS>
164
{
165
166
167
168
169
170
    enum {order = ORDER,
          nnodes = TensorTraits<ORDER>::nnodes,
          ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field)
          rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1)};

    /// M2L Operators (stored in Fourier space)
171
    FSmartPointer< FComplex<FReal>,FSmartArrayMemory> FC;
172
173

    /// Utils
174
    typedef FUnifTensor<FReal,ORDER> TensorType;
175
176
177
178
    unsigned int node_diff[nnodes*nnodes];

    /// DFT specific
    static const int dimfft = 1; // unidim FFT since fully circulant embedding
179
    typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast Discrete Fourier Transformator
180
181
182
    DftClass Dft;
    const unsigned int opt_rc; // specific to real valued kernel

183
184
    /// Leaf level separation criterion
    const int LeafLevelSeparationCriterion;
185
186
187
188
189
190
191
192
193
194
195

    static const std::string getFileName()
    {
        const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f');
        std::stringstream stream;
        stream << "m2l_k"/*<< MatrixKernelClass::getID()*/ << "_" << precision_type
                     << "_o" << order << ".bin";
        return stream.str();
    }

    
196
public:
197
    template <typename MatrixKernelClass>
198
    FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const int inLeafLevelSeparationCriterion = 1)
199
        : FC(nullptr), Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
200
    {    
201

202
203
        // initialize root node ids
        TensorType::setNodeIdsDiff(node_diff);
204

205
206
        // Compute and Set M2L Operators
        ComputeAndSet(MatrixKernel);
207

208
    }
209

210
211
212
213
    /*
     * Copy constructor
     */
    FUnifM2LHandler(const FUnifM2LHandler& other)
214
      : FC(other.FC), Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
215
216
217
    {    
        // copy node_diff
        memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
218
219
    }

220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
    ~FUnifM2LHandler()
    { }

    /**
     * Computes and sets the matrix \f$C_t\f$
     */
    template <typename MatrixKernelClass>
    void ComputeAndSet(const MatrixKernelClass *const MatrixKernel)
    {
        // measure time
        FTic time; time.tic();
        // check if aready set
        if (FC) throw std::runtime_error("M2L operator already set");
        // Compute matrix of interactions
        const FReal ReferenceCellWidth = FReal(2.);
235
        FComplex<FReal>* pFC = NULL;
236
        Compute<FReal,order>(MatrixKernel,ReferenceCellWidth,pFC,LeafLevelSeparationCriterion);
237
238
239
        FC.assign(pFC);

        // Compute memory usage
240
        unsigned long sizeM2L = 343*opt_rc*sizeof(FComplex<FReal>);
241
242
243
244
245
246


        // write info
        std::cout << "Compute and set M2L operators ("<< long(sizeM2L/**1e-6*/) <<" B) in "
                  << time.tacAndElapsed() << "sec."   << std::endl;
    }
247
248
249
250

    unsigned long long getMemory() const {
        return 343*opt_rc*sizeof(FComplex<FReal>);
    }        
251
252
253
254
255
256
257
258

    /**
    * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be
    * seen as part of the L2L operation.
    *
    * @param[in] X transformed local expansion of size \f$r\f$
    * @param[out] x local expansion of size \f$\ell^3\f$
    */
259
    void unapplyZeroPaddingAndDFT(const FComplex<FReal> *const FX, FReal *const x) const
260
261
262
263
    { 
        FReal Px[rc];
        FBlas::setzero(rc,Px);
        // Apply forward Discrete Fourier Transform
264
        Dft.applyIDFTNorm(FX,Px);
265
266
        // Unapply Zero Padding
        for (unsigned int j=0; j<nnodes; ++j)
267
            x[j]=Px[node_diff[nnodes-j-1]];
268
    }
269

270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
    /**
     * The M2L operation \f$X+=C_tY\f$ is performed in Fourier space by 
     * application of the convolution theorem (i.e. \f$FX+=FC_t:FY\f$), 
     * where \f$FY\f$ is the transformed multipole expansion and 
     * \f$FX\f$ is the transformed local expansion, both padded with zeros and
     * of size \f$r_c=(2\times ORDER-1)^3\f$ or \f$r_c^{opt}=\frac{r_c}{2}+1\f$ 
     * for real valued kernels. The index \f$t\f$ denotes the transfer vector 
     * of the target cell to the source cell.
     *
     * @param[in] transfer transfer vector
     * @param[in] FY transformed multipole expansion
     * @param[out] FX transformed local expansion
     * @param[in] CellWidth needed for the scaling of the compressed M2L operators which are based on a homogeneous matrix kernel computed for the reference cell width \f$w=2\f$, ie in \f$[-1,1]^3\f$.
     */
    void applyFC(const unsigned int idx, const unsigned int, const FReal scale,
285
                 const FComplex<FReal> *const FY, FComplex<FReal> *const FX) const
286
287
288
    {
        // Perform entrywise product manually
        for (unsigned int j=0; j<opt_rc; ++j){
289
            FX[j].addMul(FComplex<FReal>(scale*FC[idx*opt_rc + j].getReal(),
290
291
292
293
                                  scale*FC[idx*opt_rc + j].getImag()),
                         FY[j]);
        }
    }
294
295


296
297
298
299
300
301
302
    /**
     * Transform densities \f$Y= DFT(y)\f$ of a source cell. This operation
     * can be seen as part of the M2M operation.
     *
     * @param[in] y multipole expansion of size \f$\ell^3\f$
     * @param[out] Y transformed multipole expansion of size \f$r\f$
     */
303
    void applyZeroPaddingAndDFT(FReal *const y, FComplex<FReal> *const FY) const
304
305
306
307
308
309
310
311
312
    {
        FReal Py[rc];
        FBlas::setzero(rc,Py);
        // Apply Zero Padding
        for (unsigned int i=0; i<nnodes; ++i)
            Py[node_diff[i*nnodes]]=y[i];
        // Apply forward Discrete Fourier Transform
        Dft.applyDFT(Py,FY);
    }
313
314
315
316
317


};


318
/*! Specialization for non-homogeneous kernel functions */
319
320
template <class FReal, int ORDER>
class FUnifM2LHandler<FReal,ORDER,NON_HOMOGENEOUS>
321
{
322
323
324
325
326
327
    enum {order = ORDER,
          nnodes = TensorTraits<ORDER>::nnodes,
          ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field)
          rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1)};

    /// M2L Operators (stored in Fourier space for each level)
328
    FSmartPointer< FComplex<FReal>*,FSmartArrayMemory> FC;
329
330
331
332
    /// Homogeneity specific variables
    const unsigned int TreeHeight;
    const FReal RootCellWidth;
    /// Utils
333
    typedef FUnifTensor<FReal,ORDER> TensorType;
334
335
336
    unsigned int node_diff[nnodes*nnodes];
    /// DFT specific
    static const int dimfft = 1; // unidim FFT since fully circulant embedding
337
    typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast real-valued Discrete Fourier Transformator
338
339
340
    DftClass Dft;
    const unsigned int opt_rc; // specific to real valued kernel

341
342
    /// Leaf level separation criterion
    const int LeafLevelSeparationCriterion;
343
344
345
346
347
348
349
350
351

    static const std::string getFileName()
    {
        const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f');
        std::stringstream stream;
        stream << "m2l_k"/*<< MatrixKernelClass::getID()*/ << "_" << precision_type
               << "_o" << order << ".bin";
        return stream.str();
    }
352

353
    
354
355
public:
    template <typename MatrixKernelClass>
356
    FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const int inLeafLevelSeparationCriterion = 1)
357
358
        : TreeHeight(inTreeHeight),
          RootCellWidth(inRootCellWidth),
359
          Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
360
361
362
363
364
365
    {

        // initialize root node ids
        TensorType::setNodeIdsDiff(node_diff);
        
        // init M2L operators
366
        FC = new FComplex<FReal>*[TreeHeight];
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
        FC[0]       = NULL; FC[1]       = NULL;
        for (unsigned int l=2; l<TreeHeight; ++l) FC[l] = NULL;

        // Compute and Set M2L Operators
        ComputeAndSet(MatrixKernel);

    }

    /*
     * Copy constructor
     */
    FUnifM2LHandler(const FUnifM2LHandler& other)
      : FC(other.FC),
        TreeHeight(other.TreeHeight),
        RootCellWidth(other.RootCellWidth),
382
        Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
383
384
385
    {    
        // copy node_diff
        memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
386
    }
387
388
389



390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
    ~FUnifM2LHandler()
    {
        for (unsigned int l=0; l<TreeHeight; ++l) 
            if (FC[l] != NULL) delete [] FC[l];
    }

    /**
     * Computes and sets the matrix \f$C_t\f$
     */
    template <typename MatrixKernelClass>
    void ComputeAndSet(const MatrixKernelClass *const MatrixKernel)
    {
        // measure time
        FTic time; time.tic();

        // Compute matrix of interactions at each level !! (since non homog)
        FReal CellWidth = RootCellWidth / FReal(2.); // at level 1
        CellWidth /= FReal(2.);                      // at level 2
        for (unsigned int l=2; l<TreeHeight; ++l) {

410
411
412
            // Determine separation criteria wrt level
            const int SeparationCriterion = (l != TreeHeight-1 ? 1 : LeafLevelSeparationCriterion);

413
414
            // check if already set
            if (FC[l]) throw std::runtime_error("M2L operator already set");
415
            Compute<FReal,order>(MatrixKernel,CellWidth,FC[l],SeparationCriterion);
416
            CellWidth /= FReal(2.);                    // at level l+1 
417

418
        }
419

420
        // Compute memory usage
421
        unsigned long sizeM2L = (TreeHeight-2)*343*opt_rc*sizeof(FComplex<FReal>);
422
423
424
425
426
427

        // write info
        std::cout << "Compute and set M2L operators ("<< long(sizeM2L/**1e-6*/) <<" B) in "
                  << time.tacAndElapsed() << "sec."   << std::endl;
    }

428
429
430
    unsigned long long getMemory() const {
        return (TreeHeight-2)*343*opt_rc*sizeof(FComplex<FReal>);
    }   
431
432
433
434
435
436
437
438

    /**
    * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be
    * seen as part of the L2L operation.
    *
    * @param[in] X transformed local expansion of size \f$r\f$
    * @param[out] x local expansion of size \f$\ell^3\f$
    */
439
    void unapplyZeroPaddingAndDFT(const FComplex<FReal> *const FX, FReal *const x) const
440
441
442
443
    { 
        FReal Px[rc];
        FBlas::setzero(rc,Px);
        // Apply forward Discrete Fourier Transform
444
        Dft.applyIDFTNorm(FX,Px);
445
446
        // Unapply Zero Padding
        for (unsigned int j=0; j<nnodes; ++j)
447
            x[j]=Px[node_diff[nnodes-j-1]];
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
    }

    /**
     * The M2L operation \f$X+=C_tY\f$ is performed in Fourier space by 
     * application of the convolution theorem (i.e. \f$FX+=FC_t:FY\f$), 
     * where \f$FY\f$ is the transformed multipole expansion and 
     * \f$FX\f$ is the transformed local expansion, both padded with zeros and
     * of size \f$r_c=(2\times ORDER-1)^3\f$ or \f$r_c^{opt}=\frac{r_c}{2}+1\f$ 
     * for real valued kernels. The index \f$t\f$ denotes the transfer vector 
     * of the target cell to the source cell.
     *
     * @param[in] transfer transfer vector
     * @param[in] FY transformed multipole expansion
     * @param[out] FX transformed local expansion
     * @param[in] CellWidth needed for the scaling of the compressed M2L operators which are based on a homogeneous matrix kernel computed for the reference cell width \f$w=2\f$, ie in \f$[-1,1]^3\f$.
     */
    void applyFC(const unsigned int idx, const unsigned int TreeLevel, const FReal,
465
                 const FComplex<FReal> *const FY, FComplex<FReal> *const FX) const
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
    {
        // Perform entrywise product manually
        for (unsigned int j=0; j<opt_rc; ++j){
            FX[j].addMul(FC[TreeLevel][idx*opt_rc + j],FY[j]);
        }
    }


    /**
     * Transform densities \f$Y= DFT(y)\f$ of a source cell. This operation
     * can be seen as part of the M2M operation.
     *
     * @param[in] y multipole expansion of size \f$\ell^3\f$
     * @param[out] Y transformed multipole expansion of size \f$r\f$
     */
481
    void applyZeroPaddingAndDFT(FReal *const y, FComplex<FReal> *const FY) const
482
483
484
485
486
487
488
489
490
    {
        FReal Py[rc];
        FBlas::setzero(rc,Py);
        // Apply Zero Padding
        for (unsigned int i=0; i<nnodes; ++i)
            Py[node_diff[i*nnodes]]=y[i];
        // Apply forward Discrete Fourier Transform
        Dft.applyDFT(Py,FY);
    }
491
492


493
};
494
495
496
497
498


#endif // FUNIFM2LHANDLER_HPP

// [--END--]