FUnifM2LHandler.hpp 18.7 KB
Newer Older
1
// ===================================================================================
2
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner
3 4 5 6 7 8 9 10 11 12 13 14 15
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.  
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info". 
// "http://www.gnu.org/licenses".
// ===================================================================================
16 17 18
// Keep in private GIT
// @SCALFMM_PRIVATE

19 20 21 22 23 24 25 26 27 28
#ifndef FUNIFM2LHANDLER_HPP
#define FUNIFM2LHANDLER_HPP

#include <numeric>
#include <stdexcept>
#include <string>
#include <sstream>
#include <fstream>
#include <typeinfo>

29 30 31
#include "Utils/FBlas.hpp"
#include "Utils/FTic.hpp"
#include "Utils/FDft.hpp"
32

33
#include "Utils/FComplex.hpp"
34 35


36
#include "FUnifTensor.hpp"
37

38 39
/*!  Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space. 
PB: Compute() does not belong to the M2LHandler like it does in the Chebyshev kernel. This allows much nicer specialization of the M2LHandler class with respect to the homogeneity of the kernel of interaction like in the ChebyshevSym kernel.*/
40
template < class FReal,int ORDER, typename MatrixKernelClass>
41
static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, FComplex<FReal>* &FC, const int SeparationCriterion = 1)
42
{
43 44 45 46 47
    // allocate memory and store compressed M2L operators
    if (FC) throw std::runtime_error("M2L operators are already set");
    // dimensions of operators
    const unsigned int order = ORDER;
    const unsigned int nnodes = TensorTraits<ORDER>::nnodes;
48
    const unsigned int ninteractions = 316+26*(SeparationCriterion<1 ? 1 : 0) + 1*(SeparationCriterion<0 ? 1 : 0);
49
    typedef FUnifTensor<FReal,ORDER> TensorType;
50 51

    // interpolation points of source (Y) and target (X) cell
52
    FPoint<FReal> X[nnodes], Y[nnodes];
53
    // set roots of target cell (X)
54
    TensorType::setRoots(FPoint<FReal>(0.,0.,0.), CellWidth, X);
55

56
    // allocate memory and compute 316 m2l operators (342 if separation equals 0, 343 if separation equals -1)
57
    FReal    *_C;
58
    FComplex<FReal> *_FC;
59 60 61 62

    // reduce storage from nnodes^2=order^6 to (2order-1)^3
    const unsigned int rc = (2*order-1)*(2*order-1)*(2*order-1);
    _C = new FReal [rc];
63
    _FC = new FComplex<FReal> [rc * ninteractions];
64 65 66 67 68 69

    // initialize root node ids pairs
    unsigned int node_ids_pairs[rc][2];
    TensorType::setNodeIdsPairs(node_ids_pairs);
    // init Discrete Fourier Transformator
    const int dimfft = 1; // unidim FFT since fully circulant embedding
70
    FFftw<FReal,FComplex<FReal>,dimfft> Dft(rc);
71 72 73 74 75 76 77 78
    // get first column of K via permutation
    unsigned int perm[rc];
    TensorType::setStoragePermutation(perm);

    unsigned int counter = 0;
    for (int i=-3; i<=3; ++i) {
        for (int j=-3; j<=3; ++j) {
            for (int k=-3; k<=3; ++k) {
79
                if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) {
80
                    // set roots of source cell (Y)
81
                    const FPoint<FReal> cy(CellWidth*FReal(i), CellWidth*FReal(j), CellWidth*FReal(k));
82
                    FUnifTensor<FReal,order>::setRoots(cy, CellWidth, Y);
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
                    // evaluate m2l operator
                    unsigned int ido=0;
                    for(unsigned int l=0; l<2*order-1; ++l)
                        for(unsigned int m=0; m<2*order-1; ++m)
                            for(unsigned int n=0; n<2*order-1; ++n){   
                    
                                // store value at current position in C
                                // use permutation if DFT is used because 
                                // the storage of the first column is required
                                // i.e. C[0] C[rc-1] C[rc-2] .. C[1] < WRONG!
                                // i.e. C[rc-1] C[0] C[1] .. C[rc-2] < RIGHT!
                                //                _C[counter*rc + ido]
                                _C[perm[ido]]
                                  = MatrixKernel->evaluate(X[node_ids_pairs[ido][0]], 
                                                           Y[node_ids_pairs[ido][1]]);
                                ido++;
                            }

                    // Apply Discrete Fourier Transformation
                    Dft.applyDFT(_C,_FC+counter*rc);

                    // increment interaction counter
                    counter++;
                }
            }
108
        }
109 110
    }
    if (counter != ninteractions)
111
        throw std::runtime_error("Number of interactions must correspond to " + std::to_string(ninteractions));
112 113 114 115 116 117 118 119 120

    // Free _C
    delete [] _C;

    // store FC
    counter = 0;
    // reduce storage if real valued kernel
    const unsigned int opt_rc = rc/2+1;
    // allocate M2L
121
    FC = new FComplex<FReal>[343 * opt_rc];
122 123 124 125 126

    for (int i=-3; i<=3; ++i)
        for (int j=-3; j<=3; ++j)
            for (int k=-3; k<=3; ++k) {
                const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
127
                if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) {
128 129 130 131 132 133
                    FBlas::c_copy(opt_rc, reinterpret_cast<FReal*>(_FC + counter*rc), 
                                  reinterpret_cast<FReal*>(FC + idx*opt_rc));
                    counter++;
                } else { 
                    FBlas::c_setzero(opt_rc, reinterpret_cast<FReal*>(FC + idx*opt_rc));
                }
134 135
      }

136
    if (counter != ninteractions)
137
        throw std::runtime_error("Number of interactions must correspond to " + std::to_string(ninteractions));
138
    delete [] _FC;      
139 140 141
}


142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158


/**
 * @author Pierre Blanchard (pierre.blanchard@inria.fr)
 * @class FUnifM2LHandler
 * Please read the license
 *
 * This class precomputes and efficiently stores the M2L operators
 * \f$[K_1,\dots,K_{316}]\f$ for all (\f$7^3-3^3 = 316\f$ possible interacting
 * cells in the far-field) interactions for the Lagrange interpolation
 * approach. The resulting Lagrange operators have a Circulant Toeplitz 
 * structure and can be applied efficiently in Fourier Space. Hence, the
 * originally \f$K_t\f$ of size \f$\ell^3\times\ell^3\f$ times \f$316\f$ for
 * all interactions is reduced to \f$316\f$ \f$C_t\f$, each of size \f$2\ell-1\f$.
 *
 * @tparam ORDER interpolation order \f$\ell\f$
 */
159
template < class FReal, int ORDER, KERNEL_FUNCTION_TYPE TYPE> class FUnifM2LHandler;
160 161

/*! Specialization for homogeneous kernel functions */
162 163
template < class FReal, int ORDER>
class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS>
164
{
165 166 167 168 169 170
    enum {order = ORDER,
          nnodes = TensorTraits<ORDER>::nnodes,
          ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field)
          rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1)};

    /// M2L Operators (stored in Fourier space)
171
    FSmartPointer< FComplex<FReal>,FSmartArrayMemory> FC;
172 173

    /// Utils
174
    typedef FUnifTensor<FReal,ORDER> TensorType;
175 176 177 178
    unsigned int node_diff[nnodes*nnodes];

    /// DFT specific
    static const int dimfft = 1; // unidim FFT since fully circulant embedding
179
    typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast Discrete Fourier Transformator
180 181 182
    DftClass Dft;
    const unsigned int opt_rc; // specific to real valued kernel

183 184
    /// Leaf level separation criterion
    const int LeafLevelSeparationCriterion;
185 186 187 188 189 190 191 192 193 194 195

    static const std::string getFileName()
    {
        const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f');
        std::stringstream stream;
        stream << "m2l_k"/*<< MatrixKernelClass::getID()*/ << "_" << precision_type
                     << "_o" << order << ".bin";
        return stream.str();
    }

    
196
public:
197
    template <typename MatrixKernelClass>
198
    FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const int inLeafLevelSeparationCriterion = 1)
199
        : FC(nullptr), Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
200
    {    
201

202 203
        // initialize root node ids
        TensorType::setNodeIdsDiff(node_diff);
204

205 206
        // Compute and Set M2L Operators
        ComputeAndSet(MatrixKernel);
207

208
    }
209

210 211 212 213
    /*
     * Copy constructor
     */
    FUnifM2LHandler(const FUnifM2LHandler& other)
214
      : FC(other.FC), Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
215 216 217
    {    
        // copy node_diff
        memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
218 219
    }

220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
    ~FUnifM2LHandler()
    { }

    /**
     * Computes and sets the matrix \f$C_t\f$
     */
    template <typename MatrixKernelClass>
    void ComputeAndSet(const MatrixKernelClass *const MatrixKernel)
    {
        // measure time
        FTic time; time.tic();
        // check if aready set
        if (FC) throw std::runtime_error("M2L operator already set");
        // Compute matrix of interactions
        const FReal ReferenceCellWidth = FReal(2.);
235
        FComplex<FReal>* pFC = NULL;
236
        Compute<FReal,order>(MatrixKernel,ReferenceCellWidth,pFC,LeafLevelSeparationCriterion);
237 238 239
        FC.assign(pFC);

        // Compute memory usage
240
        unsigned long sizeM2L = 343*opt_rc*sizeof(FComplex<FReal>);
241 242 243 244 245 246


        // write info
        std::cout << "Compute and set M2L operators ("<< long(sizeM2L/**1e-6*/) <<" B) in "
                  << time.tacAndElapsed() << "sec."   << std::endl;
    }
247 248 249 250

    unsigned long long getMemory() const {
        return 343*opt_rc*sizeof(FComplex<FReal>);
    }        
251 252 253 254 255 256 257 258

    /**
    * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be
    * seen as part of the L2L operation.
    *
    * @param[in] X transformed local expansion of size \f$r\f$
    * @param[out] x local expansion of size \f$\ell^3\f$
    */
259
    void unapplyZeroPaddingAndDFT(const FComplex<FReal> *const FX, FReal *const x) const
260 261 262 263
    { 
        FReal Px[rc];
        FBlas::setzero(rc,Px);
        // Apply forward Discrete Fourier Transform
264
        Dft.applyIDFTNorm(FX,Px);
265 266 267 268
        // Unapply Zero Padding
        for (unsigned int j=0; j<nnodes; ++j)
            x[j]+=Px[node_diff[nnodes-j-1]];
    }
269

270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
    /**
     * The M2L operation \f$X+=C_tY\f$ is performed in Fourier space by 
     * application of the convolution theorem (i.e. \f$FX+=FC_t:FY\f$), 
     * where \f$FY\f$ is the transformed multipole expansion and 
     * \f$FX\f$ is the transformed local expansion, both padded with zeros and
     * of size \f$r_c=(2\times ORDER-1)^3\f$ or \f$r_c^{opt}=\frac{r_c}{2}+1\f$ 
     * for real valued kernels. The index \f$t\f$ denotes the transfer vector 
     * of the target cell to the source cell.
     *
     * @param[in] transfer transfer vector
     * @param[in] FY transformed multipole expansion
     * @param[out] FX transformed local expansion
     * @param[in] CellWidth needed for the scaling of the compressed M2L operators which are based on a homogeneous matrix kernel computed for the reference cell width \f$w=2\f$, ie in \f$[-1,1]^3\f$.
     */
    void applyFC(const unsigned int idx, const unsigned int, const FReal scale,
285
                 const FComplex<FReal> *const FY, FComplex<FReal> *const FX) const
286 287 288
    {
        // Perform entrywise product manually
        for (unsigned int j=0; j<opt_rc; ++j){
289
            FX[j].addMul(FComplex<FReal>(scale*FC[idx*opt_rc + j].getReal(),
290 291 292 293
                                  scale*FC[idx*opt_rc + j].getImag()),
                         FY[j]);
        }
    }
294 295


296 297 298 299 300 301 302
    /**
     * Transform densities \f$Y= DFT(y)\f$ of a source cell. This operation
     * can be seen as part of the M2M operation.
     *
     * @param[in] y multipole expansion of size \f$\ell^3\f$
     * @param[out] Y transformed multipole expansion of size \f$r\f$
     */
303
    void applyZeroPaddingAndDFT(FReal *const y, FComplex<FReal> *const FY) const
304 305 306 307 308 309 310 311 312
    {
        FReal Py[rc];
        FBlas::setzero(rc,Py);
        // Apply Zero Padding
        for (unsigned int i=0; i<nnodes; ++i)
            Py[node_diff[i*nnodes]]=y[i];
        // Apply forward Discrete Fourier Transform
        Dft.applyDFT(Py,FY);
    }
313 314 315 316 317


};


318
/*! Specialization for non-homogeneous kernel functions */
319 320
template <class FReal, int ORDER>
class FUnifM2LHandler<FReal,ORDER,NON_HOMOGENEOUS>
321
{
322 323 324 325 326 327
    enum {order = ORDER,
          nnodes = TensorTraits<ORDER>::nnodes,
          ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field)
          rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1)};

    /// M2L Operators (stored in Fourier space for each level)
328
    FSmartPointer< FComplex<FReal>*,FSmartArrayMemory> FC;
329 330 331 332
    /// Homogeneity specific variables
    const unsigned int TreeHeight;
    const FReal RootCellWidth;
    /// Utils
333
    typedef FUnifTensor<FReal,ORDER> TensorType;
334 335 336
    unsigned int node_diff[nnodes*nnodes];
    /// DFT specific
    static const int dimfft = 1; // unidim FFT since fully circulant embedding
337
    typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast real-valued Discrete Fourier Transformator
338 339 340
    DftClass Dft;
    const unsigned int opt_rc; // specific to real valued kernel

341 342
    /// Leaf level separation criterion
    const int LeafLevelSeparationCriterion;
343 344 345 346 347 348 349 350 351

    static const std::string getFileName()
    {
        const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f');
        std::stringstream stream;
        stream << "m2l_k"/*<< MatrixKernelClass::getID()*/ << "_" << precision_type
               << "_o" << order << ".bin";
        return stream.str();
    }
352

353
    
354 355
public:
    template <typename MatrixKernelClass>
356
    FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const int inLeafLevelSeparationCriterion = 1)
357 358
        : TreeHeight(inTreeHeight),
          RootCellWidth(inRootCellWidth),
359
          Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
360 361 362 363 364 365
    {

        // initialize root node ids
        TensorType::setNodeIdsDiff(node_diff);
        
        // init M2L operators
366
        FC = new FComplex<FReal>*[TreeHeight];
367 368 369 370 371 372 373 374 375 376 377 378 379 380 381
        FC[0]       = NULL; FC[1]       = NULL;
        for (unsigned int l=2; l<TreeHeight; ++l) FC[l] = NULL;

        // Compute and Set M2L Operators
        ComputeAndSet(MatrixKernel);

    }

    /*
     * Copy constructor
     */
    FUnifM2LHandler(const FUnifM2LHandler& other)
      : FC(other.FC),
        TreeHeight(other.TreeHeight),
        RootCellWidth(other.RootCellWidth),
382
        Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
383 384 385
    {    
        // copy node_diff
        memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
386
    }
387 388 389



390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409
    ~FUnifM2LHandler()
    {
        for (unsigned int l=0; l<TreeHeight; ++l) 
            if (FC[l] != NULL) delete [] FC[l];
    }

    /**
     * Computes and sets the matrix \f$C_t\f$
     */
    template <typename MatrixKernelClass>
    void ComputeAndSet(const MatrixKernelClass *const MatrixKernel)
    {
        // measure time
        FTic time; time.tic();

        // Compute matrix of interactions at each level !! (since non homog)
        FReal CellWidth = RootCellWidth / FReal(2.); // at level 1
        CellWidth /= FReal(2.);                      // at level 2
        for (unsigned int l=2; l<TreeHeight; ++l) {

410 411 412
            // Determine separation criteria wrt level
            const int SeparationCriterion = (l != TreeHeight-1 ? 1 : LeafLevelSeparationCriterion);

413 414
            // check if already set
            if (FC[l]) throw std::runtime_error("M2L operator already set");
415
            Compute<FReal,order>(MatrixKernel,CellWidth,FC[l],SeparationCriterion);
416
            CellWidth /= FReal(2.);                    // at level l+1 
417

418
        }
419

420
        // Compute memory usage
421
        unsigned long sizeM2L = (TreeHeight-2)*343*opt_rc*sizeof(FComplex<FReal>);
422 423 424 425 426 427

        // write info
        std::cout << "Compute and set M2L operators ("<< long(sizeM2L/**1e-6*/) <<" B) in "
                  << time.tacAndElapsed() << "sec."   << std::endl;
    }

428 429 430
    unsigned long long getMemory() const {
        return (TreeHeight-2)*343*opt_rc*sizeof(FComplex<FReal>);
    }   
431 432 433 434 435 436 437 438

    /**
    * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be
    * seen as part of the L2L operation.
    *
    * @param[in] X transformed local expansion of size \f$r\f$
    * @param[out] x local expansion of size \f$\ell^3\f$
    */
439
    void unapplyZeroPaddingAndDFT(const FComplex<FReal> *const FX, FReal *const x) const
440 441 442 443
    { 
        FReal Px[rc];
        FBlas::setzero(rc,Px);
        // Apply forward Discrete Fourier Transform
444
        Dft.applyIDFTNorm(FX,Px);
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
        // Unapply Zero Padding
        for (unsigned int j=0; j<nnodes; ++j)
            x[j]+=Px[node_diff[nnodes-j-1]];
    }

    /**
     * The M2L operation \f$X+=C_tY\f$ is performed in Fourier space by 
     * application of the convolution theorem (i.e. \f$FX+=FC_t:FY\f$), 
     * where \f$FY\f$ is the transformed multipole expansion and 
     * \f$FX\f$ is the transformed local expansion, both padded with zeros and
     * of size \f$r_c=(2\times ORDER-1)^3\f$ or \f$r_c^{opt}=\frac{r_c}{2}+1\f$ 
     * for real valued kernels. The index \f$t\f$ denotes the transfer vector 
     * of the target cell to the source cell.
     *
     * @param[in] transfer transfer vector
     * @param[in] FY transformed multipole expansion
     * @param[out] FX transformed local expansion
     * @param[in] CellWidth needed for the scaling of the compressed M2L operators which are based on a homogeneous matrix kernel computed for the reference cell width \f$w=2\f$, ie in \f$[-1,1]^3\f$.
     */
    void applyFC(const unsigned int idx, const unsigned int TreeLevel, const FReal,
465
                 const FComplex<FReal> *const FY, FComplex<FReal> *const FX) const
466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
    {
        // Perform entrywise product manually
        for (unsigned int j=0; j<opt_rc; ++j){
            FX[j].addMul(FC[TreeLevel][idx*opt_rc + j],FY[j]);
        }
    }


    /**
     * Transform densities \f$Y= DFT(y)\f$ of a source cell. This operation
     * can be seen as part of the M2M operation.
     *
     * @param[in] y multipole expansion of size \f$\ell^3\f$
     * @param[out] Y transformed multipole expansion of size \f$r\f$
     */
481
    void applyZeroPaddingAndDFT(FReal *const y, FComplex<FReal> *const FY) const
482 483 484 485 486 487 488 489 490
    {
        FReal Py[rc];
        FBlas::setzero(rc,Py);
        // Apply Zero Padding
        for (unsigned int i=0; i<nnodes; ++i)
            Py[node_diff[i*nnodes]]=y[i];
        // Apply forward Discrete Fourier Transform
        Dft.applyDFT(Py,FY);
    }
491 492


493
};
494 495 496 497 498


#endif // FUNIFM2LHANDLER_HPP

// [--END--]