FChebSymM2LHandler.hpp 24.1 KB
Newer Older
BRAMAS Berenger's avatar
BRAMAS Berenger committed
1
// ===================================================================================
2
// Copyright ScalFmm 2011 INRIA
3 4 5 6 7 8 9 10 11 12 13 14
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.  
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info". 
// "http://www.gnu.org/licenses".
BRAMAS Berenger's avatar
BRAMAS Berenger committed
15
// ===================================================================================
16 17 18 19
#ifndef FCHEBSYMM2LHANDLER_HPP
#define FCHEBSYMM2LHANDLER_HPP

#include <climits>
COULAUD Olivier's avatar
COULAUD Olivier committed
20
#include <sstream>
21

COULAUD Olivier's avatar
COULAUD Olivier committed
22
#include "Utils/FBlas.hpp"
23

COULAUD Olivier's avatar
COULAUD Olivier committed
24
#include "FChebTensor.hpp"
25
#include "../Interpolation/FInterpSymmetries.hpp"
COULAUD Olivier's avatar
COULAUD Olivier committed
26
#include "FChebM2LHandler.hpp"
27 28 29 30 31 32 33

/**
 * @author Matthias Messner (matthias.matthias@inria.fr)
 * Please read the license
 */


Matthias Messner's avatar
Matthias Messner committed
34 35
/*!  Choose either \a FULLY_PIVOTED_ACASVD or \a PARTIALLY_PIVOTED_ACASVD or
	\a ONLY_SVD.
COULAUD Olivier's avatar
COULAUD Olivier committed
36
 */
37
//#define ONLY_SVD
Matthias Messner's avatar
Matthias Messner committed
38
//#define FULLY_PIVOTED_ACASVD
39
#define PARTIALLY_PIVOTED_ACASVD
Matthias Messner's avatar
Matthias Messner committed
40 41


Matthias Messner's avatar
Matthias Messner committed
42 43


Matthias Messner's avatar
Matthias Messner committed
44 45 46 47
/*!  The fully pivoted adaptive cross approximation (fACA) compresses a
	far-field interaction as \f$K\sim UV^\top\f$. The fACA requires all entries
	to be computed beforehand, then the compression follows in
	\f$\mathcal{O}(2\ell^3k)\f$ operations based on the required accuracy
Matthias Messner's avatar
Matthias Messner committed
48
	\f$\varepsilon\f$. The matrix K will be destroyed as a result.
Matthias Messner's avatar
Matthias Messner committed
49 50

	@param[in] K far-field to be approximated
51 52
	@param[in] nx number of rows
	@param[in] ny number of cols
Matthias Messner's avatar
Matthias Messner committed
53 54 55 56
	@param[in] eps prescribed accuracy
	@param[out] U matrix containing \a k column vectors
	@param[out] V matrix containing \a k row vectors
	@param[out] k final low-rank depends on prescribed accuracy \a eps
COULAUD Olivier's avatar
COULAUD Olivier committed
57
 */
58
void fACA(FReal *const K,
COULAUD Olivier's avatar
COULAUD Olivier committed
59 60
		const unsigned int nx, const unsigned int ny,
		const double eps, FReal* &U, FReal* &V, unsigned int &k)
Matthias Messner's avatar
Matthias Messner committed
61 62
{
	// control vectors (true if not used, false if used)
63 64 65 66
	bool *const r = new bool[nx];
	bool *const c = new bool[ny];
	for (unsigned int i=0; i<nx; ++i) r[i] = true;
	for (unsigned int j=0; j<ny; ++j) c[j] = true;
Matthias Messner's avatar
Matthias Messner committed
67 68 69

	// compute Frobenius norm of original Matrix K
	FReal norm2K = 0;
70 71 72
	for (unsigned int j=0; j<ny; ++j) {
		const FReal *const colK = K + j*nx;
		norm2K += FBlas::scpr(nx, colK, colK);
Matthias Messner's avatar
Matthias Messner committed
73 74 75 76
	}

	// initialize rank k and UV'
	k = 0;
COULAUD Olivier's avatar
COULAUD Olivier committed
77
	const unsigned int maxk = (nx + ny) / 2;
78 79 80 81
	U = new FReal[nx * maxk];
	V = new FReal[ny * maxk];
	FBlas::setzero(nx*maxk, U);
	FBlas::setzero(ny*maxk, V);
Matthias Messner's avatar
Matthias Messner committed
82 83 84 85 86
	FReal norm2R;

	////////////////////////////////////////////////
	// start fully pivoted ACA
	do {
COULAUD Olivier's avatar
COULAUD Olivier committed
87

Matthias Messner's avatar
Matthias Messner committed
88 89
		// find max(K) and argmax(K)
		FReal maxK = 0.;
COULAUD Olivier's avatar
COULAUD Olivier committed
90
		unsigned int pi=0, pj=0;
91
		for (unsigned int j=0; j<ny; ++j)
Matthias Messner's avatar
Matthias Messner committed
92
			if (c[j]) {
93 94
				const FReal *const colK = K + j*nx;
				for (unsigned int i=0; i<nx; ++i)
Matthias Messner's avatar
Matthias Messner committed
95 96 97 98 99 100 101 102
					if (r[i] && maxK < FMath::Abs(colK[i])) {
						maxK = FMath::Abs(colK[i]);
						pi = i; 
						pj = j;
					}
			}

		// copy pivot cross into U and V
103 104 105 106 107
		FReal *const colU = U + k*nx;
		FReal *const colV = V + k*ny;
		const FReal pivot = K[pj*nx + pi];
		for (unsigned int i=0; i<nx; ++i) if (r[i]) colU[i] = K[pj*nx + i];
		for (unsigned int j=0; j<ny; ++j) if (c[j]) colV[j] = K[j *nx + pi] / pivot;
COULAUD Olivier's avatar
COULAUD Olivier committed
108 109

		// don't use these cols and rows anymore
Matthias Messner's avatar
Matthias Messner committed
110 111
		c[pj] = false;
		r[pi] = false;
COULAUD Olivier's avatar
COULAUD Olivier committed
112

Matthias Messner's avatar
Matthias Messner committed
113
		// subtract k-th outer product from K
114
		for (unsigned int j=0; j<ny; ++j)
Matthias Messner's avatar
Matthias Messner committed
115
			if (c[j]) {
116 117
				FReal *const colK = K + j*nx;
				FBlas::axpy(nx, FReal(-1. * colV[j]), colU, colK);
Matthias Messner's avatar
Matthias Messner committed
118 119 120
			}

		// compute Frobenius norm of updated K
COULAUD Olivier's avatar
COULAUD Olivier committed
121
		norm2R = 0.0;
122
		for (unsigned int j=0; j<ny; ++j)
Matthias Messner's avatar
Matthias Messner committed
123
			if (c[j]) {
124 125
				const FReal *const colK = K + j*nx;
				norm2R += FBlas::scpr(nx, colK, colK);
Matthias Messner's avatar
Matthias Messner committed
126 127 128
			}

		// increment rank k
COULAUD Olivier's avatar
COULAUD Olivier committed
129 130
		++k ;

Matthias Messner's avatar
Matthias Messner committed
131 132
	} while (norm2R > eps*eps * norm2K);
	////////////////////////////////////////////////
133 134 135

	delete [] r;
	delete [] c;
Matthias Messner's avatar
Matthias Messner committed
136 137 138
}


Matthias Messner's avatar
Matthias Messner committed
139 140 141 142




143 144 145 146 147





Matthias Messner's avatar
Matthias Messner committed
148 149 150 151 152 153 154
/*!  The partially pivoted adaptive cross approximation (pACA) compresses a
	far-field interaction as \f$K\sim UV^\top\f$. The pACA computes the matrix
	entries on the fly, as they are needed. The compression follows in
	\f$\mathcal{O}(2\ell^3k)\f$ operations based on the required accuracy
	\f$\varepsilon\f$. The matrix K will be destroyed as a result.

	@tparam ComputerType the functor type which allows to compute matrix entries
COULAUD Olivier's avatar
COULAUD Olivier committed
155

Matthias Messner's avatar
Matthias Messner committed
156 157 158 159 160 161 162
	@param[in] Computer the entry-computer functor
	@param[in] eps prescribed accuracy
	@param[in] nx number of rows
	@param[in] ny number of cols
	@param[out] U matrix containing \a k column vectors
	@param[out] V matrix containing \a k row vectors
	@param[out] k final low-rank depends on prescribed accuracy \a eps
COULAUD Olivier's avatar
COULAUD Olivier committed
163
 */
Matthias Messner's avatar
Matthias Messner committed
164
template <typename ComputerType>
165
void pACA(const ComputerType& Computer,
COULAUD Olivier's avatar
COULAUD Olivier committed
166 167
		const unsigned int nx, const unsigned int ny,
		const FReal eps, FReal* &U, FReal* &V, unsigned int &k)
Matthias Messner's avatar
Matthias Messner committed
168 169 170 171 172 173
{
	// control vectors (true if not used, false if used)
	bool *const r = new bool[nx];
	bool *const c = new bool[ny];
	for (unsigned int i=0; i<nx; ++i) r[i] = true;
	for (unsigned int j=0; j<ny; ++j) c[j] = true;
COULAUD Olivier's avatar
COULAUD Olivier committed
174

Matthias Messner's avatar
Matthias Messner committed
175 176
	// initialize rank k and UV'
	k = 0;
177
	const FReal eps2 = eps * eps;
COULAUD Olivier's avatar
COULAUD Olivier committed
178
	const unsigned int maxk = (nx + ny) / 2;
Matthias Messner's avatar
Matthias Messner committed
179 180
	U = new FReal[nx * maxk];
	V = new FReal[ny * maxk];
COULAUD Olivier's avatar
COULAUD Olivier committed
181

Matthias Messner's avatar
Matthias Messner committed
182 183 184
	// initialize norm
	FReal norm2S(0.);
	FReal norm2uv(0.);
COULAUD Olivier's avatar
COULAUD Olivier committed
185

Matthias Messner's avatar
Matthias Messner committed
186 187 188
	////////////////////////////////////////////////
	// start partially pivoted ACA
	unsigned int J = 0, I = 0;
COULAUD Olivier's avatar
COULAUD Olivier committed
189

Matthias Messner's avatar
Matthias Messner committed
190 191 192
	do {
		FReal *const colU = U + nx*k;
		FReal *const colV = V + ny*k;
COULAUD Olivier's avatar
COULAUD Olivier committed
193

Matthias Messner's avatar
Matthias Messner committed
194 195 196 197 198 199 200
		////////////////////////////////////////////
		// compute row I and its residual
		Computer(I, I+1, 0, ny, colV);
		r[I] = false;
		for (unsigned int l=0; l<k; ++l) {
			FReal *const u = U + nx*l;
			FReal *const v = V + ny*l;
201
			FBlas::axpy(ny, FReal(-1. * u[I]), v, colV);
Matthias Messner's avatar
Matthias Messner committed
202
		}
COULAUD Olivier's avatar
COULAUD Olivier committed
203

Matthias Messner's avatar
Matthias Messner committed
204 205
		// find max of residual and argmax
		FReal maxval = 0.;
206 207 208 209
		for (unsigned int j=0; j<ny; ++j) {
			const FReal abs_val = FMath::Abs(colV[j]);
			if (c[j] && maxval < abs_val) {
				maxval = abs_val;
Matthias Messner's avatar
Matthias Messner committed
210 211
				J = j;
			}
212 213
		}
		// find pivot and scale column of V
214 215
		const FReal pivot = FReal(1.) / colV[J];
		FBlas::scal(ny, pivot, colV);
COULAUD Olivier's avatar
COULAUD Olivier committed
216

Matthias Messner's avatar
Matthias Messner committed
217 218 219 220 221 222 223
		////////////////////////////////////////////
		// compute col J and its residual
		Computer(0, nx, J, J+1, colU);
		c[J] = false;
		for (unsigned int l=0; l<k; ++l) {
			FReal *const u = U + nx*l;
			FReal *const v = V + ny*l;
224
			FBlas::axpy(nx, FReal(-1. * v[J]), u, colU);
Matthias Messner's avatar
Matthias Messner committed
225
		}
COULAUD Olivier's avatar
COULAUD Olivier committed
226

Matthias Messner's avatar
Matthias Messner committed
227
		// find max of residual and argmax
COULAUD Olivier's avatar
COULAUD Olivier committed
228
		maxval = 0.0;
229 230 231 232
		for (unsigned int i=0; i<nx; ++i) {
			const FReal abs_val = FMath::Abs(colU[i]);
			if (r[i] && maxval < abs_val) {
				maxval = abs_val;
Matthias Messner's avatar
Matthias Messner committed
233 234
				I = i;
			}
235
		}
COULAUD Olivier's avatar
COULAUD Olivier committed
236

Matthias Messner's avatar
Matthias Messner committed
237
		////////////////////////////////////////////
COULAUD Olivier's avatar
COULAUD Olivier committed
238
		// increment Frobenius norm: |Sk|^2 += |uk|^2 |vk|^2 + 2 sumj ukuj vjvk
Matthias Messner's avatar
Matthias Messner committed
239 240 241 242 243
		FReal normuuvv(0.);
		for (unsigned int l=0; l<k; ++l)
			normuuvv += FBlas::scpr(nx, colU, U + nx*l) * FBlas::scpr(ny, V + ny*l, colV);
		norm2uv = FBlas::scpr(nx, colU, colU) * FBlas::scpr(ny, colV, colV);
		norm2S += norm2uv + 2*normuuvv;
COULAUD Olivier's avatar
COULAUD Olivier committed
244

Matthias Messner's avatar
Matthias Messner committed
245 246
		////////////////////////////////////////////
		// increment low-rank
COULAUD Olivier's avatar
COULAUD Olivier committed
247
		++k;
Matthias Messner's avatar
Matthias Messner committed
248

249
	} while (norm2uv > eps2 * norm2S);
COULAUD Olivier's avatar
COULAUD Olivier committed
250

Matthias Messner's avatar
Matthias Messner committed
251 252 253 254 255 256
	delete [] r;
	delete [] c;
}



Matthias Messner's avatar
Matthias Messner committed
257 258 259 260 261
/*!  Precomputes the 16 far-field interactions (due to symmetries in their
  arrangement all 316 far-field interactions can be represented by
  permutations of the 16 we compute in this function). Depending on whether
  FACASVD is defined or not, either ACA+SVD or only SVD is used to compress
  them. */
262
template <int ORDER, typename MatrixKernelClass>
263
static void precompute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth,
COULAUD Olivier's avatar
COULAUD Olivier committed
264
		const FReal Epsilon, FReal* K[343], int LowRank[343])
265
{
COULAUD Olivier's avatar
COULAUD Olivier committed
266 267
	//	std::cout << "\nComputing 16 far-field interactions (l=" << ORDER << ", eps=" << Epsilon
	//						<< ") for cells of width w = " << CellWidth << std::endl;
268

269
	static const unsigned int nnodes = ORDER*ORDER*ORDER;
270 271 272 273

	// interpolation points of source (Y) and target (X) cell
	FPoint X[nnodes], Y[nnodes];
	// set roots of target cell (X)
274
	FChebTensor<ORDER>::setRoots(FPoint(0.,0.,0.), CellWidth, X);
275 276 277 278
	// temporary matrix
	FReal* U = new FReal [nnodes*nnodes];

	// needed for the SVD
COULAUD Olivier's avatar
COULAUD Olivier committed
279
	 int INFO;
280 281 282 283
	const unsigned int LWORK = 2 * (3*nnodes + nnodes);
	FReal *const WORK = new FReal [LWORK];
	FReal *const VT = new FReal [nnodes*nnodes];
	FReal *const S = new FReal [nnodes];
Matthias Messner's avatar
Matthias Messner committed
284 285 286 287


	// initialize timer
	FTic time;
288 289 290 291 292
	double overall_time(0.);
	double elapsed_time(0.);

	// initialize rank counter
	unsigned int overall_rank = 0;
Matthias Messner's avatar
Matthias Messner committed
293

294 295 296 297 298
	unsigned int counter = 0;
	for (int i=2; i<=3; ++i) {
		for (int j=0; j<=i; ++j) {
			for (int k=0; k<=j; ++k) {

Matthias Messner's avatar
Matthias Messner committed
299
				// assemble matrix and apply weighting matrices
300 301
				const FPoint cy(CellWidth*FReal(i), CellWidth*FReal(j), CellWidth*FReal(k));
				FChebTensor<ORDER>::setRoots(cy, CellWidth, Y);
Matthias Messner's avatar
Matthias Messner committed
302 303
				FReal weights[nnodes];
				FChebTensor<ORDER>::setRootOfWeights(weights);
304

Matthias Messner's avatar
Matthias Messner committed
305
				// now the entry-computer is responsible for weighting the matrix entries
306
				EntryComputer<MatrixKernelClass> Computer(MatrixKernel, nnodes, X, nnodes, Y, weights);
Matthias Messner's avatar
Matthias Messner committed
307 308 309 310 311 312 313 314

				// start timer
				time.tic();

#if (defined ONLY_SVD || defined FULLY_PIVOTED_ACASVD)
				Computer(0, nnodes, 0, nnodes, U);
#endif
				/*
315 316 317 318 319 320 321
				// applying weights ////////////////////////////////////////
				FReal weights[nnodes];
				FChebTensor<ORDER>::setRootOfWeights(weights);
				for (unsigned int n=0; n<nnodes; ++n) {
					FBlas::scal(nnodes, weights[n], U + n,  nnodes); // scale rows
					FBlas::scal(nnodes, weights[n], U + n * nnodes); // scale cols
				}
COULAUD Olivier's avatar
COULAUD Olivier committed
322
				 */
Matthias Messner's avatar
Matthias Messner committed
323 324 325 326 327 328 329 330 331

				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////
				// ALL PREPROC FLAGS ARE SET ON TOP OF THIS FILE !!! /////////
				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////

332

Matthias Messner's avatar
Matthias Messner committed
333 334

				//////////////////////////////////////////////////////////////
Matthias Messner's avatar
Matthias Messner committed
335
#if (defined FULLY_PIVOTED_ACASVD || defined PARTIALLY_PIVOTED_ACASVD) ////////////
Matthias Messner's avatar
Matthias Messner committed
336 337
				FReal *UU, *VV;
				unsigned int rank;
Matthias Messner's avatar
Matthias Messner committed
338 339

#ifdef FULLY_PIVOTED_ACASVD
340
				fACA(U,        nnodes, nnodes, Epsilon, UU, VV, rank);
Matthias Messner's avatar
Matthias Messner committed
341
#else
342
				pACA(Computer, nnodes, nnodes, Epsilon, UU, VV, rank);
Matthias Messner's avatar
Matthias Messner committed
343
#endif 
Matthias Messner's avatar
Matthias Messner committed
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372

				// QR decomposition
				FReal* phi = new FReal [rank*rank];
				{
					// QR of U and V
					FReal* tauU = new FReal [rank];
					INFO = FBlas::geqrf(nnodes, rank, UU, tauU, LWORK, WORK);
					assert(INFO==0);
					FReal* tauV = new FReal [rank];
					INFO = FBlas::geqrf(nnodes, rank, VV, tauV, LWORK, WORK);
					assert(INFO==0);
					// phi = Ru Rv'
					FReal* rU = new FReal [2 * rank*rank];
					FReal* rV = rU + rank*rank;
					FBlas::setzero(2 * rank*rank, rU);
					for (unsigned int l=0; l<rank; ++l) {
						FBlas::copy(l+1, UU + l*nnodes, rU + l*rank);
						FBlas::copy(l+1, VV + l*nnodes, rV + l*rank);
					}
					FBlas::gemmt(rank, rank, rank, FReal(1.), rU, rank, rV, rank, phi, rank);
					delete [] rU;
					// get Qu and Qv
					INFO = FBlas::orgqr(nnodes, rank, UU, tauU, LWORK, WORK);
					assert(INFO==0);
					INFO = FBlas::orgqr(nnodes, rank, VV, tauV, LWORK, WORK);
					assert(INFO==0);
					delete [] tauU;
					delete [] tauV;
				}
COULAUD Olivier's avatar
COULAUD Olivier committed
373

Matthias Messner's avatar
Matthias Messner committed
374 375 376 377 378
				const unsigned int aca_rank = rank;

				// SVD
				{
					INFO = FBlas::gesvd(aca_rank, aca_rank, phi, S, VT, aca_rank, LWORK, WORK);
COULAUD Olivier's avatar
COULAUD Olivier committed
379 380 381 382 383
					if (INFO!=0){
						std::stringstream stream;
						stream << INFO;
						throw std::runtime_error("SVD did not converge with " + stream.str());
					}
Matthias Messner's avatar
Matthias Messner committed
384 385
					rank = getRank(S, aca_rank, Epsilon);
				}					
COULAUD Olivier's avatar
COULAUD Olivier committed
386

Matthias Messner's avatar
Matthias Messner committed
387 388 389 390 391
				const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);

				// store
				{
					// allocate
392
					assert(K[idx]==nullptr);
Matthias Messner's avatar
Matthias Messner committed
393
					K[idx] = new FReal [2*rank*nnodes];
COULAUD Olivier's avatar
COULAUD Olivier committed
394

Matthias Messner's avatar
Matthias Messner committed
395
					// set low rank
COULAUD Olivier's avatar
COULAUD Olivier committed
396 397
					LowRank[idx] = static_cast<int>(rank);

Matthias Messner's avatar
Matthias Messner committed
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
					// (U Sigma)
					for (unsigned int r=0; r<rank; ++r)
						FBlas::scal(aca_rank, S[r], phi + r*aca_rank);

					// Qu (U Sigma) 
					FBlas::gemm(nnodes, aca_rank, rank, FReal(1.), UU, nnodes, phi, aca_rank, K[idx], nnodes);
					delete [] phi;

					// Vt -> V and then Qu V
					FReal *const V = new FReal [aca_rank * rank];
					for (unsigned int r=0; r<rank; ++r)
						FBlas::copy(aca_rank, VT + r, aca_rank, V + r*aca_rank, 1);
					FBlas::gemm(nnodes, aca_rank, rank, FReal(1.), VV, nnodes, V, aca_rank, K[idx] + rank*nnodes, nnodes);
					delete [] V;
				}

				//// store recompressed UV
				//const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
				//assert(K[idx]==NULL);
				//K[idx] = new FReal [2*rank*nnodes];
				//LowRank[idx] = rank;
				//FBlas::copy(rank*nnodes, UU,  K[idx]);
				//FBlas::copy(rank*nnodes, VV,  K[idx] + rank*nnodes);
COULAUD Olivier's avatar
COULAUD Olivier committed
421

Matthias Messner's avatar
Matthias Messner committed
422 423 424
				delete [] UU;
				delete [] VV;

Matthias Messner's avatar
Matthias Messner committed
425 426
				elapsed_time = time.tacAndElapsed(); 
				overall_time += elapsed_time;
427
				overall_rank += rank;
COULAUD Olivier's avatar
COULAUD Olivier committed
428 429
				// std::cout << "(" << i << "," << j << "," << k << ") " << idx <<
				// 	", low rank = " << rank << " (" << aca_rank << ") in " << elapsed_time << "s" << std::endl;
Matthias Messner's avatar
Matthias Messner committed
430

Matthias Messner's avatar
Matthias Messner committed
431 432 433 434 435 436 437 438 439
				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////
				// ALL PREPROC FLAGS ARE SET ON TOP OF THIS FILE !!! /////////
				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////

#elif defined ONLY_SVD
440
				// truncated singular value decomposition of matrix
Matthias Messner's avatar
Matthias Messner committed
441
				INFO = FBlas::gesvd(nnodes, nnodes, U, S, VT, nnodes, LWORK, WORK);
COULAUD Olivier's avatar
COULAUD Olivier committed
442 443 444 445 446
				if (INFO!=0){
					std::stringstream stream;
					stream << INFO;
					throw std::runtime_error("SVD did not converge with " + stream.str());
				}
447
				const unsigned int rank = getRank<ORDER>(S, Epsilon);
COULAUD Olivier's avatar
COULAUD Olivier committed
448

449 450
				// store 
				const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
451
				assert(K[idx]==nullptr);
452 453 454 455 456 457 458 459
				K[idx] = new FReal [2*rank*nnodes];
				LowRank[idx] = rank;
				for (unsigned int r=0; r<rank; ++r)
					FBlas::scal(nnodes, S[r], U + r*nnodes);
				FBlas::copy(rank*nnodes, U,  K[idx]);
				for (unsigned int r=0; r<rank; ++r)
					FBlas::copy(nnodes, VT + r, nnodes, K[idx] + rank*nnodes + r*nnodes, 1);

Matthias Messner's avatar
Matthias Messner committed
460 461
				elapsed_time = time.tacAndElapsed(); 
				overall_time += elapsed_time;
462
				overall_rank += rank;
COULAUD Olivier's avatar
COULAUD Olivier committed
463 464
				//				std::cout << "(" << i << "," << j << "," << k << ") " << idx <<
				//	", low rank = " << rank << " in " << elapsed_time << "s" << std::endl;
Matthias Messner's avatar
Matthias Messner committed
465 466
#else
#error Either fully-, partially pivoted ACA or only SVD must be defined!
Matthias Messner's avatar
Matthias Messner committed
467 468 469
#endif ///////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////

COULAUD Olivier's avatar
COULAUD Olivier committed
470

Matthias Messner's avatar
Matthias Messner committed
471 472 473 474 475 476 477
				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////
				// ALL PREPROC FLAGS ARE SET ON TOP OF THIS FILE !!! /////////
				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////
				//////////////////////////////////////////////////////////////
Matthias Messner's avatar
Matthias Messner committed
478 479


480 481 482 483 484 485 486
				// un-weighting ////////////////////////////////////////////
				for (unsigned int n=0; n<nnodes; ++n) {
					FBlas::scal(rank, FReal(1.) / weights[n], K[idx] + n,               nnodes); // scale rows
					FBlas::scal(rank, FReal(1.) / weights[n], K[idx] + rank*nnodes + n, nnodes); // scale rows
				}
				//////////////////////////////////////////////////////////		

COULAUD Olivier's avatar
COULAUD Olivier committed
487
				++counter;
488 489 490
			}
		}
	}
491 492 493 494 495 496
//	std::cout << "The approximation of the " << counter
//			<< " far-field interactions (overall rank " << overall_rank
//			<< " / " << 16*nnodes
//			<< " , sizeM2L= " << 2*overall_rank*nnodes*sizeof(FReal) << ""
//			<< " / " << 16*nnodes*nnodes*sizeof(FReal) << " B"
//			<< ") took " << overall_time << "s\n" << std::endl;
497 498 499 500 501 502 503 504 505 506 507 508 509 510
	delete [] U;
	delete [] WORK;
	delete [] VT;
	delete [] S;
}









Matthias Messner's avatar
Matthias Messner committed
511
/*!  \class SymmetryHandler 
512

Matthias Messner's avatar
Matthias Messner committed
513 514 515 516 517 518 519
	\brief Deals with all the symmetries in the arrangement of the far-field interactions

	Stores permutation indices and permutation vectors to reduce 316 (7^3-3^3)
  different far-field interactions to 16 only. We use the number 343 (7^3)
  because it allows us to use to associate the far-field interactions based on
  the index \f$t = 7^2(i+3) + 7(j+3) + (k+3)\f$ where \f$(i,j,k)\f$ denotes
  the relative position of the source cell to the target cell. */
520 521 522
template <int ORDER, KERNEL_FUNCTION_TYPE TYPE> class SymmetryHandler;

/*! Specialization for homogeneous kernel functions */
523
template <int ORDER>
524
class SymmetryHandler<ORDER, HOMOGENEOUS>
525
{
COULAUD Olivier's avatar
COULAUD Olivier committed
526
	static const unsigned int nnodes = ORDER*ORDER*ORDER;
527 528 529 530

	// M2L operators
	FReal*    K[343];
	int LowRank[343];
531 532

public:
COULAUD Olivier's avatar
COULAUD Olivier committed
533

534 535 536 537 538 539 540
	// permutation vectors and permutated indices
	unsigned int pvectors[343][nnodes];
	unsigned int pindices[343];


	/** Constructor: with 16 small SVDs */
	template <typename MatrixKernelClass>
541
	SymmetryHandler(const MatrixKernelClass *const MatrixKernel, const FReal Epsilon,
COULAUD Olivier's avatar
COULAUD Olivier committed
542 543
			const FReal, const unsigned int)
			{
544 545
		// init all 343 item to zero, because effectively only 16 exist
		for (unsigned int t=0; t<343; ++t) {
COULAUD Olivier's avatar
COULAUD Olivier committed
546
			K[t]            = nullptr;
547 548
			LowRank[t] = 0;
		}
COULAUD Olivier's avatar
COULAUD Olivier committed
549

550
		// set permutation vector and indices
551
		const FInterpSymmetries<ORDER> Symmetries;
552 553 554 555 556 557 558 559 560 561
		for (int i=-3; i<=3; ++i)
			for (int j=-3; j<=3; ++j)
				for (int k=-3; k<=3; ++k) {
					const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3);
					pindices[idx] = 0;
					if (abs(i)>1 || abs(j)>1 || abs(k)>1)
						pindices[idx] = Symmetries.getPermutationArrayAndIndex(i,j,k, pvectors[idx]);
				}

		// precompute 16 M2L operators
COULAUD Olivier's avatar
COULAUD Olivier committed
562
		const FReal ReferenceCellWidth = FReal(2.0);
563
		precompute<ORDER>(MatrixKernel, ReferenceCellWidth, Epsilon, K, LowRank);
COULAUD Olivier's avatar
COULAUD Olivier committed
564
			}
565 566 567 568 569 570



	/** Destructor */
	~SymmetryHandler()
	{
COULAUD Olivier's avatar
COULAUD Olivier committed
571
		for (unsigned int t=0; t<343; ++t) if (K[t]!=nullptr) delete [] K[t];
572 573
	}

574 575

	/*! return the t-th approximated far-field interactions*/
COULAUD Olivier's avatar
COULAUD Olivier committed
576
	const FReal *const getK(const  int, const unsigned int t) const
577 578 579
	{	return K[t]; }

	/*! return the t-th approximated far-field interactions*/
COULAUD Olivier's avatar
COULAUD Olivier committed
580
	const int getLowRank(const int, const unsigned int t) const
581 582 583 584 585 586 587 588 589 590 591 592 593
	{	return LowRank[t]; }

};






/*! Specialization for non-homogeneous kernel functions */
template <int ORDER>
class SymmetryHandler<ORDER, NON_HOMOGENEOUS>
{
COULAUD Olivier's avatar
COULAUD Olivier committed
594
	static const unsigned int nnodes = ORDER*ORDER*ORDER;
595 596 597 598 599 600 601 602 603

	// Height of octree; needed only in the case of non-homogeneous kernel functions
	const unsigned int TreeHeight;

	// M2L operators for all levels in the octree
	FReal***    K;
	int** LowRank;

public:
COULAUD Olivier's avatar
COULAUD Olivier committed
604

605 606 607 608 609 610 611 612
	// permutation vectors and permutated indices
	unsigned int pvectors[343][nnodes];
	unsigned int pindices[343];


	/** Constructor: with 16 small SVDs */
	template <typename MatrixKernelClass>
	SymmetryHandler(const MatrixKernelClass *const MatrixKernel, const double Epsilon,
COULAUD Olivier's avatar
COULAUD Olivier committed
613 614 615
			const FReal RootCellWidth, const unsigned int inTreeHeight)
			: TreeHeight(inTreeHeight)
			  {
616 617 618
		// init all 343 item to zero, because effectively only 16 exist
		K       = new FReal** [TreeHeight];
		LowRank = new int*    [TreeHeight];
619 620
		K[0]       = nullptr; K[1]       = nullptr;
		LowRank[0] = nullptr; LowRank[1] = nullptr;
621 622 623 624
		for (unsigned int l=2; l<TreeHeight; ++l) {
			K[l]       = new FReal* [343];
			LowRank[l] = new int    [343];
			for (unsigned int t=0; t<343; ++t) {
625
				K[l][t]       = nullptr;
626 627 628
				LowRank[l][t] = 0;
			}
		}
COULAUD Olivier's avatar
COULAUD Olivier committed
629

630 631

		// set permutation vector and indices
632
		const FInterpSymmetries<ORDER> Symmetries;
633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
		for (int i=-3; i<=3; ++i)
			for (int j=-3; j<=3; ++j)
				for (int k=-3; k<=3; ++k) {
					const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3);
					pindices[idx] = 0;
					if (abs(i)>1 || abs(j)>1 || abs(k)>1)
						pindices[idx] = Symmetries.getPermutationArrayAndIndex(i,j,k, pvectors[idx]);
				}

		// precompute 16 M2L operators at all levels having far-field interactions
		FReal CellWidth = RootCellWidth / FReal(2.); // at level 1
		CellWidth /= FReal(2.);                      // at level 2
		for (unsigned int l=2; l<TreeHeight; ++l) {
			precompute<ORDER>(MatrixKernel, CellWidth, Epsilon, K[l], LowRank[l]);
			CellWidth /= FReal(2.);                    // at level l+1 
		}
COULAUD Olivier's avatar
COULAUD Olivier committed
649
			  }
650 651 652 653 654 655 656



	/** Destructor */
	~SymmetryHandler()
	{
		for (unsigned int l=0; l<TreeHeight; ++l) {
657 658
			if (K[l]!=nullptr) {
				for (unsigned int t=0; t<343; ++t) if (K[l][t]!=nullptr) delete [] K[l][t];
659 660
				delete [] K[l];
			}
661
			if (LowRank[l]!=nullptr)	delete [] LowRank[l];
662 663 664 665 666 667
		}
		delete [] K;
		delete [] LowRank;
	}

	/*! return the t-th approximated far-field interactions*/
COULAUD Olivier's avatar
COULAUD Olivier committed
668
	const FReal *const getK(const  int l, const unsigned int t) const
669 670 671
	{	return K[l][t]; }

	/*! return the t-th approximated far-field interactions*/
COULAUD Olivier's avatar
COULAUD Olivier committed
672
	const int getLowRank(const  int l, const unsigned int t) const
673 674
	{	return LowRank[l][t]; }

675 676 677 678 679 680 681 682 683 684 685 686 687
};








#include <fstream>
#include <sstream>


Matthias Messner's avatar
Matthias Messner committed
688 689 690
/**
 * Computes, compresses and stores the 16 M2L kernels in a binary file.
 */
691 692 693 694 695 696 697 698
template <int ORDER, typename MatrixKernelClass>
static void ComputeAndCompressAndStoreInBinaryFile(const MatrixKernelClass *const MatrixKernel, const FReal Epsilon)
{
	static const unsigned int nnodes = ORDER*ORDER*ORDER;

	// compute and compress ////////////
	FReal* K[343];
	int LowRank[343];
699
	for (unsigned int idx=0; idx<343; ++idx) { K[idx] = nullptr; LowRank[idx] = 0;	}
700
	precompute<ORDER>(MatrixKernel, FReal(2.), Epsilon, K, LowRank);
701 702 703 704 705 706 707 708 709

	// write to binary file ////////////
	FTic time; time.tic();
	// start computing process
	const char precision = (typeid(FReal)==typeid(double) ? 'd' : 'f');
	std::stringstream sstream;
	sstream << "sym2l_" << precision << "_o" << ORDER << "_e" << Epsilon << ".bin";
	const std::string filename(sstream.str());
	std::ofstream stream(filename.c_str(),
COULAUD Olivier's avatar
COULAUD Olivier committed
710
			std::ios::out | std::ios::binary | std::ios::trunc);
711 712 713
	if (stream.good()) {
		stream.seekp(0);
		for (unsigned int idx=0; idx<343; ++idx)
714
			if (K[idx]!=nullptr) {
715 716 717 718 719 720 721 722 723 724 725 726 727 728
				// 1) write index
				stream.write(reinterpret_cast<char*>(&idx), sizeof(int));
				// 2) write low rank (int)
				int rank = LowRank[idx];
				stream.write(reinterpret_cast<char*>(&rank), sizeof(int));
				// 3) write U and V (both: rank*nnodes * FReal)
				FReal *const U = K[idx];
				FReal *const V = K[idx] + rank*nnodes;
				stream.write(reinterpret_cast<char*>(U), sizeof(FReal)*rank*nnodes);
				stream.write(reinterpret_cast<char*>(V), sizeof(FReal)*rank*nnodes);
			}
	} else throw std::runtime_error("File could not be opened to write");
	stream.close();
	// write info
COULAUD Olivier's avatar
COULAUD Olivier committed
729 730
	//	std::cout << "Compressed M2L operators stored in binary file " << filename
	//					<< " in " << time.tacAndElapsed() << "sec."	<< std::endl;
731 732

	// free memory /////////////////////
733
	for (unsigned int t=0; t<343; ++t) if (K[t]!=nullptr) delete [] K[t];
734 735 736
}


Matthias Messner's avatar
Matthias Messner committed
737 738 739 740
/**
 * Reads the 16 compressed M2L kernels from the binary files and writes them
 * in K and the respective low-rank in LowRank.
 */
741 742 743 744 745
template <int ORDER>
void ReadFromBinaryFile(const FReal Epsilon, FReal* K[343], int LowRank[343])
{
	// compile time constants
	const unsigned int nnodes = ORDER*ORDER*ORDER;
COULAUD Olivier's avatar
COULAUD Olivier committed
746

747 748 749 750 751 752 753 754
	// find filename
	const char precision = (typeid(FReal)==typeid(double) ? 'd' : 'f');
	std::stringstream sstream;
	sstream << "sym2l_" << precision << "_o" << ORDER << "_e" << Epsilon << ".bin";
	const std::string filename(sstream.str());

	// read binary file
	std::ifstream istream(filename.c_str(),
COULAUD Olivier's avatar
COULAUD Olivier committed
755
			std::ios::in | std::ios::binary | std::ios::ate);
756 757
	const std::ifstream::pos_type size = istream.tellg();
	if (size<=0) throw std::runtime_error("The requested binary file does not yet exist. Exit.");
COULAUD Olivier's avatar
COULAUD Olivier committed
758

759 760 761 762 763 764 765
	if (istream.good()) {
		istream.seekg(0);
		// 1) read index (int)
		int _idx;
		istream.read(reinterpret_cast<char*>(&_idx), sizeof(int));
		// loop to find 16 compressed m2l operators
		for (int idx=0; idx<343; ++idx) {
766
			K[idx] = nullptr;
767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793
			LowRank[idx] = 0;
			// if it exists
			if (idx == _idx) {
				// 2) read low rank (int)
				int rank;
				istream.read(reinterpret_cast<char*>(&rank), sizeof(int));
				LowRank[idx] = rank;
				// 3) read U and V (both: rank*nnodes * FReal)
				K[idx] = new FReal [2*rank*nnodes];
				FReal *const U = K[idx];
				FReal *const V = K[idx] + rank*nnodes;
				istream.read(reinterpret_cast<char*>(U), sizeof(FReal)*rank*nnodes);
				istream.read(reinterpret_cast<char*>(V), sizeof(FReal)*rank*nnodes);

				// 1) read next index
				istream.read(reinterpret_cast<char*>(&_idx), sizeof(int));
			}
		}
	}	else throw std::runtime_error("File could not be opened to read");
	istream.close();
}





#endif