Hybrid High Performance Parallel Python Solver (H2P3S)
This org document can be tangled to produce this python file.
License
Copyright 2016 INRIA
louis.poirel@inria.fr
This file is part of the h2p3s python package.
This software is governed by the CeCILL-C license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the
CeCILL-C license as circulated by CEA, CNRS and INRIA at the following
URL "http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided
only with a limited warranty and the software's author, the holder of
the economic rights, and the successive licensors have only limited
liability.
In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards
their requirements in conditions enabling the security of their
systems and/or data to be ensured and, more generally, to use and
operate it in the same conditions as regards security.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-C license and that you accept its terms.
See the license file here.
Purpose of this module
""" This module provides versatile functions in python to write high-level
HPC algorithms in python linked with state-of-the-art libraries. """
Installing
Installing spack
http://morse.gforge.inria.fr/spack/spack.html
git clone -b develop https://github.com/llnl/spack.git
source ./spack/share/spack/setup-env.sh
git clone https://gitlab.inria.fr/solverstack/spack-repo.git
spack repo add ./spack-repo
Installing the dependencies (pastix & mumps)
MPI_VERSION=openmpi
PYTHON_VERSION=python@
BLAS_VERSION=openblas
SCALAPACK_VERSION=netlib-scalapack
spack install -v py-genfem@git ^$PYTHON_VERSION ^$BLAS_VERSION
spack install -v pastix@solverstack~mpi ^$BLAS_VERSION
spack install -v py-mumps ^mumps+mpi ^$PYTHON_VERSION ^$MPI_VERSION ^$BLAS_VERSION ^$SCALAPACK_VERSION
Imports
from __future__ import print_function
from collections import OrderedDict
import numpy as np
import scipy.sparse as ssp
import scipy.linalg as la
import scipy.sparse.linalg as sla
import genfem as fem
import traceback
try:
import pypastix
except ImportError:
traceback.print_exc()
try:
import pymumps
except ImportError:
traceback.print_exc()
try:
import pymaphys
except ImportError:
traceback.print_exc()
from mpi4py import MPI
rank = MPI.COMM_WORLD.Get_rank()
Here, a function that prints toto
$α = \frac{1}{β}$
print("toto")
toto
Linear Solvers
In order to solve a linear problem $Ax = b$, the most straightforward way is to implement a solve function. Then, the user can solve the problem by executing:
x = solve(A, b, parameters)
There are two important limitations attached to this functional paradigm:
- first, subsequent solves with the same matrix A and different right-hand sides (rhs) b1, b2, ... are more cumbersome to write;
- second, although the solve operation is linear with regard to the rhs, it cannot be supplied directly as a matrix, for instance for preconditioning iterative methods.
Our approach is to write all solvers as objects, with a matrix-like interface:
S = Solver(A, parameters)
x = S @ b
or
S = Solver(parameters)
S.setup(A, updated_parameters)
x = S @ b
This way, even something as complicated as a substructuring approach, in which the interior variables are eliminated using Pastix and the interface variables are solved using a conjugate gradient preconditioned with Mumps, can be written as:
S = Schur(A,
interface = [0, 1, 2, 3],
local_solver = Pastix(),
interface_solver = ConjGrad(M=Mumps()))
x = S @ b
<<basesolver>>
<<spfacto>>
<<pinv>>
<<pastix>>
<<mumps>>
<<conjgrad>>
<<schur>>
TimeIt
import time
class TimeIt:
DEBUG = False
MAX = None
events = []
stack = []
MPI.COMM_WORLD.barrier()
t0 = time.time()
def __init__(self, name=""):
self.name = name
def __enter__(self):
self.begin = time.time() - self.t0
self.end = np.nan
self.duration = np.nan
self.level = len(self.stack)
self.events.append(self)
self.stack.append(len(self.events)-1)
if TimeIt.DEBUG:
print(self.__str__(which="current"))
return self
def __exit__(self, type, value, traceback):
self.end = time.time() - self.t0
self.duration = self.end - self.begin
if TimeIt.DEBUG:
print(self.__str__(which="current"))
if self.stack:
self.stack.pop()
return False
def __call__(self, f):
def timed_f(*args, **kwargs):
# We create a default name
name = self.name
if name=="":
name = type(args[0]).__name__ + " " + f.__name__
# We time and run the function
# We create a new TimeIt instance because self is created
# at function declaration and is the same object for all
# executions of the function
with TimeIt(name) as t:
res = f(*args, **kwargs)
# Store the duration in the object
if len(args)>0 and isinstance(args[0], Solver):
solver = args[0]
key = "t_" + name.replace(" ", "_")
if key in solver.parameters:
solver.parameters[key] += t.duration
else:
solver.parameters[key] = t.duration
return res
return timed_f
def __str__(self, which="all"):
if which=="current":
s = "{:50s} | {:2d} | {:12.7f} | {:12.7f} | {:12.7f}".format(
"! "*self.level + self.name, self.level, self.begin,
self.duration, self.end)
elif which=="all":
s = "\n".join([t.__str__(which="current") for t in self.events[:TimeIt.MAX]])
else:
s = "\n".join([self.events[i].__str__(which="current") for i in self.stack])
return s
@classmethod
def mpi_gather(cls, comm, root=0):
all_events = comm.gather(cls.events, root=root)
if all_events is None:
    return None  # non-root processes receive nothing from gather
min_events = []
med_events = []
max_events = []
non_root = 1 if root==0 else 0 # index of non-root process
while all_events[root]:
name = all_events[root][0].name
level = all_events[root][0].level
ev = [ae.pop(0) for ae in all_events
if ae[0].name == name]
min_e = TimeIt(name)
min_e.begin = min((e.begin for e in ev))
min_e.end = min((e.end for e in ev))
min_e.duration = min((e.duration for e in ev))
min_e.level = level
min_events.append(min_e)
max_e = TimeIt(name)
max_e.begin = max((e.begin for e in ev))
max_e.end = max((e.end for e in ev))
max_e.duration = max((e.duration for e in ev))
max_e.level = level
max_events.append(max_e)
med_e = TimeIt(name)
med_e.begin = np.median([e.begin for e in ev])
med_e.end = np.median([e.end for e in ev])
med_e.duration = np.median([e.duration for e in ev])
med_e.level = level
med_events.append(med_e)
return min_events, med_events, max_events
@classmethod
def reset(cls):
cls.events = []
cls.stack = []
MPI.COMM_WORLD.barrier()
cls.t0 = time.time()
TimeIt.reset()
n = 10000000
print("\n")
with TimeIt("bad") as t:
res = 0
for i in range(n):
res += i
with TimeIt("good") as t:
res = sum(range(n))
with TimeIt("np") as t:
res = np.arange(n).sum()
print(t)
@TimeIt("test")
def my_function():
return(1)
my_function()
print(TimeIt())
Abstract base class
class Solver(object):
""" Abstract base class for solvers
A solver is built using a matrix A and some optional parameters.
To solve A @ x = b with a solver MySolver, do x = MySolver(A) @ b
All child classes must implement a setup(A, **kwargs) that calls
Solver.setup(A, **kwargs), and a self.solve(b) method
Optionally, a child class can have a dict "parameters" attribute
with default values for the parameters.
"""
def __init__(self, A=None, **kwargs):
""" Constructor of the solver
Store the keyword arguments as parameters for the solver,
perform all the analysis steps that are possible without
having the matrix A,
and perform the setup if the matrix A is given.
"""
if hasattr(self, "defaults"):
self.parameters = OrderedDict(sorted(self.defaults.items(),
key=lambda x:str.lower(x[0])))
self.parameters.update(kwargs)
else:
self.parameters = OrderedDict(kwargs.items())
self.setup_performed = False
if A is not None:
self.setup(A)
# abstract method
def setup(self, A, **kwargs):
""" Setups the solver to work on a particular matrix A
Store the keyword arguments as parameters for the solver
and performs all computational steps that are possible without
having the RHS.
"""
self.A = A
self.shape = A.shape
self.parameters.update(kwargs)
# some default parameters are solver types instead of instance
# and a type v has to be replaced by an instance of v
for k, v in self.parameters.items():
if type(v)==type:
instance = v.__new__(v)
instance.__init__()
self.parameters[k] = instance
# We add the parameters to the attributes of the solver
#for k, v in self.parameters.items():
# self.__setattr__(k, v)
self.setup_performed = True
# abstract method
def solve(self, b):
""" Performs a solve with b as a right-hand-side vector """
pass
def dot(self, b):
""" Performs a solve with b as a right-hand-side vector """
if self.setup_performed:
return self.solve(b)
else:
raise RuntimeError('The solver was not setup before performing the solve')
def matvec(self, b):
return self.dot(b)
def toStr(self, level=0):
s = [self.__class__.__name__]
for key, value in self.parameters.items():
k = str(key)
if isinstance(value, Solver):
v = value.toStr(level+1)
else:
v = str(value)
s.append(" "*(1+level) + k + " : " + v)
return "\n".join(s)
def __str__(self):
return self.toStr()
def __getattr__(self, item):
if item != "parameters" and item in self.parameters:
return self.parameters[item]
elif item in self.__dict__:
return self.__dict__[item]
else:
raise AttributeError("No {} in {}.".format(item, self))
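As an illustration of this contract, here is a minimal sketch (not part of the library) of a hypothetical diagonal (Jacobi) solver; DiagSolver and its attributes are made up for the example, and the solve is triggered through dot(), as everywhere else in this module.
class DiagSolver(Solver):
    """Hypothetical example: solve with the diagonal of A only."""
    defaults = {"symmetry": False}
    @TimeIt()
    def setup(self, A, **kwargs):
        Solver.setup(self, A, **kwargs)      # stores A, shape and parameters
        d = A.diagonal() if ssp.issparse(A) else np.diag(A)
        self.d_inv = 1. / d
    @TimeIt()
    def solve(self, b):
        return self.d_inv.reshape(b.shape) * b

A_diag = ssp.diags([2., 3., 4.]).tocsc()
x_diag = DiagSolver(A_diag).dot(np.ones((3, 1)))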
Scipy
class SpFacto(Solver):
"""Factorize a matrix using scipy
Choose an appropriate Scipy solver according to the
format (dense/sparse) and symmetry of A.
Optional parameters are:
- symmetry=True/False. Default: False
"""
defaults = {"symmetry": False}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
if ssp.issparse(A):
self.solve_ = sla.factorized(A)
else:
if self.symmetry:
self.LLt = la.cho_factor(A)
self.solve_ = lambda b:la.cho_solve(self.LLt, b)
else:
self.LU = la.lu_factor(A)
self.solve_ = lambda b: la.lu_solve(self.LU, b)
@TimeIt()
def solve(self, b):
return self.solve_(b)
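For instance, a small sparse system can be factorized and solved as follows (a sketch; the 1D Laplacian matrix is made up for the illustration):
n_sp = 100
A_sp = ssp.diags([-1., 2., -1.], [-1, 0, 1], shape=(n_sp, n_sp), format="csc")
b_sp = np.ones(n_sp)
F_sp = SpFacto(A_sp)               # sparse path: scipy.sparse.linalg.factorized
x_sp = F_sp.dot(b_sp)
print(np.linalg.norm(A_sp.dot(x_sp) - b_sp))   # close to machine precision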
Pinv
class Pinv(Solver):
"""Return a dense pseudoinverse of a matrix (scipy) """
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
if ssp.issparse(A):
self.A_pinv = la.pinv(A.A)
else:
self.A_pinv = la.pinv(A)
@TimeIt()
def solve(self, b):
return self.A_pinv.dot(b)
Pastix
class Pastix(Solver):
"""Factorize a matrix using pastix
"""
defaults = {"verbose": False, # can be 0, 1 or 2
"symmetry": False, # can be 0 (general), 1 (symmetric) or 2 (SPD)
"refine": True,
"threads": 1, # can be "auto", 1, 2, ...
"full_schur": True,
"check": False,
"x0": None}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
self.init(A)
pypastix.task_analyze(self.pastix_data, self.spmA)
pypastix.task_numfact(self.pastix_data, self.spmA)
def init(self, A):
""" Register the options in iparm and dparm, and setup A"""
iparm, dparm = pypastix.initParam()
self.iparm, self.dparm = iparm, dparm
# Verbose
d = {0: pypastix.verbose.Not,
1: pypastix.verbose.Yes,
2: pypastix.verbose.No}
iparm[pypastix.iparm.verbose] = d[self.verbose]
# Threads
if self.threads=="auto":
iparm[pypastix.iparm.thread_nbr] = -1
else:
iparm[pypastix.iparm.thread_nbr] = self.threads
# Symmetry
d = {0: pypastix.factotype.LU,
1: pypastix.factotype.LDLT,
2: pypastix.factotype.LLT,
"SPD": pypastix.factotype.LLT}
self.factotype = d[self.symmetry]
self.iparm[pypastix.iparm.factorization] = self.factotype
# init
self.pastix_data = pypastix.init(iparm, dparm)
self.spmA = pypastix.spm(A)
if self.verbose:
self.spmA.printInfo()
@TimeIt()
def solve(self, b):
x = b.copy()
pypastix.task_solve(self.pastix_data, x)
if self.refine:
pypastix.task_refine(self.pastix_data, b, x)
if self.check and self.x0 is not None:
self.spmA.checkAxb(self.x0, b, x)
return x
@TimeIt()
def schur(self, A, interface, **kwargs):
""" Perform a partial factorization and compute the Schur matrix S """
Solver.setup(self, A, **kwargs)
self.init(A)
self.iparm[pypastix.iparm.schur_solv_mode] = pypastix.solv_mode.Interface
schur_list = np.asarray(interface, pypastix.pastix_int)
self.schur_list = schur_list + self.spmA.findBase()
pypastix.setSchurUnknownList(self.pastix_data, self.schur_list)
pypastix.task_analyze(self.pastix_data, self.spmA)
pypastix.task_numfact(self.pastix_data, self.spmA)
nschur = len(schur_list)
self.nschur = nschur
self.S = np.zeros((nschur, nschur), order='F', dtype=A.dtype)
pypastix.getSchur(self.pastix_data, self.S)
#if self.full_schur and self.factotype!=pypastix.factotype.LU:
# self.S += la.tril(self.S, -1).T
return self.S
@TimeIt()
def b2f(self, b):
""" Compute the reduced RHS f from the complete RHS b """
x = b.copy()
pypastix.subtask_applyorder(self.pastix_data,
pypastix.dir.Forward,
x)
if self.factotype == pypastix.factotype.LLT:
pypastix.subtask_trsm(self.pastix_data,
pypastix.side.Left,
pypastix.uplo.Lower,
pypastix.trans.NoTrans,
pypastix.diag.NonUnit,
x)
else:
pypastix.subtask_trsm(self.pastix_data,
pypastix.side.Left,
pypastix.uplo.Lower,
pypastix.trans.NoTrans,
pypastix.diag.Unit,
x)
if self.factotype == pypastix.factotype.LDLT:
pypastix.subtask_diag(self.pastix_data,
x)
self.x = x
f = x[-self.nschur:]
return f
@TimeIt()
def y2x(self, y, b):
""" Compute the complete solution x from the Schur solution y """
x = self.x.copy()
x[-self.nschur:] = y
if self.factotype == pypastix.factotype.LDLT:
pypastix.subtask_trsm(self.pastix_data,
pypastix.side.Left,
pypastix.uplo.Lower,
pypastix.trans.Trans,
pypastix.diag.Unit,
x)
elif self.factotype == pypastix.factotype.LLT:
pypastix.subtask_trsm(self.pastix_data,
pypastix.side.Left,
pypastix.uplo.Lower,
pypastix.trans.Trans,
pypastix.diag.NonUnit,
x)
else: # LU
pypastix.subtask_trsm(self.pastix_data,
pypastix.side.Left,
pypastix.uplo.Upper,
pypastix.trans.NoTrans,
pypastix.diag.NonUnit,
x)
pypastix.subtask_applyorder(self.pastix_data,
pypastix.dir.Backward,
x)
if self.check and self.x0 is not None:
self.spmA.checkAxb(self.x0, b, x)
return x
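A sketch of the partial-factorization workflow above (it assumes a working pypastix installation; the matrix and the interface list are made up for the illustration):
n_ps = 50
A_ps = ssp.diags([-1., 2., -1.], [-1, 0, 1], shape=(n_ps, n_ps), format="csc")
b_ps = np.ones((n_ps, 1))
interface_ps = list(range(n_ps - 5, n_ps))   # the last 5 unknowns play the interface
P_ps = Pastix(symmetry=0)
S_ps = P_ps.schur(A_ps, interface_ps)        # partial facto + dense Schur complement
f_ps = P_ps.b2f(b_ps)                        # reduced right-hand side on the interface
y_ps = Pinv(S_ps).dot(f_ps)                  # solve the (small) interface problem
x_ps = P_ps.y2x(y_ps, b_ps)                  # back-substitution to the full solution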
Mumps
Direct solver
class Mumps(Solver):
"""Factorize a matrix using mumps
Three modes:
- sequential: comm is None and A is a Scipy Sparse Matrix
- distributed in global ordering: comm is not None
and A is a Scipy Sparse Matrix
- distributed in local ordering: A is a DistMatrix
the communicator used is A.dd.comm, comm is not used
Optional parameters are:
- verbose=True/False. Default: False
- symmetry=0 (General), 1 (Symmetric), 2 (SPD). Default: 0.
If symmetry>0, only the lower triangular part of A is used.
- comm, optional. Default: None
"""
defaults = {"verbose": False,
"symmetry": False, # can be 0 (general), 1 (symmetric) or 2 (SPD)
"comm" : None,
"ordering": "auto"}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
# We cut the function in two parts to be able to easily add the
# Schur code in between
self.init()
self.factorize()
def init(self):
self.driver = pymumps.Mumps('D')
id = self.driver.id
A = self.A
# Set id.par, id.comm and id.sym before initialize
id.par = 1 # Process 0 takes part in the facto
if ssp.issparse(A):
if self.comm is None:
# sequential
self.driver.ICNTL[18] = 0 # centralized entry
self.comm = MPI.COMM_SELF
else:
# distributed with global ordering
self.driver.ICNTL[18] = 3 # distributed entry
self.A_internal = A
elif isinstance(A, DistMatrix):
# Distributed matrix in local ordering
self.driver.ICNTL[18] = 3 # distributed entry
# internally, we switch to global ordering
dd = A.dd
self.A_internal = ssp.coo_matrix(
dd.Ri.T.dot(
ssp.csc_matrix(A.local).dot(
dd.Ri)))
self.comm = A.dd.comm
id.comm_fortran = self.comm.py2f()
if self.ordering == "auto":
self.driver.ICNTL[7] = 7
elif self.ordering == "Scotch":
self.driver.ICNTL[7] = 3
elif self.ordering == "Metis":
self.driver.ICNTL[7] = 5
id.sym = self.symmetry
if self.symmetry>0:
id.sym = 3 - id.sym # 0 Ge, 1 SPD, 2 Sym
self.A_internal = ssp.tril(self.A_internal)
self.driver.initialize()
if self.verbose:
id.icntl[0:4] = [6, 0, 6, 3]
self.driver.ICNTL[11] = 2 # compute main statistics
else:
id.icntl[0:4] = [0, 0, 0, 0]
self.driver.ICNTL[24] = 1 # null pivot detection
self.driver.set_A(self.A_internal)
self.is_forward = False
def factorize(self):
with TimeIt("MumpsDriver Analysis"):
self.driver.drive(1) # Analysis
with TimeIt("MumpsDriver Facto"):
self.driver.drive(2) # Facto
@TimeIt()
def solve(self, b):
A = self.A
if ssp.issparse(A) and self.comm is not None:
b_ = self.comm.reduce(b, root=0)
elif isinstance(A, DistMatrix):
b_ = b.centralize(root=0)
else:
b_ = b
self.driver.set_RHS(b_)
self.driver.drive(3)
x_ = self.driver.get_solution()
x_.shape = b.shape
x = x_
if self.comm is not None:
self.comm.Bcast(x_, root=0)
if isinstance(A, DistMatrix):
x = DistVector(b.dd.Ri.dot(x_), b.dd)
return x
@TimeIt()
def schur(self, A, interface, **kwargs):
""" Perform a partial factorization and compute the Schur matrix S """
Solver.setup(self, A, **kwargs)
self.init()
self.driver.set_schur_listvar(interface)
self.factorize()
self.S = self.driver.get_schur()
self.interface = interface
return self.S
@TimeIt()
def b2f(self, b):
""" Compute the reduced RHS f from the complete RHS b """
self.driver.set_RHS(b)
f = self.driver.schur_forward()
self.is_forward=True
f.shape = (len(f), 1)
return f
@TimeIt()
def y2x(self, y, b):
""" Compute the complete solution x from the Schur solution y """
if not self.is_forward:
self.b2f(b)
x = self.driver.schur_backward(y)
x.shape = b.shape
self.is_forward = False
return x
def __del__(self):
try:
self.driver.finalize()
except (TypeError, AttributeError):
pass
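A sequential usage sketch (it assumes a working pymumps installation; the matrix is made up for the illustration):
n_mm = 50
A_mm = ssp.diags([-1., 2., -1.], [-1, 0, 1], shape=(n_mm, n_mm), format="coo")
b_mm = np.ones((n_mm, 1))
direct_mm = Mumps(A_mm, symmetry=0)          # comm=None: centralized (sequential) entry
x_mm = direct_mm.dot(b_mm)
print(np.linalg.norm(A_mm.dot(x_mm) - b_mm))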
Conjugate Gradient
Standalone function
def cg(A, b, x0=None, tol=1e-5, maxiter=None, xtype=None, M=None, callback=None, ritz=False, save_x=False, debug=False, true_res=False):
"""Solves the linear problem Ax=b using the Conjugate Gradient algorithm.
Interface compatible with scipy.sparse.linalg.cg
Parameters
----------
A : matrix-like object
A is the linear operator on which we want to perform the solve
operation. The only requirement on A is to provide a method
dot(self, x) where x is a vector-like object.
b : vector-like object
b is the right-hand-side of the system to solve. The only
requirement on b is to provide the following methods:
__len__(self) (not necessary if maxiter is provided),
copy(self), __add__(self, w), __sub__(self, w),
__rmul__(self, a) where a is a scalar, dot(self, w)
where w is a vector-like, returning a scalar
x0 : vector-like object
starting guess for the solution, optional
tol : float
Relative tolerance to achieve, optional, default: 1e-5
maxiter : integer
Maximum number of iterations, optional, default: len(b)
xtype :
not used, just for compatibility with scipy.sparse.linalg.cg, optional
M : matrix-like object
Preconditioner for A, optional
callback : function
After each iteration, callback(x) is called, where x is the current solution, optional
ritz : boolean
Store the dot products in order to compute the ritz values later, optional
save_x : boolean
Store the x value at each iteration, optional
debug : boolean
print debug info, optional
true_res : boolean
recompute the residual at each iteration, optional
"""
bb = b.T.dot(b)
maxiter = len(b) if maxiter is None else maxiter
with TimeIt("Instrumentation"):
global _cg_n_iter
_cg_n_iter=0
if ritz:
global _cg_omega, _cg_gamma
_cg_omega = np.zeros(maxiter)
_cg_gamma = np.zeros(maxiter)
# Initialization
x = 0 * b if x0 is None else x0
r = b - A.dot(x)
if r.T.dot(r) <= tol * tol * bb:
return x, 0
z = r if M is None else M.dot(r)
p = z.copy()
rz = r.T.dot(z)
with TimeIt("Instrumentation"):
if save_x:
global _cg_x
_cg_x = [x]
for i in range(maxiter):
Ap = A.dot(p)
alpha = (rz / (p.T.dot(Ap)))[0,0]
x += alpha * p
if true_res:
r = b - A.dot(x)
else:
r -= alpha * Ap
with TimeIt("Instrumentation"):
if ritz:
if i>0:
_cg_gamma[i-1] = beta
_cg_omega[i] = alpha
_cg_n_iter += 1
if callback:
print("iteration {} from rank {}".format(i, rank))
callback(x)
if save_x:
_cg_x.append(x.copy())
rr = r.T.dot(r)
if debug:
print("Iteration: {}, ||r||_2/||b||_2 = {}".format(i, np.sqrt(rr/bb)[0, 0]))
if rr / bb <= tol * tol:
return x, 0
z = r if M is None else M.dot(r)
rz, rzold = r.T.dot(z), rz
beta = (rz / rzold)[0,0]
p = z + beta * p
return x, i
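A usage sketch on a small SPD system; the _Jacobi preconditioner below is made up for the example and only exposes the dot() method required by cg:
class _Jacobi:
    """Hypothetical diagonal preconditioner: only a dot() method is needed."""
    def __init__(self, A):
        self.d_inv = 1. / A.diagonal()
    def dot(self, r):
        return self.d_inv.reshape(r.shape) * r

n_cg = 100
A_cg = ssp.diags([-1., 2., -1.], [-1, 0, 1], shape=(n_cg, n_cg), format="csr")
b_cg = np.ones((n_cg, 1))
x_cg, info_cg = cg(A_cg, b_cg, tol=1e-8, M=_Jacobi(A_cg))
print(info_cg, np.linalg.norm(A_cg.dot(x_cg) - b_cg))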
ConjGrad class
class ConjGrad(Solver):
"""Solve Ax=b using the CG algorithm
Optional parameters are:
- x0, initial guess for the solution, default=0*b
- tol, relative tolerance to achieve, default=1e-5
- maxiter, maximum number of iterations, default=len(b)
- M, preconditioner for A
- callback, after each iteration, callback(x) is called, where x
is the current solution
- ritz=True/False, whether to compute the ritz values (approximate
eigenvalues of A), default=False
- save_x=True/False, whether to store x at each iteration,
default=False
- setup_M=True/False, whether to try and call M.setup(A) during
the setup phase
- debug=True/False, whether to print debug info
- true_res=True/False, whether to recompute the true residual
instead of a recurrence formula
"""
defaults = {"x0": None,
"tol": 1e-5,
"maxiter": None,
"M": None,
"callback": None,
"ritz": False,
"save_x": False,
"setup_M": True,
"abs_tol": None,
"debug": False,
"true_res": False}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
# We setup the preconditioner if possible and asked for by the user
if self.setup_M:
if hasattr(self.M, "setup"):
self.M.setup(A)
@TimeIt()
def solve(self, b):
# For deflation, the preconditioner first orthogonalize x0,
# through its self.M.x0 method
x0_f = getattr(self.M, "x0", None)
if x0_f is not None:
r0 = b if self.x0 is None else b - self.A.dot(self.x0)
self.x0 = x0_f(r0)
# reshape b
if b.ndim==1:
b = b.reshape((-1, 1))
# compute tol
if self.abs_tol is not None:
tol = self.abs_tol*np.sqrt(b.T.dot(b))
else:
tol = self.tol
# Call the standalone cg function
x, self.i = cg(self.A, b, self.x0, tol, self.maxiter,
None, self.M, self.callback, self.ritz,
self.save_x, self.debug, self.true_res)
self.n_iter = _cg_n_iter
self.parameters["n_iter"] = self.n_iter
if self.ritz:
self.omega = _cg_omega
self.gamma = _cg_gamma
if self.save_x:
self.x_ = _cg_x
return x
def ritz_values(self):
"""Compute the ritz values of the Hessenberg matrix.
Call this function after a solve has been performed with self.ritz==True
"""
if self.n_iter>1:
alpha = np.zeros(self.n_iter)
alpha[0] = 1/self.omega[0]
beta = np.zeros(self.n_iter-1)
for i in range(self.n_iter-1):
alpha[i+1] = 1/self.omega[i+1] + self.gamma[i]/self.omega[i]
beta[i] = np.sqrt(max(self.gamma[i], 0))/self.omega[i]
T = np.diag(alpha) + np.diag(beta, 1) + np.diag(beta, -1)
lambda_ = la.eigvalsh(T)
else:
lambda_ = np.array([1])
return lambda_
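The same system can be solved through the Solver interface (reusing A_cg, b_cg and the _Jacobi sketch above); with ritz=True, the Ritz values give an estimate of the condition number of the operator seen by the CG:
K_cg = ConjGrad(A_cg, tol=1e-8, ritz=True, M=_Jacobi(A_cg))
x_cg = K_cg.dot(b_cg)
lam_cg = K_cg.ritz_values()
print(K_cg.n_iter, lam_cg.max() / lam_cg.min())   # iterations, condition number estimate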
Schur
class Schur(Solver):
""" Compute the Schur of a matrix
Use local_solver to eliminate all variables in A that are not in interface
and compute the Schur matrix S. Then, use interface_solver to solve S.
"""
defaults = {"interface": None,
"local_solver": Mumps,
"interface_solver": ConjGrad}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
if hasattr(self.local_solver, "schur"):
self.S = self.local_solver.schur(self.A, self.interface)
else:
interface = self.interface
n = self.A.shape[0]
nG = len(interface)
nI = n - nG
RG = ssp.csc_matrix((np.ones_like(interface),
(range(nG), interface)),
shape=(nG, n))
interior = np.setdiff1d(range(n), interface)
RI = ssp.csc_matrix((np.ones_like(interior),
(range(nI), interior)),
shape=(nI, n))
self.RI, self.RG = RI, RG
with TimeIt("Schur_AII"):
AII = RI.dot(A.dot(RI.T))
self.local_solver.setup(AII)
with TimeIt("Schur_AIG"):
AIG = (RI.dot(A.dot(RG.T))).A
with TimeIt("Schur_AGG"):
self.S = (RG.dot(A.dot(RG.T))).A
with TimeIt("Schur_AII^{-1}AIG"):
self.S -= AIG.T.dot(self.local_solver.dot(AIG))
if self.interface_solver is not None:
self.interface_solver.setup(self.S)
@TimeIt()
def b2f(self, b):
try:
f = self.local_solver.b2f(b)
except AttributeError:
RI, RG = self.RI, self.RG
bI = RI.dot(b)
bI_I = self.local_solver.dot(bI)
bI_I_ = RI.T.dot(bI_I)
f_ = self.A.dot(bI_I_)
f = RG.dot(b - f_)
return f
@TimeIt()
def y2x(self, y, b):
if len(b.shape)==1:
b = b.reshape((-1,1))
try:
x = self.local_solver.y2x(y, b)
except AttributeError:
RI, RG = self.RI, self.RG
tmp = RG.T.dot(y)
tmp = b - self.A.dot(tmp)
tmp = RI.dot(tmp)
tmp = self.local_solver.dot(tmp)
tmp = RI.T.dot(tmp)
x = RG.T.dot(y) + tmp
return x
@TimeIt()
def solve(self, b):
f = self.b2f(b)
param = self.interface_solver.parameters
if "tol" in param:
bb = b.T.dot(b)
ff = f.T.dot(f)
ratio = np.sqrt(bb/ff)[0, 0]
param["global_tol"] = param["tol"]
param["tol"] *= ratio
y = self.interface_solver.dot(f)
x = self.y2x(y, b)
return x
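A sequential sketch of the substructuring solver on a made-up matrix; Pinv is used as the local solver to keep the sketch free of external dependencies (SpFacto, Mumps or Pastix would be the realistic choices):
n_s = 50
A_s = ssp.diags([-1., 2., -1.], [-1, 0, 1], shape=(n_s, n_s), format="csc")
b_s = np.ones((n_s, 1))
S_s = Schur(A_s,
            interface=list(range(n_s - 5, n_s)),
            local_solver=Pinv(),
            interface_solver=ConjGrad(tol=1e-10))
x_s = S_s.dot(b_s)
print(np.linalg.norm(A_s.dot(x_s) - b_s))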
In order to introduce parallelism, we use domain decomposition to distribute the computation: each process is responsible for a subdomain. The degrees of freedom (dof) of the original problem are distributed among the subdomains. A dof that appears in only one subdomain is said to be interior to this subdomain. A dof that appears in several subdomains is said to be in the interface of these subdomains.
To handle this distributed arithmetic, we define a DomainDecomposition class that contains all the information about the dof distribution. Then, the DistMatrix and DistVector classes contain the local components of the matrix or vector they represent, as well as a DomainDecomposition instance.
Domain Decomposition
class DomainDecomposition(object):
tag = 0
def __init__(self, ni, neighbors, comm):
self.ni = ni # local number of unknowns
self.neighbors = neighbors # neighbors[i] = interface shared with neighbor i
self.comm = comm
self.n_parts = comm.Get_size() # number of domains
self.rank = comm.Get_rank()
# Partition of unity (1 or 0)
D = np.ones(ni, dtype=np.bool)
for n in neighbors:
if n < self.rank:
D[neighbors[n]] = 0
self.D = ssp.diags(D.astype(np.int))
#Compute the global ordering
ni_filtered = np.array(np.count_nonzero(D)) # number of dofs the subdomain is responsible for
ni_gathered = np.empty(self.n_parts, dtype=int)
comm.Allgather(ni_filtered, ni_gathered)
self.n = ni_gathered.sum() # Total global number of unknowns
offset = ni_gathered[:self.rank].sum() # Number of unknowns belonging to any subdomain with a smaller index
self.global_indices = np.zeros(self.ni, dtype=int)
self.global_indices[D] = range(offset, offset+ni_filtered)
requests = []
for neighbor, indices in self.neighbors.items():
if self.rank<neighbor:
req = comm.Isend(self.global_indices[indices], dest=neighbor, tag=self.tag)
requests.append(req)
# Probe & Receive
status = MPI.Status()
for k in neighbors:
if k<self.rank:
comm.Probe(source=MPI.ANY_SOURCE, tag=self.tag, status=status)
neighbor = status.Get_source()
indices = self.neighbors[neighbor]
recv_b = np.empty(len(indices), dtype=int)
comm.Recv(recv_b, source=neighbor, tag=self.tag)
self.global_indices[indices] += recv_b
self.Ri = ssp.csc_matrix(([1]*ni, (range(ni), self.global_indices)),
shape=(ni, self.n))
for req in requests:
req.wait()
self.tag += 1
def interface(self):
"""return the DomainDecomposition of the interface matrix"""
interf = np.unique(np.hstack([i for i in self.neighbors.values()])) # local indices of interface unknown
neighbors = {n: np.nonzero(np.in1d(interf, i))[0] for n, i in self.neighbors.items()}
return DomainDecomposition(len(interf), neighbors, self.comm)
def coarsen(self, n_i=1):
"""Returns a coarse domain decomposition
If j is the current subdomain or any of its direct neighbors, the n_j
dofs from j are shared with j and all j's neighbors
For each neighbor, we order the common interface according to the rank
of the original subdomain of the dofs
"""
info_neighbors = self.local_allgather(
[n_i, [n for n in self.neighbors.keys()]])
info_neighbors[self.rank] = [np.array(n_i), [n for n in self.neighbors.keys()]]
n_i0 = 0
neighbors0 = dict()
for j in sorted(info_neighbors):
n_j, neighbors_of_j = info_neighbors[j]
for k in np.append(neighbors_of_j, j):
if k!=self.rank:
neighbors0.setdefault(k, [])
neighbors0[k].extend(range(n_i0, n_i0+n_j.item()))
n_i0 += n_j.item()
return DomainDecomposition(n_i0, neighbors0, self.comm)
def Isend(self, M, dest, tag=0): #work on numpy buffer if possible, else on python object
if type(M)==np.ndarray:
return self.comm.Isend(M, dest=dest, tag=tag)
else:
return self.comm.isend(M, dest=dest, tag=tag)
def Recv(self, M, source, tag=0):
if type(M)==np.ndarray:
self.comm.Recv(M, source=source, tag=tag)
else:
M = self.comm.recv(source=source, tag=tag)
return M
def neighbor_alltoall(self, send_messages):
"""alltoall communication between neighbors
send_messages is a dict, and send_messages[j] is a list of buffers
to send to process j. All the lists are supposed to have the same length.
Return a dict recv_messages where recv_messages[j] is the list that
process j sent to this process (i.e. its send_messages[self.rank]).
"""
# Isend
comm = self.comm
requests = []
for j, send_message_j in send_messages.items():
assert(type(send_message_j)==list)
n_messages = len(send_message_j)
dtypes = [np.asarray(m).dtype for m in send_message_j]
for k, m in enumerate(send_message_j):
req = comm.Isend(np.asarray(m), dest=j, tag=self.tag+k)
requests.append(req)
# Probe & Receive
recv_messages = {j: [[]]*n_messages for j in send_messages}
status = MPI.Status()
n_received_messages=0
while n_received_messages<n_messages*len(recv_messages):
for k in range(n_messages):
if comm.Iprobe(source=MPI.ANY_SOURCE, tag=self.tag+k, status=status):
j = status.Get_source()
size = status.Get_count(datatype=MPI._typedict[dtypes[k].char])
recv_b = np.empty(size, dtypes[k])
comm.Recv(recv_b, source=j, tag=self.tag+k)
recv_messages[j][k] = recv_b
n_received_messages += 1
for req in requests:
req.wait()
self.tag += n_messages
return recv_messages
def neighbor_allgather(self, send_messages):
"""gather communication between neighbors
send_messages is a list of buffers to send to all neighbors.
Return a dict recv_messages where recv_messages[j] is the
send_messages from neighbor process j.
"""
return self.neighbor_alltoall({n:send_messages for n in self.neighbors})
def assemble(self, v, dim=1):
comm = self.comm
if dim==1:
indices = self.neighbors
elif dim==2:
indices = {n: np.ix_(i,i) if len(i)>1 else (i,i) for n, i in self.neighbors.items()}
else:
raise ValueError("dim should be 1 or 2")
# make all Isend calls
if type(v) == ssp.coo.coo_matrix:
v = v.toarray()
req_s = [self.Isend(v[i], dest=n, tag=self.tag) for n, i in indices.items()]
# Probe, receive and treat messages
v_ = v.copy()
status = MPI.Status()
with TimeIt("MPI assemble"):
for i in range(len(indices)):
comm.Probe(source=MPI.ANY_SOURCE, status=status, tag=self.tag)
n = status.Get_source()
buf = v_[indices[n]].copy()
buf = self.Recv(buf, source=n, tag=self.tag)
v_[indices[n]] += buf
for req in req_s:
req.wait()
self.tag += 1
return v_
def reduce(self, v, op=None):
if op is None:
op = MPI.SUM
if hasattr(v, "__array_interface__"):
v_ = np.empty_like(v)
self.comm.Allreduce(v, v_, op=op)
else:
v_ = self.comm.allreduce(v, op=op)
return v_
def __str__(self):
return("rank: " + str(self.rank) + "\nn_i: " + str(self.ni) + "\nneighbors: " + str(self.neighbors) + "\nn: " + str(self.n))
Distributed matrices and vectors
Asynchronous dot product
class AsyncDotProduct(object):
def __init__(self, u, v):
assert(u.dd==v.dd)
self.dd = u.dd
self.u = u
self.v = v
self.local = np.array(u.local.T.dot(self.dd.D.dot(v.local)))
self.global_ = np.empty_like(self.local)
self.req = self.dd.comm.Iallreduce(self.local, self.global_)
self.isDone = False
def wait(self):
if not self.isDone:
self.req.wait()
self.isDone = True
def __add__(self, other):
self.wait()
return self.global_[...] + other
def __radd__(self, other):
self.wait()
return self.global_[...] + other
def __sub__(self, other):
self.wait()
return self.global_[...] - other
def __rsub__(self, other):
self.wait()
return other - self.global_[...]
def __mul__(self, other):
self.wait()
return self.global_[...] * other
def __rmul__(self, other):
self.wait()
return self.global_[...] * other
def __truediv__(self, other):
self.wait()
return self.global_[...] / other
def __rtruediv__(self, other):
self.wait()
return other / self.global_[...]
Dist Vector
A distributed vector (DistVector
) contains the restriction of the vector on the subdomain.
class DistVector(object):
def __init__(self, bi, dd, assembled=True):
if assembled:
self.local = bi
else:
self.local = dd.assemble(bi)
self.dd = dd
self.shape = dd.n, 1
self.ndim = len(self.shape)
self.transposed = False
def __len__(self):
return self.shape[0]
def copy(self):
return type(self)(self.local.copy(), self.dd)
@property
def T(self):
other = type(self)(self.local, self.dd)
other.transposed = not self.transposed
other.shape = self.shape[::-1]
return other
@TimeIt()
def __add__(self, w):
with TimeIt("Local __add__"):
return type(self)(self.local + w.local, self.dd)
@TimeIt()
def __sub__(self, w):
with TimeIt("Local __sub__"):
return type(self)(self.local - w.local, self.dd)
@TimeIt()
def __rmul__(self, a):
with TimeIt("Local __rmul__"):
return type(self)(a * self.local, self.dd)
@TimeIt()
def dot(self, other):
dd = self.dd
if self.transposed: # multiplication along the distributed dimension
assert(self.dd==other.dd)
with TimeIt("Local dot"):
local_dot = self.local.T.dot(self.dd.D.dot(other.local))
with TimeIt("MPI reduce"):
global_dot = dd.reduce(local_dot)
return global_dot
else: # multiplication along the global dimension
return type(self)(self.local.dot(other), dd)
def centralize(self, root=None, loc2glob=None):
if loc2glob is None:
loc2glob = self.dd.global_indices
b_global = np.zeros((self.dd.n, 1))
b_global[loc2glob, :] = self.dd.D.dot(self.local)
if root is None:
self.dd.comm.Allreduce(b_global.copy(), b_global)
else:
self.dd.comm.Reduce(b_global.copy(), b_global, root=root)
return b_global
Dist Matrix
class DistMatrix(object):
def __init__(self, Ai, dd):
self.local = Ai
self.dd = dd
self.shape = dd.n, dd.n
def copy(self):
return DistMatrix(self.local.copy(), self.dd)
@TimeIt()
def dot(self, b):
assert(isinstance(b, DistVector))
assert(self.dd==b.dd)
with TimeIt("Local dot"):
Abi = self.local.dot(b.local)
return type(b)(Abi, self.dd, assembled=False)
def matvec(self, b):
return self.dot(b)
def centralize(self, root=None):
"""return the global matrix on process root.
If no root is provided, then all get the global matrix
"""
global_indices = self.dd.global_indices
A_global_i = ssp.lil_matrix((self.dd.n, self.dd.n))
A_global_i[np.ix_(global_indices, global_indices)] = self.local
if root is None:
return self.dd.comm.allreduce(A_global_i)
else:
return self.dd.comm.reduce(A_global_i, root=root)
def coarsen(self, Wi):
pass
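Continuing the two-subdomain sketch above (run with mpirun -n 2), a distributed matrix-vector product and a global dot product look like this; the local tridiagonal matrix is made up for the example:
Ai = ssp.diags([-1., 2., -1.], [-1, 0, 1], shape=(dd.ni, dd.ni), format="csr")
A_dist = DistMatrix(Ai, dd)
b_dist = DistVector(np.ones((dd.ni, 1)), dd)
Ab_dist = A_dist.dot(b_dist)          # local product, then assembly on the interface
print(dd.rank, b_dist.T.dot(b_dist))  # global dot product (D-weighted local parts)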
Distributed Schur
class DistSchur(Schur):
defaults = {"local_solver": Mumps,
"interface_solver": ConjGrad}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
self.interface = np.unique(np.hstack([i for i in A.dd.neighbors.values()]))
self.local_schur = Schur(A.local,
interface=self.interface,
local_solver=self.local_solver,
interface_solver=None)
self.S = DistMatrix(self.local_schur.S, A.dd.interface())
if self.interface_solver is not None:
self.interface_solver.setup(self.S)
@TimeIt()
def b2f(self, b):
bi = b.dd.D.dot(b.local)
fi = self.local_schur.b2f(bi)
f = DistVector(fi, self.S.dd, assembled=False)
return f
@TimeIt()
def y2x(self, y, b):
xi = self.local_schur.y2x(y.local, b.local)
x = DistVector(xi, b.dd)
return x
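A usage sketch (run under MPI, assuming pymumps is available), reusing the distributed A_dist and b_dist from the sketches above:
S_dist = DistSchur(local_solver=Mumps(symmetry=0),
                   interface_solver=ConjGrad(tol=1e-8))
S_dist.setup(A_dist)                 # eliminates the local interiors with MUMPS
x_dist = S_dist.dot(b_dist)          # CG on the assembled interface problem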
Distributed Solvers
Pipelined CG
def pipelinedCG(A, b, x0=None, M=None, maxiter=None):
global _cg_n_iter
if maxiter is None:
maxiter = len(b)
x = 0*b if x0 is None else x0
r = b - A.dot(x)
u = r.copy() if M is None else M.dot(r)
w = A.dot(u)
z = q = s = p = b.copy()
for _cg_n_iter in range(maxiter):
gamma = AsyncDotProduct(r, u)
delta = AsyncDotProduct(w, u)
m = w.copy() if M is None else M.dot(w)
n = A.dot(m)
gamma.wait()
delta.wait()
if _cg_n_iter>0:
beta = gamma/gamma_old
alpha = gamma/(delta - beta*gamma/alpha_old)
else:
beta = 0
alpha = gamma/delta
z = n + beta*z
q = m + beta*q
s = w + beta*s
p = u + beta*p
x = x + alpha*p
r = r - alpha*s
r = b - A.dot(x)
u = u - alpha*q
w = w - alpha*z
gamma_old = gamma
alpha_old = alpha
return x
Distributed Conjugate Gradient
The Conjugate Gradient implementation above works in parallel transparently: it only relies on the dot, copy and arithmetic operations provided by DistMatrix and DistVector.
Centralize
class CentralizedSolver(Solver):
defaults = {"local_solver": Mumps,
"root": None}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
self.A_global = A.centralize(self.root)
if self.root is None or self.root==A.dd.rank:
self.local_solver.setup(self.A_global)
self.A_global_inv = self.local_solver
@TimeIt()
def solve(self, b):
with TimeIt("centralizeB"):
b_global = b.centralize(self.root)
if self.root is None:
x_global = self.A_global_inv.dot(b_global)
else:
if self.root==b.dd.rank:
x_global = self.A_global_inv.dot(b_global)
else:
x_global = np.empty(b_global.shape, dtype=b.local.dtype)
with TimeIt("distributeX"):
b.dd.comm.Bcast(x_global, root=self.root)
xi = x_global[b.dd.global_indices, :]
return DistVector(xi, b.dd)
MaPHyS
class Maphys(Solver):
"""Solve Ax=b using the MaPHyS Library
Optional parameters are:
- verbose=True/False. Default: False
- symmetry=0 (General), 1 (Symmetric), 2 (SPD). Default: 0.
If symmetry>0, only the lower triangular part of A is used.
"""
defaults = {"symmetry": 0,
"verbose": False,
"output" : "Normal", # "Normal", "Org"
"input": "Distributed", # "Centralized", "Distributed", "FromDump"
"use_paddle": False,
"dump": False,
"Scotch_imbalance": 0.2,
"bind_threads": "Grouped", # "Not", "Core", "Grouped"
"nNodes": 0,
"nCoresPerNode": 1,
"nThrdsPerProc": 1,
"nProcs": 1,
"twoLevel": False,
"local_solver": "Mumps", # "Mumps", "Pastix"
"Schur": "Auto", # "Auto", "Explicit", "Implicit"
"Mumps_ws": 20,
"Mumps_ws_add": 0,
"Mumps_ws_mul": 2,
"M": "Dense",
"Sparsify_threshold": 1e-4,
"coarse_nv": 10,
"coarse_solver": "MumpsDist",
"coarse_nProcs": 1,
"interface_solver": "Auto",
"maxiter": 100,
"tol" : 1e-5,
"Krylov_res_scale": "Global",
"Krylov_stop_alpha": 0.,
"Krylov_stop_beta": 0.,
"CG_stop": "ScaledRes",
"CG_Anorm_delay": 5,
"GMRES_ortho": "ICGS",
"GMRES_res": "Recurrence",
"GMRES_maxiter": 100,
"Fabulous_method": "IB",
"Fabulous_DR_nv": 10,
"Fabulous_ortho": "Ruhe",
"Fabulous_DR_target": 0.}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
if self.verbose:
print("Starting pymaphys setup")
driver = pymaphys.Maphys('d')
driver.set_comm(A.dd.comm)
driver.initialize()
# * Output
# ** Text output
if self.verbose:
print("CNTL")
driver.ICNTL[4] = 5 # Errors, Warnings, Statistics
driver.ICNTL[5] = 1 # CNTL
driver.ICNTL[6] = 1 # INFO
else:
driver.ICNTL[4] = 1
driver.ICNTL[5] = 0
driver.ICNTL[6] = 0
# ** Table output
driver.ICNTL[14] = "1" if self.output is "Org" else 0
# * Input
d = {"Centralized": 1, "Distributed": 2, "FromDump": 3}
driver.ICNTL[43] = d[self.input]
driver.ICNTL[49] = 2 if self.use_paddle else 1
# * Analysis
# ** Dump analysis
driver.ICNTL[45] = 1 if self.dump else 0
# ** Scotch imbalance
driver.RCNTL[15] = self.Scotch_imbalance
# * 2 levels
d = {"Not": 0, "Core": 1, "Grouped": 2}
driver.ICNTL[36] = d[self.bind_threads]
driver.ICNTL[37] = self.nNodes
driver.ICNTL[38] = self.nCoresPerNode
driver.ICNTL[39] = self.nThrdsPerProc
driver.ICNTL[40] = self.nProcs
driver.ICNTL[42] = 1 if self.twoLevel else 0
# * Sparse Direct Solver
d = {"Mumps": 1, "Pastix": 2}
driver.ICNTL[13] = d[self.local_solver]
# ** Schur method
d = {"Auto": 0, "Explicit": 1, "Implicit": 2}
driver.ICNTL[27] = d[self.Schur]
# ** Mumps
driver.ICNTL[47] = self.Mumps_ws
driver.ICNTL[50] = self.Mumps_ws_add
driver.RCNTL[5] = self.Mumps_ws_mul
# * Preconditioner
d = {"Dense": 1, "Sparse": 2, "None": 4, "DenseCGC": 10}
driver.ICNTL[21] = d[self.M]
# ** Sparse
driver.RCNTL[11] = self.Sparsify_threshold
# ** CGC
driver.ICNTL[33] = self.coarse_nv
d = {"MumpsDist": 0, "Lapack": 1, "SDS": 2, "Replicated": 3}
driver.ICNTL[51] = d[self.coarse_solver]
driver.ICNTL[52] = self.coarse_nProcs
# * Krylov library
d = {"PackGMRES": 1, "PackCG": 2, "Auto": 3, "Fabulous": 4}
driver.ICNTL[20] = d[self.interface_solver]
# Max number of iterations
driver.ICNTL[24] = self.maxiter
# Tolerance
driver.RCNTL[21] = self.tol
# Scaled residual
d = {"Interface": 0, "Global": 1, "User": 2}
driver.ICNTL[28] = d[self.Krylov_res_scale]
driver.RCNTL[3] = self.Krylov_stop_alpha
driver.RCNTL[4] = self.Krylov_stop_beta
# ** CG
# CG Stopping criterion
d = {"ScaledRes": 1, "Anorm": 2}
driver.ICNTL[56] = d[self.CG_stop]
driver.ICNTL[57] = self.CG_Anorm_delay
# ** GMRES options
# Orthogonalization scheme for GMRES
d = {"MGS": 0, "IMGS": 1, "CGS": 2, "ICGS": 3}
driver.ICNTL[22] = d[self.GMRES_ortho]
# How to compute the residual at GMRES restart
d = {"Recurrence": 0, "MatVec": 1}
driver.ICNTL[25] = d[self.GMRES_res]
# Maximum size of search space before GMRES restart
driver.ICNTL[26] = self.GMRES_maxiter
# *** Fabulous
d = {"STD": 0, "IB": 1, "QR": 2, "DR": 3, "IBDR": 4,
"QRIB": 5, "QRDR": 6, "QRIBDR": 7, "GCR": 8}
driver.ICNTL[29] = d[self.Fabulous_method]
# Fabulous size of search space for DR
driver.ICNTL[48] = self.Fabulous_DR_nv
driver.RCNTL[2] = self.Fabulous_DR_target
# Fabulous Orthogonalization
d = {"Ruhe": 0, "Block": 1}
driver.ICNTL[55] = d[self.Fabulous_ortho]
if isinstance(A, DistMatrix):
if self.verbose:
print("Distributed interface")
dd = A.dd.interface()
myinterface = dd.global_indices
neighbors = dd.neighbors
myindexvi = [k for k in neighbors.keys()]
myptrindexvi = np.cumsum([0] +
[len(v) for v in neighbors.values()])
myindexintrf = np.concatenate([v for v in neighbors.values()])
self.A_internal = self.A.local
print(A.dd.rank, myinterface, myindexvi, myptrindexvi, myindexintrf)
driver.distributed_interface(myinterface, myindexvi,
myptrindexvi, myindexintrf)
else:
self.A_internal = self.A
# Symmetry
driver.mphs.sym = self.symmetry
if self.symmetry>0:
driver.mphs.sym = 3 - driver.mphs.sym # 0 Ge, 1 SPD, 2 Sym
self.A_internal = ssp.tril(self.A_internal)
driver.set_A(self.A_internal)
driver.set_RHS(np.ones(self.A_internal.shape[0]))
self.driver = driver
#driver.drive(5) # Ana + Facto + Pcd
@TimeIt()
def solve(self, b):
rhs = b.dd.D.dot(b).local
#if self.Krylov_res_scale=="Global": #BUGFIX
# self.driver.ICNTL[28] = 2 # User
# self.driver.RCNTL[3] = 0.
# self.driver.RCNTL[4] = np.sqrt(b.T.dot(b)[0,0])
if isinstance(b, DistVector):
self.driver.set_RHS(b.dd.D.dot(b).local)
shape = b.local.shape
else:
self.driver.set_RHS(b)
shape = b.shape
self.driver.drive(6) # Solve
x = self.driver.get_solution()
x.shape = shape
if isinstance(b, DistVector):
x = DistVector(x, b.dd)
return x
Preconditioners
Abstract Schwarz (AS)
class LinearOperator:
def __init__(self, f):
self.f = f
self.setup_performed=False
def dot(self, b):
return self.f(b)
class AbstractSchwarz(Solver, DistMatrix):
defaults = {"use_D": False,
"method_D": "multiplicity",
"assemble": True,
"robin": None,
"local_solver": SpFacto,
"perform_setup": True}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
# Assembly step for additive Schwarz
if self.assemble:
Aih = A.dd.assemble(A.local, dim=2)
else:
Aih = A.local
# Partition of unity for Neumann Neumann
if self.use_D:
if self.method_D is None:
self.D = self.dd.D
elif self.method_D=="multiplicity":
mult = np.ones(A.dd.ni, dtype=np.int)
for i in A.dd.neighbors.values():
mult[i] += 1
self.D = ssp.diags(1./mult)
elif self.method_D=="coeff_matrix":
diagA = A.local.diagonal()
diagA_ = A.dd.assemble(diagA)
self.D = ssp.diags(diagA/diagA_)
D_inv = ssp.diags(1./self.D.diagonal().clip(1e-12))
Aih = D_inv.dot(D_inv.dot(Aih.T).T) # problem dense.dot(sparse)
# Add a matrix on the interface for Robin
if self.robin is not None:
neighbors = A.dd.neighbors
data = np.zeros(A.dd.ni)
for n in self.robin:
interface = neighbors[n]
data[interface] += self.robin[n]
Aih += ssp.diags(data)
self.Aih = Aih
if self.perform_setup:
self.local_solver.setup(Aih)
DistMatrix.__init__(self, self.local_solver, A.dd)
@TimeIt()
def solve(self, b):
return DistMatrix.dot(self, b)
class AdditiveSchwarz(AbstractSchwarz):
defaults = AbstractSchwarz.defaults.copy()
defaults.update({"use_D": False,
"assemble": True})
class NeumannNeumann(AbstractSchwarz):
defaults = AbstractSchwarz.defaults.copy()
defaults.update({"use_D": True,
"method_D": "multiplicity",
"assemble": False,
"local_solver":Pinv})
class Robin(AbstractSchwarz):
"""Robin.robin should be a dictionnary {rank_of_neighbor_i:
robin_coefficient_i}
"""
defaults = AbstractSchwarz.defaults.copy()
defaults.update({"use_D": False,
"assemble": False,
"robin": {}})
Coarse
class CoarseSolve(Solver):
defaults = {"Wi": None,
"global_solver": Mumps}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
dd = A.dd
if self.Wi is None:
mult = np.ones((A.dd.ni, 1), dtype=np.int)
for i in A.dd.neighbors.values():
mult[i, 0] += 1
self.Wi = 1./mult
Wi = self.Wi
self.parameters["D"] = Wi.T
with TimeIt("MPI assemble"):
send_messages = {n: [Wi.shape[1], # number of coarse vectors
Wi[i], # coarse vectors
[k for k in dd.neighbors]] # neighbors
for n,i in dd.neighbors.items()}
recv_messages = dd.neighbor_alltoall(send_messages)
# We add the subdomain to the dictionary
recv_messages[dd.rank] = [np.array(Wi.shape[1]), # number of coarse vectors
Wi.ravel(), # coarse vectors
np.array([n for n in dd.neighbors])] # neighbors
# Now, for each subdomain in recv_messages, we add the corresponding vectors to Wi_
# and we update neighbors0
neighbors0 = dict()
ni0 = sum(a[0].item() for a in recv_messages.values()) # number of coarse dofs
Wi_ = np.zeros((A.dd.ni, ni0))
counter = 0
for j in sorted(recv_messages):
n_j, Wj, neighbors_j = recv_messages[j]
n_j = n_j.item()
if n_j > 0:
if j in dd.neighbors:
indices = dd.neighbors[j]
else:
indices = slice(None)
Wi_[indices, counter:counter+n_j] = Wj.reshape((-1, n_j))
for k in np.append(neighbors_j, j):
if k!=dd.rank:
neighbors0.setdefault(k, []).extend(range(counter, counter+n_j))
counter += n_j
self.dd = dd
self.dd0 = DomainDecomposition(ni0, neighbors0, dd.comm)
self.Wi_ = Wi_
with TimeIt("coarse computeA0"):
self.AiWi_ = A.local.dot(Wi_)
A0i = Wi_.T.dot(self.AiWi_)
self.A0 = DistMatrix(A0i, self.dd0)
self.global_solver.setup(self.A0)
self.A0_inv = self.global_solver
self.setup_performed=True
@TimeIt()
def solve(self, b, project=False):
dd = self.dd
dd0 = self.dd0
Wi = self.Wi_
if project:
b0i = self.AiWi_.T.dot(b.local)
else:
b0i = Wi.T.dot(dd.D*b.local)
b0 = DistVector(b0i, dd0, assembled=False)
x0 = self.A0_inv.dot(b0)
x = Wi.dot(x0.local)
return DistVector(x, dd)
def __add__(self, other):
return SumOperator(B=other, C=self)
def __radd__(self, other):
return self + other
Combinations
class SumOperator(Solver):
defaults = {"B": AdditiveSchwarz,
"C": CoarseSolve}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
if not getattr(self.B, "setup_performed", True):
self.B.setup(A)
if not getattr(self.C, "setup_performed", True):
self.C.setup(A)
@TimeIt()
def solve(self, b):
return self.B.dot(b) + self.C.dot(b)
def __str__(self):
return(str(self.B) + " + " + str(self.C))
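A sketch of a two-level preconditioner (run under MPI, assuming pymumps for the coarse solve): the additive Schwarz and the coarse solve are combined through the "+" overload defined on CoarseSolve, which builds a SumOperator:
M_2lv = AdditiveSchwarz() + CoarseSolve()   # SumOperator(B=AdditiveSchwarz, C=CoarseSolve)
x_dist = ConjGrad(A_dist, M=M_2lv, tol=1e-8).dot(b_dist)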
Deflated
The coarse operator and the coarse preconditioner are built from the local coarse spaces $\bar W_i = R_i W$, for a matrix assembled as $A = \sum_i R_i^T A_i R_i$:
$W^T A W = \sum_i \bar W_i^T A_i \bar W_i = A_0$
$M_0 = W (W^T A W)^{-1} W^T = W A_0^{-1} W^T$
$R_i M_0 b = R_i W A_0^{-1} W^T b = \bar W_i A_0^{-1} W^T b$
$R_i M_0 A b = R_i W A_0^{-1} W^T A b = \bar W_i A_0^{-1} \sum_j W^T R_j^T A_j R_j b = \bar W_i A_0^{-1} \sum_j \bar W_j^T A_j b_j = \bar W_i A_0^{-1}\,\mathrm{assemble}(\bar W_i^T A_i b_i)$
$R_i A M_0 b = R_i A W A_0^{-1} W^T b = R_i \sum_j R_j^T A_j R_j W A_0^{-1} W^T b = R_i \sum_j R_j^T A_j \bar W_j A_0^{-1} \bar W_j^T b_j = \mathrm{assemble}(A_j \bar W_j A_0^{-1} \bar W_j^T b_j)$
class DeflatedPcd(Solver):
defaults = {"M0": CoarseSolve,
"M1": AdditiveSchwarz}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
if not getattr(self.M0, "setup_performed", True):
self.M0.setup(A)
if not getattr(self.M1, "setup_performed", True):
self.M1.setup(A)
@TimeIt()
def solve(self, b):
y = b if self.M1 is None else self.M1.dot(b)
return y - self.M0.solve(y, project=True)
@TimeIt()
def x0(self, b):
return self.M0.dot(b)
Init
class InitPcd(Solver):
defaults = {"M0": CoarseSolve,
"M1": AdditiveSchwarz}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
if not getattr(self.M0, "setup_performed", True):
self.M0.setup(A)
if not getattr(self.M1, "setup_performed", True):
self.M1.setup(A)
@TimeIt()
def solve(self, b):
return b if self.M1 is None else self.M1.dot(b)
@TimeIt()
def x0(self, b):
return self.M0.dot(b)
eigen
def eigen(A, B, B_I=None, n_v=None, dense=True, local_solver=None):
if dense or n_v is None:
with TimeIt("eigen_dense"):
if ssp.issparse(A) and ssp.issparse(B):
w, v = la.eigh(A.A, B.A)
else:
w, v = la.eigh(A, B)
if n_v is None:
n_v = A.shape[0]
else:
n_v = min(A.shape[0], n_v)
w, v = w[:n_v], v[:, :n_v]
else:
try:
with TimeIt("eigen_sparse"):
if B_I is None and local_solver is not None:
B_I = local_solver
B_I.setup(B)
w, v = sla.eigsh(A, n_v, which='SM', M=B, Minv=B_I)
except (sla.ArpackNoConvergence, sla.ArpackError) as err:
print(err, '=> dense computation')
w, v = eigen(A, B, B_I, n_v, dense=True)
return w, v
genEO_space
def genEO_space(M1, alpha=10, beta=10, n_v=None, local_solver=None):
dense = not ssp.issparse(M1.A.local)
if isinstance(M1, NeumannNeumann):
NN = M1
w1, v1 = np.zeros((0)), np.zeros((M1.Aih.shape[0], 0))
else:
NN = NeumannNeumann(M1.A, perform_setup=False)
w1, v1 = eigen(A=NN.Aih, B=M1.Aih, B_I=M1.local_solver,
n_v=n_v, dense=dense)
if alpha > 0:
w1 = w1[w1*alpha < 1]
v1 = v1[:, :w1.shape[0]]
if isinstance(M1, AdditiveSchwarz):
AS = M1
w2, v2 = np.zeros((0)), np.zeros((M1.Aih.shape[0], 0))
beta = alpha
else:
AS = AdditiveSchwarz(M1.A, perform_setup=False)
w2, v2 = eigen(A=M1.Aih, B=AS.Aih, n_v=n_v, dense=dense,
local_solver=local_solver)
if beta > 0:
w2 = w2[w2*beta < 1]
v2 = v2[:, :w2.shape[0]]
w, v = np.hstack((w1, w2)), np.hstack((v1, v2))
x = np.argsort(w)
if n_v is not None and x.shape[0] > n_v:
x = x[:n_v]
return v[:, x]
GenEO
class GenEO(Solver):
defaults = {"M1": AdditiveSchwarz,
"local_solver": Mumps,
"global_solver": Mumps,
"deflated": True,
"alpha": 10,
"beta": 10,
"n_v": 5}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
if not getattr(self.M1, "setup_performed", True):
self.M1.setup(A)
with TimeIt("GenEO eigen") as t:
self.Wi = genEO_space(self.M1, self.alpha, self.beta,
self.n_v, self.local_solver)
self.M0 = CoarseSolve(A, Wi=self.Wi,
global_solver=self.global_solver)
self.M0.parameters["Wi"] = "GenEO"
self.parameters["t_GenEO_eigen"] = t.duration
self.parameters["M0"] = self.M0
if self.deflated:
self.pcd = DeflatedPcd(A, M0=self.M0, M1=self.M1)
self.x0 = self.pcd.x0
else:
self.pcd = SumOperator(A, B=self.M0, C=self.M1)
@TimeIt()
def solve(self, b):
return self.pcd.solve(b)
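A sketch of the intended end-to-end usage (run under MPI, assuming pymumps): CG on the assembled interface Schur complement, preconditioned by additive Schwarz enriched with a GenEO coarse space. A_dist and b_dist are a distributed matrix and right-hand side built as in the earlier sketches; a problem of realistic size is assumed for the GenEO eigenproblems to be meaningful.
S_geneo = DistSchur(A_dist,
                    local_solver=Mumps(symmetry=0),
                    interface_solver=ConjGrad(tol=1e-8,
                                              M=GenEO(M1=AdditiveSchwarz(), n_v=5)))
x_dist = S_geneo.dot(b_dist)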
MPSD
@TimeIt()
def mpsd(A, b, x0=None, tol=1e-5, maxiter=None, xtype=None, M=None, callback=None, global_solver=Mumps):
bb = b.T.dot(b)
maxiter = len(b) if maxiter is None else maxiter
if x0 is None:
x = 0 * b
r = b.copy()
else:
x = x0.copy()
r = b - A.dot(x0)
p = np.zeros((b.local.shape[0], maxiter))
for i in range(maxiter):
p[:, i] = M.local.dot(r.local)[:, 0]
#p[:, :i+1] = la.orth(p[:, :i+1])
if b.dd.rank==20:
print(p[:, :i+1])
alphaP = CoarseSolve(A, Wi=p[:, i:i+1]).dot(r)
x += alphaP
r -= A.dot(alphaP)
if callback:
#callback(p[:,i])
callback(x)
if r.T.dot(r) <= tol * tol * bb:
return x, 0
return x, i
class MPSD(Solver):
defaults = {"x0": None,
"tol": 1e-5,
"maxiter": None,
"M": None,
"callback": None,
"global_solver": Mumps,
"setup_M": True}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
# We setup the preconditioner if possible and asked for by the user
if self.setup_M:
if hasattr(self.M, "setup"):
self.M.setup(A)
@TimeIt()
def solve(self, b):
# Call the standalone cg function
x, self.i = mpsd(self.A, b, self.x0, self.tol, self.maxiter,
None, self.M, self.callback, self.global_solver)
return x
Robin-Robin
class DistVector2(DistVector):
"Like DistVector, but without the weight in the dot product"
@TimeIt()
def dot(self, other):
dd = self.dd
if self.transposed: # multiplication along the distributed dimension
assert(self.dd==other.dd)
with TimeIt("Local dot"):
local_dot = self.local.T.dot(other.local)
with TimeIt("MPI reduce"):
global_dot = dd.reduce(local_dot)
return global_dot
else: # multiplication along the global dimension
return DistVector2(self.local.dot(other), dd)
# def __len__(self):
# return self.dd.reduce(len(self.local))
class RobinRobin_operator(Solver):
defaults = {"Ti": 1,
"Sih_solver": SpFacto}
@TimeIt()
def setup(self, A, **kwargs):
Solver.setup(self, A, **kwargs)
# setup Sih solver
self.Sih_solver.setup(A.local + self.Ti*ssp.eye(A.dd.ni))
# compute W
W = {n: np.zeros((A.dd.ni)) for n in A.dd.neighbors}
W_ = np.zeros((A.dd.ni))
for n, i in A.dd.neighbors.items():
W[n][i] = 1 - W_[i]
W[n] = ssp.diags(W[n])
W_[i] = 1
self.W = W
def communicate(self, ui):
dd = self.A.dd
comm = dd.comm
indices = dd.neighbors
# make all Isend calls
req_s = [dd.Isend(ui[i], dest=n, tag=dd.tag) for n, i in indices.items()]
# Probe, receive and treat messages
uj = {}
status = MPI.Status()
with TimeIt("MPI assemble"):
for i in range(len(indices)):
comm.Probe(source=MPI.ANY_SOURCE, status=status, tag=dd.tag)
n = status.Get_source()
v_ = 0 * ui
buf = v_[indices[n]].copy()
buf = dd.Recv(buf, source=n, tag=dd.tag)
v_[indices[n]] = buf
uj[n] = v_
for req in req_s:
req.wait()
dd.tag += 1
return uj
@TimeIt()
def rhs(self, b):
SihI = self.Sih_solver
ui = SihI.dot(self.A.dd.D.dot(b.local))
mui = self.Ti*ui
uj = self.communicate(ui)
muj = self.communicate(mui)
rhs = 0*b.local
for j in self.A.dd.neighbors:
rhs += muj[j]
rhs += self.Ti * self.W[j].dot(uj[j])
return DistVector2(rhs, self.A.dd)
@TimeIt()
def solve(self, l):
li = l.local
SihI = self.Sih_solver
ui = SihI.dot(li)
mui = li - self.Ti*ui
#print(rank, "li", li.T)
#print(rank, "ui", ui.T)
#print(rank, "mui", mui.T)
uj = self.communicate(ui)
muj = self.communicate(mui)
li_ = li.copy()
#print(rank, "RRli", li_.T)
for j in self.A.dd.neighbors:
li_ += muj[j]
#print(rank, "RRli", li_.T)
for j in self.A.dd.neighbors:
li_ -= self.Ti * self.W[j].dot(uj[j])
#print(rank, "uj", uj)
#print(rank, "muj", muj)
#print(rank, "RRli", li_.T)
return DistVector2(li_, self.A.dd)
Misc
Analyze output
Analyze
def my_print(s, file=None):
"""print in a file
if file is None (default), prints to stdout
if file is a file descriptor, print to it
if file is a string, open the file and print to it
"""
fid = file
if isinstance(fid, str):
with open(fid, "w") as fid_out:
print(s, file=fid_out)
else:
print(s, file=fid)
class DevNull(object):
"""Fake file object for my_print"""
def __enter__(self):
return None
def __exit__(self, type, value, traceback):
pass
def analyze_output(solver):
if type(solver) is DistSchur:
t_total_setup = solver.t_DistSchur_setup
t_total_solve = solver.t_DistSchur_solve
t_schur_setup = solver.local_schur.t_Schur_setup
t_schur_solve = solver.t_DistSchur_b2f + solver.t_DistSchur_y2x
cg_solver = solver.interface_solver
elif type(solver) is ConjGrad:
t_total_setup = solver.t_ConjGrad_setup
t_total_solve = solver.t_ConjGrad_solve
t_schur_setup = 0
t_schur_solve = 0
cg_solver = solver
else: #any direct solver
t_total_setup = solver.parameters["t_{}_setup".format(type(solver).__name__)]
t_total_solve = solver.parameters["t_{}_solve".format(type(solver).__name__)]
t_schur_setup = t_total_setup
t_schur_solve = t_total_solve
cg_solver = None
if cg_solver is None:
t_cg_setup = 0
t_cg_solve = 0
M = None
else:
t_cg_setup = cg_solver.t_ConjGrad_setup
t_cg_solve = cg_solver.t_ConjGrad_solve
M = cg_solver.M
if M is None:
t_pcd_setup = 0
t_pcd_solve = 0
M0 = None
M1 = None
else:
t_pcd_setup = M.parameters["t_{}_setup".format(type(M).__name__)]
t_pcd_solve = M.parameters["t_{}_solve".format(type(M).__name__)]
if type(M) in (DeflatedPcd, GenEO):
M0 = M.M0
M1 = M.M1
elif type(M) is SumOperator:
M0 = cg_solver.M.B
M1 = cg_solver.M.C
if type(M1) is CoarseSolve:
M0, M1 = M1, M0
else:
M0 = None
M1 = M
if M0 is None:
t_pcd_coarse_setup = 0
t_pcd_coarse_solve = 0
t_pcd_coarse_eigen = 0
else:
t_pcd_coarse_setup = M0.parameters["t_{}_setup".format(type(M0).__name__)]
t_pcd_coarse_solve = M0.parameters["t_{}_solve".format(type(M0).__name__)]
t_pcd_coarse_eigen = 0
if type(M) == GenEO:
t_pcd_coarse_eigen = M.t_GenEO_eigen
if M1 is None:
t_pcd_local_setup = 0
t_pcd_local_solve = 0
else:
t_pcd_local_setup = M1.parameters["t_{}_setup".format(type(M1).__name__)]
t_pcd_local_solve = M1.parameters["t_{}_solve".format(type(M1).__name__)]
lines = ["{:16s}: {:9s} | {:9s} | {:9s} | {:9s}".format("name", "min", "mean", "max", "std")]
for name, time in [("Total Setup", t_total_setup),
(" Schur Facto", t_schur_setup),
(" Pcd Setup", t_pcd_setup),
(" M0 Eigen", t_pcd_coarse_eigen),
(" M0 Setup", t_pcd_coarse_setup),
(" M1 Setup", t_pcd_local_setup),
("Total Solve", t_total_solve),
(" Schur Solve", t_schur_solve),
(" CG Solve", t_cg_solve),
(" Pcd Solve", t_pcd_solve),
(" M0 Solve", t_pcd_coarse_solve),
(" M1 Solve", t_pcd_local_solve),
("Total", t_total_setup + t_total_solve)]:
time = solver.A.dd.comm.gather(time, root=0)
if solver.A.dd.rank==0:
min_, mean_, max_, sigma = np.min(time), np.mean(time), np.max(time), np.std(time)
lines.append("{:15s} : {:9.7f} | {:9.7f} | {:9.7f} | {:9.7f}".format(name, min_, mean_, max_, sigma))
return "\n".join(lines)
def run(solver, A, b, x_s=None, print_solver=None, print_analysis=None, print_TimeIt=None, print_x=None, loc2glob=None):
# solver object
if print_solver is not None:
my_print(solver, file=print_solver)
A_ = A.copy()
TimeIt.reset()
# Solver setup
solver.setup(A_)
# Solver solve
x = solver.dot(b)
if A.dd.rank==0:
print("Problem solved")
# TimeIt object
if print_TimeIt is not None:
my_print(TimeIt(), file=print_TimeIt)
# solver object
if print_solver is not None:
my_print(solver, file=print_solver)
# A posteriori analysis
if type(solver) is DistSchur:
global_solver = solver.interface_solver
else:
global_solver = solver
n_iter = 1
cond = np.nan
if type(global_solver) is ConjGrad:
n_iter = global_solver.n_iter
if global_solver.ritz and n_iter < 500:
ritz_values = global_solver.ritz_values()
cond = ritz_values.max()/ritz_values.min()
r = A.dot(x) - b
tol = np.sqrt(r.T.dot(r)/(b.T.dot(b)))[0,0]
if x_s is not None:
Anorm = np.sqrt(((x-x_s).T.dot(A.dot(x-x_s)) / (x_s.T.dot(A.dot(x_s))))[0,0])
else:
Anorm = np.nan
output = """n_iter : {}
|A@x - b|_2 / |b|_2 : {:9.3e}
||x-x*||_A / ||x*||_A : {:9.3e}
Condition Number : {:9.3e}
{}""".format(n_iter,
tol,
Anorm,
cond,
str(solver))
analysis = analyze_output(solver)
if A.dd.rank==0:
print(output, analysis, sep="\n\n")
if print_analysis is not None:
my_print("{}\n{}".format(output, analysis),
file=print_analysis)
# x_save
try:
if isinstance(solver, DistSchur):
y_ = solver.interface_solver.x_
x_ = [solver.y2x(y, b).centralize(loc2glob=loc2glob, root=0) for y in y_]
elif isinstance(solver, ConjGrad):
x_ = [x.centralize(loc2glob=loc2glob, root=0) for x in solver.x_]
else:
x_ = None
if A.dd.rank==0 and print_x is not None:
import pickle
if isinstance(print_x, str):
with open(print_x, "wb") as f_out:
pickle.dump(x_, f_out)
else:
pickle.dump(x_, print_x)
except AttributeError:
pass
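For reference, the three quantities reported by run above are the relative residual (backward error), the relative error measured in the A-norm, and a condition number estimated from the extreme Ritz values of the preconditioned operator:
$$\frac{\lVert A x - b\rVert_2}{\lVert b\rVert_2}, \qquad \frac{\lVert x - x^*\rVert_A}{\lVert x^*\rVert_A} \quad\text{with}\quad \lVert v\rVert_A = \sqrt{v^T A v}, \qquad \kappa \approx \frac{\theta_{\max}}{\theta_{\min}},$$
where $\theta_{\min}$ and $\theta_{\max}$ are the smallest and largest Ritz values returned by ritz_values().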
Dump
def dump(A=None, b=None, dd=None, symmetry=False):
import scipy.io as sio
if dd is None:
if A is None:
if b is None:
return
else:
dd = b.dd
else:
dd = A.dd
rank = dd.rank
ddG = dd.interface()
ndof = str(dd.ni)
sizeIntrf = str(ddG.ni)
myInterface = " ".join(str(i+1) for i in ddG.global_indices)
nbVi = str(len(ddG.neighbors))
indexVi = " ".join(str(i) for i in ddG.neighbors)
ptrIndexVi = " ".join(str(i) for i in np.cumsum(
[1,] + [len(v) for v in ddG.neighbors.values()]))
indexIntrf = " ".join(str(i+1) for i in np.concatenate(
[v for v in ddG.neighbors.values()]))
dom = """ndof= {}
sizeIntrf= {}
myInterface= {}
nbVi= {}
indexVi= {}
ptrIndexVi= {}
indexIntrf= {}""".format(ndof, sizeIntrf, myInterface, nbVi, indexVi, ptrIndexVi, indexIntrf)
with open("maphys_local_domain" + str(rank+1) + ".dom", 'w') as f:
print(dom, file=f)
if A is not None:
sym = "symetric" if symmetry else "general"
sio.mmwrite("maphys_local_matrix" + str(rank+1) + ".mtx",
A.local, symmetry=sym)
if b is not None:
sio.mmwrite("maphys_local_rhs" + str(rank+1) + ".mtx", dd.D.dot(b.local))
def read_from_dump(rank):
import scipy.io as sio
Ai = sio.mmread("maphys_local_matrix{}.mtx".format(rank+1))
bi = sio.mmread("maphys_local_rhs{}.mtx".format(rank+1))
with open("maphys_local_domain{}.dom".format(rank+1)) as f:
lines = f.readlines()
ndof = int(lines[0].split('=')[1][:-1])
sizeIntrf = int(lines[1].split('=')[1][:-1])
nNeighbors = int(lines[4].split('=')[1][:-1])
indexVi = [int(i) for i in lines[5].split('=')[1][:-1].split(' ') if i]
ptrIndexVi = [int(i) for i in lines[6].split('=')[1][:-1].split(' ') if i]
lines[7] += " "
indexIntrf = [int(i)-1 for i in lines[7].split('=')[1][:-1].split(' ') if i]
neighbors = {indexVi[i]: np.array([ndof - sizeIntrf + indexIntrf[j-1]
for j in range(ptrIndexVi[i],
ptrIndexVi[i+1])])
for i in range(nNeighbors)}
dd = DomainDecomposition(ndof, neighbors, MPI.COMM_WORLD)
A = DistMatrix(Ai, dd)
bi = bi.toarray() # Assume RHS is dense
b = DistVector(bi, dd, assembled=False)
return (A, b, dd)
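A hypothetical round trip under MPI, as a sketch only (it assumes the right-hand side written by dump is stored as a sparse Matrix Market file, so that the toarray() call above applies):
from mpi4py import MPI
from h2p3s import *  # dump, read_from_dump, generate_problem, Mumps, ...
# Build a small distributed test problem, write the MaPHyS-style files, read them back.
A_d, b_d, x_s, loc2glob = generate_problem(geom="Cube", ni=4, dim=3, random_rhs=True)
dump(A=A_d, b=b_d)  # writes maphys_local_{domain,matrix,rhs}<rank+1> files
A2, b2, dd2 = read_from_dump(MPI.COMM_WORLD.Get_rank())
x2 = Mumps(A2).dot(b2)  # solve the re-loaded distributed problem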
Generate Problem
def generate_problem(geom="Cube", ni=30, dim=3, K1=1, K2=1, nLineK=3, comm=None, random_rhs=False, rank=None, nProc=None):
if comm is None:
comm = MPI.COMM_WORLD
if rank is None:
rank = comm.Get_rank()
if nProc is None:
nProc = comm.Get_size()
if geom is "Cube":
if nProc<=3:
nDom = [nProc,] + [1,]*(dim-1)
else:
nDom = [3,] + [1,]*(dim-1)
assert(nProc%3==0)
n = nProc // 3
while n > 1:
assert(n%2==0)
n //= 2
nDom[-1] *= 2
nDom = np.sort(nDom)[::-1]
assert(nProc==np.multiply.reduce(nDom))
elif geom is "Stick":
nDom = (nProc,) + (1,)*(dim-1)
else:
raise ValueError("{} is not a valid geom".format(geom))
local_shape = (ni,) * dim
shape = [ls*n + 1 for ls, n in zip(local_shape, nDom)]
limits, neighbors, interfaces = fem.subdomain(shape, nDom, rank)
loc2glob, glob2loc, interfaces, interface = fem.local_ordering(
shape, limits, neighbors, interfaces)
Ai, bi = fem.stratified_heat(shape, lim=limits,
K1=K1, K2=K2, nLineK=nLineK,
loc2glob=loc2glob, glob2loc=glob2loc)
nei = {n: np.array(i) + len(bi) - len(interface)
for n, i in zip(neighbors, interfaces)}
dd = DomainDecomposition(Ai.shape[0], nei, comm)
A_d = DistMatrix(Ai, dd)
ni = Ai.shape[0]
if random_rhs:
xi = np.random.RandomState(42).rand(ni).reshape([ni, 1])
x_s = DistVector(dd.D.dot(xi), dd, assembled=False)
b_d = A_d.dot(x_s)
else:
x_s = None
b_d = DistVector(bi, dd, assembled=False)
return A_d, b_d, x_s, loc2glob
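To illustrate the decomposition rule above (an illustration only, not part of the module): geom="Stick" simply splits the first direction into nProc slices, whereas geom="Cube" requires nProc in {1, 2, 3} or of the form 3*2^k and doubles the last direction of the subdomain grid for every factor of 2 before sorting it in decreasing order.
import numpy as np

def cube_grid(nProc, dim=3):
    # Reproduce the nDom computation of generate_problem for geom="Cube".
    if nProc <= 3:
        return [nProc] + [1] * (dim - 1)
    nDom = [3] + [1] * (dim - 1)
    assert nProc % 3 == 0
    n = nProc // 3
    while n > 1:
        assert n % 2 == 0
        n //= 2
        nDom[-1] *= 2
    return [int(v) for v in np.sort(nDom)[::-1]]

print(cube_grid(3))   # [3, 1, 1]
print(cube_grid(6))   # [3, 2, 1]
print(cube_grid(12))  # [4, 3, 1]
print(cube_grid(24))  # [8, 3, 1]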
Dictionary of solvers
def get_solver(name):
solvers = OrderedDict()
solvers["Mumps"] = Mumps()
for Mat in "A", "S":
for M1 in 0, "AS", "NN":
for V0 in 0, -1, 1, 2, 3, 4, 5, 6:
for M0 in "", "D", "+":
# V0 and M0 go together: keep only combinations where both or neither are set
if not M0 and V0 or not V0 and M0:
continue
# Neumann-Neumann is only combined with a deflated coarse correction ("D")
if M1 == "NN" and M0 != "D":
continue
# an additive coarse correction ("+") is only combined with Additive Schwarz
if M0 == "+" and M1 != "AS":
continue
# a GenEO coarse space (V0 > 0) requires a one-level preconditioner M1
if M1 == 0 and V0 > 0:
continue
if V0:
key = "{}{}({})_{}".format(M1, M0, V0, Mat)
else:
key = "{}{}_{}".format(M1, M0, Mat)
if Mat is "A":
M1_solver = Mumps() # sparse
elif Mat is "S":
M1_solver = SpFacto()
else:
raise ValueError("{} is not a valid Mat".format(Mat))
if M1 is "AS":
M1_ = AdditiveSchwarz(local_solver=M1_solver)
elif M1 is "NN":
M1_ = NeumannNeumann(local_solver=M1_solver)
elif M1 == 0:
M1_ = None
else:
raise ValueError("{} is not a valid M1".format(M1))
if V0 == 0:
M = M1_
elif V0 == -1:
M0_ = CoarseSolve(global_solver=CentralizedSolver(local_solver=Mumps()))
if M0 is "D":
M = DeflatedPcd(M0=M0_, M1=M1_)
elif M0 is "+":
M = SumOperator(B=M0_, C=M1_)
else:
raise ValueError("{} is not a valid M0".format(M0))
else:
M = GenEO(M1=M1_, alpha=0, beta=0, n_v=V0,
deflated=(M0=="D"),
global_solver=CentralizedSolver(local_solver=Mumps()))
solver = ConjGrad(M=M)
if Mat is "S":
solver = DistSchur(local_solver=Mumps(),
interface_solver=solver)
solvers[key] = solver
return solvers[name]
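The keys accepted by get_solver follow the pattern M1[M0][(V0)]_Mat built by the loops above: M1 is the one-level preconditioner (0, AS or NN), M0 says how the coarse correction is applied (D for deflation, + for an additive correction), V0 selects the coarse space (-1 for the centralized coarse solve, otherwise the number of GenEO vectors n_v), and Mat chooses the original operator A or its Schur complement S. A few illustrative calls, as a sketch:
from h2p3s import *  # assumes the tangled module is on the PYTHONPATH
get_solver("Mumps")      # direct solve with MUMPS
get_solver("AS_A")       # CG on A with a one-level Additive Schwarz preconditioner
get_solver("ASD(4)_S")   # DistSchur: CG on S, Additive Schwarz deflated by a GenEO coarse space (n_v=4)
get_solver("AS+(2)_A")   # CG on A, Additive Schwarz plus an additive GenEO coarse correction (n_v=2)
get_solver("NND(-1)_S")  # DistSchur: CG on S, Neumann-Neumann deflated by a centralized coarse solve
These are the same names listed in the SOLVERLIST variables of the job scripts further down.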
Tests
spack_load
exit()
echo ''
if [ "$spack_loaded" != true ] ; then
. ./spack/share/spack/setup-env.sh
# . /usr/share/modules/init/bash
spack load maphys
spack load py-mumps
spack load -r py-genfem
spack_loaded=true
fi
test mumps
from h2p3s import *
TimeIt.DEBUG = False
n = 5
A = fem.stratified_heat((n,))[0]
xs = (np.arange(n, dtype=float)**2).reshape(n,1)
b = A.dot(xs)
x = Schur(A, interface = [0, 2, 4], local_solver=SpFacto).dot(b)
print(x.T, xs.T)
# x = Schur(A, interface = [0, 2, 4], local_solver=Pastix).dot(b)
# print(x.T, xs.T)
x = Schur(A, interface = [0, 2, 4], local_solver=Mumps).dot(b)
print(x.T, xs.T)
Standalone module
<<imports>>
<<TimeIt>>
<<linearsolver>>
<<dmat>>
<<RobinRobin>>
<<maphys>>
<<asyncdot>>
<<pipelinedCG>>
<<precond>>
<<MPSD>>
<<analyze>>
<<dump>>
<<GenProb>>
<<get_solver>>
<<module>>
Sequential tests
from h2p3s import *
from sys import exc_info
import traceback
TimeIt.DEBUG = False
if __name__ == '__main__':
np.set_printoptions(suppress=True)
n = 10
A_SPD = fem.stratified_heat((n,))[0].tocsc()
A_SPSD = fem.stratified_heat((n+1,), lim=[[1,n]])[0]
A_SPSD = A_SPSD.tocsc()[np.ix_(range(1,n+1), range(1,n+1))]
x_true = (np.arange(n, dtype=float)**2).reshape(n,1)
Schur.defaults["interface"] = [0, 2, 4]
for A in [A_SPD, A_SPSD]:
print("\n\n* A:\n{}".format(A.A))
#print("x_true:\n{}".format(x_true.T))
A.dot = TimeIt()(A.dot)
b = A.dot(x_true)
#print("b:\n{}".format(b.T))
print("\n** Dense\n")
for solver in (SpFacto(),
SpFacto(symmetry=True),
Pinv()):
try:
TimeIt.reset()
solver.setup(A.A)
x = solver.dot(b)
print("***{}".format(solver.toStr(level=2)))
e = x - x_true
e2 = np.sqrt(e.T.dot(e))
r = A.dot(e)
r2 = np.sqrt(r.T.dot(r))
print(" ||e||2 = {}".format(e2[0,0]))
print(" ||r||2 = {}".format(r2[0,0]))
#print(TimeIt())
except Exception as err:
print("*** {}".format(solver.toStr(level=2)))
traceback.print_exc()
print("\n** Sparse\n")
for solver in (SpFacto(),
Pinv(),
Pastix(),
Pastix(symmetry=True),
Mumps(),
ConjGrad(),
ConjGrad(maxiter=2),
ConjGrad(M=Pastix(), maxiter=2),
ConjGrad(M=Mumps(), maxiter=2),
Schur(local_solver=SpFacto()),
Schur(local_solver=Pastix()),
Schur(local_solver=Mumps())):
try:
TimeIt.reset()
solver.setup(A)
x = solver.dot(b)
print("*** {}".format(solver.toStr(level=2)))
e = x - x_true
e2 = np.sqrt(e.T.dot(e))
r = A.dot(e)
r2 = np.sqrt(r.T.dot(r))
print(" ||e||2 = {}".format(e2[0,0]))
print(" ||r||2 = {}".format(r2[0,0]))
#print(TimeIt())
except Exception as err:
print("*** {}".format(solver.toStr(level=2)))
traceback.print_exc()
print("\n* Ritz values\n")
A = A_SPSD
b = A.dot(x_true)
cg = ConjGrad(A, ritz=True, maxiter=500)
x = cg.dot(b)
l = cg.ritz_values()
print("** CG:\n {} {}".format(l.min(), l.max()))
l2 = la.eigh(A.A)[0]
print("** la.eigh:\n {} {}".format(l2.min(), l2.max()))
<<seq_test>>
(org-babel-tangle)
<<spack_load>>
python -i h2p3s_seq.py
Distributed tests
from __future__ import print_function
from h2p3s import *
A_d, b_d, x_s, loc2glob = generate_problem(geom="Cube", ni=4, dim=3, K1=1, K2=1, nLineK=3, random_rhs=True)
# run(ConjGrad(M=AdditiveSchwarz), A_d, b_d, x_s)
s = DistSchur(A_d, interface_solver=None, local_solver=SpFacto)
S = s.S
dd = S.dd
#Si = ssp.eye(dd.ni).A * (rank+1.)
#S = DistMatrix(Si, dd)
f = S.dot(DistVector(np.ones((dd.ni, 1)), dd))
fi = S.dd.D.dot(f.local)
x = ConjGrad(S).dot(f)
r = RobinRobin_operator(S, Ti=.1)
rhs = r.rhs(f)
# print(rank, "x", x.local.T)
# print(rank, "S", S.local)
# print(rank, "f", f.local.T)
# print(rank, "fi", fi.T)
#print(rank, "rhs1", rhs.local.T)
#TimeIt.DEBUG = rank==1
def cb(x):
print(rank, "cb", x.local.T)
#l = DistVector2(S.local.dot(x.local) + x.local - fi, dd)
l = 0*rhs
for i in range(200):  # damped Richardson iteration on the Robin-Robin operator (relaxation 0.2)
l += .2*(rhs - r.dot(l))
TimeIt.DEBUG = 0
#print(rank, "rhs2", rhs.local.T)
res = r.dot(l) - rhs
print(rank, "res", res.local.T)
x = DistVector(r.Sih_solver.dot(l.local+fi), S.dd)
print(rank, "res", (S.dot(x) - f).local.T)
#
# xs = ConjGrad(S).dot(f)
# li = S.local.dot(xs.local) - S.dd.D.dot(f.local)
#
# rhs_ = r.dot(li)
# print(rank, rhs.T.round(4), rhs_.T.round(4))
#
<<DistTest>>
(org-babel-tangle)
<<spack_load>>
killall python
mpiexec -tag-output -np 4 python h2p3s_mpi.py
#mpiexec -tag-output -np 1 python -i h2p3s_mpi.py : -np 1 python h2p3s_mpi.py
#cat out_solver*
pickle load
<<spack_load>>
python
import pickle
import numpy as np
from h2p3s import *
with open("test") as f_in:
x = pickle.load(f_in)
print(x)
plot(x, x)
Demo
Create a matrix
from h2p3s import *
shape = 5,
n = np.multiply.reduce(shape)
A, b = fem.stratified_heat(shape)
b.shape = n,1
A.A
b
Pastix and Mumps as preconditioners
You can call the cg function directly:
x, info = cg(A, b)
x
After convergence, the residual A@x - b is (numerically) zero:
A@x - b
Preconditioned with Pastix
x, info = cg(A, b, M=Pastix(A, verbose=True))
x
or Mumps
x, info = cg(A, b, M=Mumps(A, verbose=True))
x
A Solver is a Matrix
The solver itself exposes matrix-like attributes:
solver = ConjGrad(A)
solver.shape
solver.dot(b)
str(solver)
The solver’s behavior can be adjusted through its parameters:
ConjGrad(A, tol=.5).dot(b)
Schur
More complex solvers can be used
solver = Schur(A,
interface=[2,4],
local_solver=SpFacto,
interface_solver=ConjGrad)
str(solver)
solver = Schur(A,
interface=[2,4],
local_solver=Pastix(verbose=True),
interface_solver=ConjGrad)
str(solver)
solver.dot(b)
The Schur complement matrix is accessible:
solver.S
Performance
Facto
n = 30
A_big, b_big = fem.stratified_heat((n, n, n))
TimeIt.reset()
x = Pastix(A_big) @ b_big
TimeIt()
TimeIt.reset()
x = Mumps(A_big) @ b_big
TimeIt()
TimeIt.reset()
x = SpFacto(A_big) @ b_big
TimeIt()
Schur
n = 25
A_big, b_big = fem.stratified_heat((n, n, n))
TimeIt.reset()
x = Schur(A_big, interface=range(n**2), local_solver=Pastix) @ b_big
TimeIt()
TimeIt.reset()
x = Schur(A_big, interface=range(n**2), local_solver=Mumps) @ b_big
TimeIt()
TimeIt.reset()
x = Schur(A_big, interface=range(n**2), local_solver=SpFacto) @ b_big
TimeIt()
Distributed
Create the problem
from h2p3s import *
shape = 30, 30
comm = MPI.COMM_WORLD
nDom = (comm.Get_size(),) + (1,)*(len(shape)-1)
rank = comm.Get_rank()
# Setup the distributed matrix
# Local problem
limits, neighbors, interfaces = fem.subdomain(shape, nDom, rank)
Ai, bi = fem.stratified_heat(shape, lim=limits, rhs_sparse=True, K2=1)
Ri, interfaces, interface = fem.local_ordering(shape, limits, neighbors, interfaces)
Ai = Ri.dot(Ai.dot(Ri.T))
ni = Ai.shape[0]
# Domain Decomposition for A
nei = {n: np.array(i) + ni - len(interface) for n, i in zip(neighbors, interfaces)}
dd = DomainDecomposition(ni, nei, comm)
# Distributed matrix
A_d = DistMatrix(Ai, dd)
x_s = DistVector(np.random.rand(ni).reshape((ni, 1)), dd, assembled=False)
b_d = A_d @ x_s
TimeIt.reset()
Define the solver
solver = DistSchur(A_d,
interface_solver=ConjGrad(M=DeflatedPcd(M1=NeumannNeumann(),
M0=CoarseSolve()),
ritz=True,
tol=1e-8),
local_solver=Pastix(symmetry=2))
x_d = solver @ b_d
Analyze
with open("out_time_{}".format(rank), "w") as f_out:
print(TimeIt(), file=f_out)
cg = solver.interface_solver
n_iter = cg.n_iter
r = A_d @ x_d - b_d
tol = np.sqrt(r.T@r/(b_d.T@b_d))[0,0]
Anorm_x_d = np.sqrt((x_d-x_s).T @ (A_d @ (x_d-x_s)))[0,0]
Anorm_x_s = np.sqrt((x_s).T @ (A_d @ (x_s)))[0,0]
Anorm = Anorm_x_d/Anorm_x_s
ritz_values = cg.ritz_values()
cond = ritz_values.max()/ritz_values.min()
output = """n_iter : {}
|A@x - b|_2 / |b|_2 : {:9.3e}
||x-x*||_A/||x*||_A : {:9.3e}
Condition Number : {:9.3e}
{}""".format(n_iter,
tol,
Anorm,
cond,
str(solver))
if rank==0:
print(output)
Run
mpiexec -np 4 python3 demo_mpi.py
R trace
R trace tminmax
Here we plot the minimum, median, and maximum of each timing across the processes.
See coarse space
from __future__ import print_function
from h2p3s import *
import matplotlib.pyplot as plt
TimeIt.DEBUG = False
np.set_printoptions(precision=4,suppress=True)
n_couches = 4  # number of layers ("couches") in the stratified test problem
n_v = 8
nLineK = 5
local_shape = ((n_couches*2-1)*nLineK+1,)*2
comm = MPI.COMM_WORLD
nDom = (comm.Get_size(),) + (1,)*(len(local_shape)-1)
rank = comm.Get_rank()
shape = [n*(ls-1)+1 for n, ls in zip(nDom, local_shape)]
# Distributed A
# Local problem
limits, neighbors, interfaces = fem.subdomain(shape, nDom, rank)
loc2glob, glob2loc, interfaces, interface = fem.local_ordering(shape, limits, neighbors, interfaces)
Ai, bi = fem.stratified_heat(shape, lim=limits, K1=1000000, nLineK=5, loc2glob=loc2glob, glob2loc=glob2loc)
# Domain Decomposition for A
nei = {n: np.array(i) + len(bi) - len(interface) for n, i in zip(neighbors, interfaces)}
dd = DomainDecomposition(Ai.shape[0], nei, comm)
# Distributed matrix
A_d = DistMatrix(Ai, dd)
b_d = DistVector(bi, dd)
x_s = Mumps(A_d).dot(b_d)
def plot(x, scale=True, vmin=-1, vmax=1, cmap="seismic"):
if scale:
M = np.max(x)
m = np.min(x)
if -m > M:
x_ = x/m
else:
x_ = x/M
else:
x_ = x
I = np.argsort(loc2glob)[fem.nodes(local_shape)]
plt.matshow(x_[I].T, cmap=plt.get_cmap(cmap), vmin=vmin, vmax=vmax)
plt.colorbar()
plt.show()
MAS = AdditiveSchwarz(A_d)
G = GenEO(A_d, alpha=0, beta=0, n_v=n_v)
if rank==1:
print("A")
plot(A_d.local.diagonal(), scale=False, vmin=0, vmax=None, cmap="copper")
print("GenEO")
for i in range(n_v):
plot(G.Wi[:, i])
print("MPSD")
solver = MPSD(A_d, M=AdditiveSchwarz, maxiter=n_v, callback=plot)
solver.dot(b_d)
if rank!=1:
exit()
(org-babel-tangle)
<<spack_load>>
mpiexec -np 3 python -i coarse_space.py
#cat out_solver*
open on occigen
(find-file-other-window
"/ssh:occigen:/scratch/cnt0021/ces1926/SHARED/test_spack/install_spack_sharedscratchdir.org")
export INSTALL_DIR=/scratch/cnt0021/ces1926/SHARED/install
#export SPACK_DIR=$HOME/install
#source $SPACK_DIR/spack/share/spack/setup-env.sh
source $INSTALL_DIR/spack_modules.sh
# module load intel/17.0
# module load hwloc/1.11.0
# module load intelmpi/2017.0.098
# module load cmake/3.5.2
# module load parmetis/4.0.3-real64
# spack load maphys
# spack load python
# spack load pastix
#
# export PYTHONPATH=${SHAREDSCRATCHDIR}/libs/lib/python2.7/site-packages/
#
#cd python
python -i ~/python/h2p3s_seq.py
mpirun -np 2 python ~/python/h2p3s_mpi.py
Occigen run
salloc
salloc -N 1 --exclusive --time=00:30:00 --constraint=HSW24
ssh `squeue -u $USER | awk 'END{print $8}'`
init
source $SHAREDSCRATCHDIR/install/spack_modules.sh
export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1
run.py
from __future__ import print_function
from h2p3s import *
np.set_printoptions(precision=4,suppress=True)
n = 30 # number of elements in each direction of the local domain
d = 3 # number of dimensions
K1 = 1 # conductivity of the layers
K2 = 1
nLineK = 3
sym = 2 # do we take into account the fact that the problem is spd?
Debug = False  # set to rank==0 to enable debug output on rank 0
geom = "Cube" # "Cube" or "Stick"
# GenEO
n_v = 5
alpha = 0
beta = 0
# ConjGrad
maxiter = 5000
tol = 1e-8
true_res = False
TimeIt.max = 2000
######################################################
#OVERRIDE#
######################################################
ConjGrad.defaults["maxiter"] = maxiter
ConjGrad.defaults["ritz"] = True
ConjGrad.defaults["save_x"] = False
ConjGrad.defaults["tol"] = tol
ConjGrad.defaults["debug"] = Debug
ConjGrad.defaults["true_res"] = true_res
GenEO.defaults["alpha"] = alpha
GenEO.defaults["beta"] = beta
GenEO.defaults["n_v"] = n_v
Mumps.defaults["ordering"] = "Scotch"
Mumps.defaults["verbose"] = Debug
TimeIt.DEBUG = Debug
solver = get_solver(solver_name)
if rank==0:
print("Building problem")
A, b, x, loc2glob = generate_problem(geom, n, d, K1, K2, nLineK)
if rank==0:
print("Problem built")
f_TimeIt = "TimeIt_{}".format(rank) if rank < 6 else None
f_solver = "solver" if rank==0 else None
f_analysis = "analysis" if rank==0 else None
if rank==0:
print(solver)
print("Let's go")
run(solver, A, b, x,
loc2glob=loc2glob,
print_solver=f_solver,
print_analysis=f_analysis,
print_TimeIt=f_TimeIt)
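job.sh below substitutes the #OVERRIDE# marker above with per-run settings through sed; with the values used in its loops, the generated script.py therefore contains lines such as:
geom = "Stick"
K1 = 1
solver_name = "NND(-1)_A"
maxiter = 250
Debug = rank==0
true_res=True
In particular, this is where solver_name is defined before get_solver(solver_name) is called.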
job.sh
NPROC=3
EXPE=TEST
SOLVERLIST="Mumps 0_A 0D(-1)_A AS_A ASD(-1)_A AS+(-1)_A ASD(1)_A AS+(1)_A ASD(2)_A AS+(2)_A ASD(3)_A AS+(3)_A ASD(4)_A AS+(4)_A ASD(5)_A AS+(5)_A ASD(6)_A AS+(6)_A NND(-1)_A NND(1)_A NND(2)_A NND(3)_A NND(4)_A NND(5)_A NND(6)_A 0_S 0D(-1)_S AS_S ASD(-1)_S AS+(-1)_S ASD(1)_S AS+(1)_S ASD(2)_S AS+(2)_S ASD(3)_S AS+(3)_S ASD(4)_S AS+(4)_S ASD(5)_S AS+(5)_S ASD(6)_S AS+(6)_S NND(-1)_S NND(1)_S NND(2)_S NND(3)_S NND(4)_S NND(5)_S NND(6)_S"
SOLVERLIST="NND(-1)_A"
#mpirun -np $NPROC python_mpi -c "from __future__ import print_function; from h2p3s import *"
for GEOM in Stick
do
for K in 1
do
for SOLVER in $SOLVERLIST
do
DIR=$SCRATCHDIR/$EXPE/${GEOM}_${K}/$SOLVER/$NPROC
test -e $DIR/DONE && echo "$DIR: already OK" && continue || echo "$DIR:"
mkdir -p $DIR
cd $DIR
OVERRIDE="geom = \"$GEOM\"\nK1 = $K\nsolver_name = \"$SOLVER\"\nmaxiter = 250\nDebug = rank==0\ntrue_res=True"
sed "s/#OVERRIDE#/${OVERRIDE}/" $HOME/python/run.py > ./script.py
cp $SHAREDSCRATCHDIR/libs/lib/python2.7/site-packages/h2p3s.py ./
echo "* GEOM: $GEOM K: $K SOLVER: $SOLVER"
timeout 1m time -f "walltime: %e" mpirun -np $NPROC -l python_mpi ./script.py &> out && touch ./DONE && echo " OK" || echo " KO"
cd - > /dev/null
done
done
done
sshfs
rm -f occigen_scratch
mkdir occigen_scratch
sshfs occigen:/scratch/cnt0021/ces1926/poirell occigen_scratch
tar gz
tar czvf $HOME/resultsTEST.tar.gz $SCRATCHDIR/TEST
scalability
cd $SCRATCHDIR
EXPE=TEST
line="geom, K, solver, M1, M0, n_v, mat, n_nodes, n_dom, n_iter, be, cond, t_setup, t_schur_setup, t_pcd_setup, t_M0_eigen, t_M0_setup, t_M1_setup, t_solve, t_schur_solve, t_CG_solve, t_pcd_solve, t_M0_solve, t_M1_solve, t_total, t_walltime"
echo $line
echo $line > $EXPE/results.csv
SOLVERLIST="Mumps 0_A 0D(-1)_A AS_A ASD(-1)_A AS+(-1)_A ASD(1)_A AS+(1)_A ASD(2)_A AS+(2)_A ASD(3)_A AS+(3)_A ASD(4)_A AS+(4)_A ASD(5)_A AS+(5)_A ASD(6)_A AS+(6)_A NN_A NND(-1)_A NND(1)_A NND(2)_A NND(3)_A NND(4)_A NND(5)_A NND(6)_A 0_S 0D(-1)_S AS_S ASD(-1)_S AS+(-1)_S ASD(1)_S AS+(1)_S ASD(2)_S AS+(2)_S ASD(3)_S AS+(3)_S ASD(4)_S AS+(4)_S ASD(5)_S AS+(5)_S ASD(6)_S AS+(6)_S NN_S NND(-1)_S NND(1)_S NND(2)_S NND(3)_S NND(4)_S NND(5)_S NND(6)_S"
for GEOM in Stick
do
for K in 1 100 10000
do
for NPROC in 2 3 6 12 24
#2 3 6 12 24 48 96 192 384 768 1536 3072 6144 12288
do
for M1 in 0 AS NN
do
for M0 in "" "D" "+"
do
for NV in "" "(-1)" "(1)" "(2)" "(3)" "(4)" "(5)" "(6)"
do
for MAT in A S
do
SOLVER="$M1$M0${NV}_$MAT"
DIR="$EXPE/${GEOM}_${K}/$SOLVER/$NPROC"
test -e $DIR/DONE || continue
n_nodes=`python -c "print(-(-$NPROC//24))"`
n_dom=$NPROC
n_iter=`awk '/n_iter / {print $3}' $DIR/analysis`
be=`awk '/\|A@x - b\|_2/ {print $7}' $DIR/analysis`
cond=`awk '/Condition Number/ {print $4}' $DIR/analysis`
t_setup=`awk '/Total Setup/ {print $8}' $DIR/analysis`
t_schur_setup=`awk '/Schur Facto/ {print $8}' $DIR/analysis`
t_pcd_setup=`awk '/Pcd Setup/ {print $8}' $DIR/analysis`
t_M0_eigen=`awk '/M0 Eigen/ {print $8}' $DIR/analysis`
t_M0_setup=`awk '/M0 Setup/ {print $8}' $DIR/analysis`
t_M1_setup=`awk '/M1 Setup/ {print $8}' $DIR/analysis`
t_solve=`awk '/Total Solve/ {print $8}' $DIR/analysis`
t_schur_solve=`awk '/Schur Solve/ {print $8}' $DIR/analysis`
t_CG_solve=`awk '/CG Solve/ {print $8}' $DIR/analysis`
t_pcd_solve=`awk '/Pcd Solve/ {print $8}' $DIR/analysis`
t_M0_solve=`awk '/M0 Solve/ {print $8}' $DIR/analysis`
t_M1_solve=`awk '/M1 Solve/ {print $8}' $DIR/analysis`
t_total=`awk '/^Total *:/ {print $7}' $DIR/analysis`
t_walltime=`awk '/walltime/ {print $2}' $DIR/out`
line="$GEOM, $K, $SOLVER, $M1, $M0, $NV, $MAT, $n_nodes, $n_dom, $n_iter, $be, $cond, $t_setup, $t_schur_setup, $t_pcd_setup, $t_M0_eigen, $t_M0_setup, $t_M1_setup, $t_solve, $t_schur_solve, $t_CG_solve, $t_pcd_solve, $t_M0_solve, $t_M1_solve, $t_total, $t_walltime"
echo $line
echo $line >> $EXPE/results.csv
done
done
done
done
done
done
done
cd - > /dev/null
R scalability
R n_iter
R trace tminmax
Example pypastix
import pypastix as pastix
import scipy.sparse as sps
import scipy.linalg as la
import numpy as np
# Hack to make sure that the mkl is loaded
tmp = np.eye(2).dot(np.ones(2))
# Set matrix A from Scipy Sparse matrices
n = 30
A = sps.spdiags([np.ones(n)*i for i in [4., -1, -1]],
[0, 1, -1], n, n)
x0 = np.arange(n).reshape(n,1)
# Construct b as b = A * x_0
b = A.dot(x0)
x = b.copy()
# Initialize the Schur list as the first third of the elements
nschur = min( int(n / 3), 5000 )
schurlist = np.arange(nschur)
for factotype in (1, 2, 3):
solver = pastix.solver(verbose=0, factotype=factotype)
solver.schur(A, schurlist)
S = solver.S
f = solver.schur_forward(b)
y = la.solve(S, f, sym_pos=True, lower=True)
x = solver.schur_backward(y, b, x0=x0, check=True)
#print(f, y, x)
test pastix
import pypastix as pastix
import scipy.sparse as sps
import scipy.linalg as la
import numpy as np
# Hack to make sure that the mkl is loaded
tmp = np.eye(2).dot(np.ones(2))
# Set matrix A from Scipy Sparse matrices
for n in range(30, 100):
print(n)
A = sps.spdiags([np.ones(n)*i for i in [4., -1, -1]],
[0, 1, -1], n, n)
x0 = np.arange(n).reshape(n,1)
# Construct b as b = A * x_0
b = A.dot(x0)
x = b.copy()
# Initialize the Schur list as the first third of the elements
nschur = min( int(n / 3), 5000 )
schurlist = np.arange(nschur)
solver = pastix.solver(verbose=1, thread_nbr=1)
solver.schur(A, schurlist)
S = solver.S
print(S)
f = solver.schur_forward(b)
y = la.solve(S, f, sym_pos=True, lower=True)
x = solver.schur_backward(y, b, x0=x0, check=False)
import pypastix as pastix
import scipy.sparse as sps
import numpy as np
# Set the problem
for n in range(5, 100):
print(n)
A = sps.spdiags([np.ones(n)*i for i in [4, -1, -1, -1, -1]],
[0, 1, 3, -1, -3], n, n)
x0 = np.ones(n)
b = A.dot(x0)
# Hack to make sure that the mkl is loaded
tmp = np.eye(2).dot(np.ones(2))
# Factorize
solver = pastix.solver(A, verbose=False, thread_nbr=1)
# Solve
x = solver.solve(b, x0=x0, check=True)
solver.finalize()
test robin
from h2p3s import *
A, b = fem.stratified_heat((11,))
s = Schur(A, interface=np.arange(3, 8), local_solver=SpFacto)
s.S
Dict
from __future__ import print_function
from h2p3s import *
SOLVERLIST="Mumps 0_A 0D(-1)_A AS_A ASD(-1)_A AS+(-1)_A ASD(1)_A AS+(1)_A ASD(2)_A AS+(2)_A ASD(3)_A AS+(3)_A ASD(4)_A AS+(4)_A ASD(5)_A AS+(5)_A ASD(6)_A AS+(6)_A NND(-1)_A NND(1)_A NND(2)_A NND(3)_A NND(4)_A NND(5)_A NND(6)_A 0_S 0D(-1)_S AS_S ASD(-1)_S AS+(-1)_S ASD(1)_S AS+(1)_S ASD(2)_S AS+(2)_S ASD(3)_S AS+(3)_S ASD(4)_S AS+(4)_S ASD(5)_S AS+(5)_S ASD(6)_S AS+(6)_S NND(-1)_S NND(1)_S NND(2)_S NND(3)_S NND(4)_S NND(5)_S NND(6)_S"
for s in " ".split(SOLVERLIST):
print(s)
Test dist Robin
python
from h2p3s import *
A, b, x_s, loc2glob = generate_problem(geom="Stick", n=4, d=1, K1=1, K2=1, nLineK=3, random_rhs=False)
solver = ConjGrad(M=AdditiveSchwarz)
solver = DistSchur(interface_solver=ConjGrad(M=AdditiveSchwarz),
local_solver=SpFacto)
print(solver)
solver.setup(A)
print(rank, solver.S.local)
print(rank, solver.interface_solver.M.Aih)
run(solver, A, b, x_s)
sh
mpirun -np 4 python test_robin_mpi.py