diff --git a/setup.py b/setup.py index 561409a5300176b0af867c4037e925cf7e6b01ee..1ded557bfa9f8f4fa3db1a8b9d577019580293ac 100644 --- a/setup.py +++ b/setup.py @@ -191,9 +191,8 @@ opts = dict( url='https://thoth.inrialpes.fr/people/mairal/spams/', license='GPLv3', python_requires='>=3', - install_requires=['Cython>=0.29', 'numpy>=1.12', - 'Pillow>=6.0', 'scipy>=1.0', 'six>=1.12'], - packages=['myscipy_rand', 'spams_wrap', 'spams', 'spams.tests'], + install_requires=['numpy>=1.12', 'Pillow>=6.0', 'scipy>=1.0'], + packages=['spams_wrap', 'spams', 'spams.tests'], cmdclass={'build_ext': CustomBuildExtCommand}, ext_modules=get_extension(), package_data={ diff --git a/spams/spams.py b/spams/spams.py index 72116046d668bfb0ddfce2531f8ddb3734af181d..143a012890daa8509338be3545945ecdfd8d7f41 100644 --- a/spams/spams.py +++ b/spams/spams.py @@ -3,9 +3,6 @@ This module makes some functions of the SPAMS library usable with numpy and scipy. """ -from __future__ import absolute_import, division, print_function -import six.moves - import spams_wrap import numpy as np import scipy.sparse as ssp @@ -37,16 +34,16 @@ def sort(X, mode=True): def calcAAt(A): """ - Compute efficiently AAt = A*A', when A is sparse + Compute efficiently AAt = A*A', when A is sparse and has a lot more columns than rows. In some cases, it is up to 20 times faster than the equivalent python expression AAt=A*A'; Args: - A: double sparse m x n matrix + A: double sparse m x n matrix Returns: - AAt: double m x m matrix + AAt: double m x m matrix Authors: Julien MAIRAL, 2009 (spams, matlab interface and documentation) @@ -64,16 +61,16 @@ def calcAAt(A): def calcXAt(X, A): """ - Compute efficiently XAt = X*A', when A is sparse and has a - lot more columns than rows. In some cases, it is up to 20 times + Compute efficiently XAt = X*A', when A is sparse and has a + lot more columns than rows. In some cases, it is up to 20 times faster than the equivalent python expression; Args: X: double m x n matrix - A: double sparse p x n matrix + A: double sparse p x n matrix Returns: - XAt: double m x p matrix + XAt: double m x p matrix Authors: Julien MAIRAL, 2009 (spams, matlab interface and documentation) @@ -110,10 +107,10 @@ def calcXY(X, Y): Args: X: double m x n matrix - Y: double n x p matrix + Y: double n x p matrix Returns: - Z: double m x p matrix + Z: double m x p matrix Authors: Julien MAIRAL, 2009 (spams, matlab interface and documentation) @@ -130,10 +127,10 @@ def calcXYt(X, Y): Args: X: double m x n matrix - Y: double p x n matrix + Y: double p x n matrix Returns: - Z: double m x p matrix + Z: double m x p matrix Authors: Julien MAIRAL, 2009 (spams, matlab interface and documentation) @@ -150,10 +147,10 @@ def calcXtY(X, Y): Args: X: double n x m matrix - Y: double n x p matrix + Y: double n x p matrix Returns: - Z: double m x p matrix + Z: double m x p matrix Authors: Julien MAIRAL, 2009 (spams, matlab interface and documentation) @@ -170,11 +167,11 @@ def bayer(X, offset): There are four possible offsets. Args: - X: double m x n matrix - offset: scalar, 0,1,2 or 3 + X: double m x n matrix + offset: scalar, 0,1,2 or 3 Returns: - Y: double m x m matrix + Y: double m x m matrix Authors: Julien MAIRAL, 2009 (spams, matlab interface and documentation) @@ -189,7 +186,7 @@ def bayer(X, offset): def conjGrad(A, b, x0=None, tol=1e-10, itermax=None): """ - Conjugate gradient algorithm, sometimes faster than the + Conjugate gradient algorithm, sometimes faster than the equivalent python function solve. In order to solve Ax=b; Args: @@ -224,10 +221,10 @@ def invSym(A): returns the inverse of a symmetric matrix A Args: - A: double n x n matrix + A: double n x n matrix Returns: - B: double n x n matrix + B: double n x n matrix Authors: Julien MAIRAL, 2009 (spams, matlab interface and documentation) @@ -246,10 +243,10 @@ def normalize(A): unit l2-norm. Args: - X: double m x n matrix + X: double m x n matrix Returns: - Y: double m x n matrix + Y: double m x n matrix Authors: Julien MAIRAL, 2010 (spams, matlab interface and documentation) @@ -279,7 +276,7 @@ PENALTY2 = spams_wrap.PENALTY2 def sparseProject(U, thrs=1.0, mode=1, lambda1=0.0, lambda2=0.0, lambda3=0.0, pos=0, numThreads=-1): """ - sparseProject solves various optimization + sparseProject solves various optimization problems, including projections on a few convex sets. It aims at addressing the following problems @@ -289,11 +286,11 @@ def sparseProject(U, thrs=1.0, mode=1, lambda1=0.0, lambda2=0.0, 2) when mode=2 min_v ||u-v||_2^2 s.t. ||v||_2^2 + lamuda1||v||_1 <= thrs 3) when mode=3 - min_v ||u-v||_2^2 s.t ||v||_1 + 0.5lamuda1||v||_2^2 <= thrs + min_v ||u-v||_2^2 s.t ||v||_1 + 0.5lamuda1||v||_2^2 <= thrs 4) when mode=4 min_v 0.5||u-v||_2^2 + lamuda1||v||_1 s.t ||v||_2^2 <= thrs 5) when mode=5 - min_v 0.5||u-v||_2^2 + lamuda1||v||_1 +lamuda2 FL(v) + ... + min_v 0.5||u-v||_2^2 + lamuda1||v||_1 +lamuda2 FL(v) + ... 0.5lamuda_3 ||v||_2^2 where FL denotes a "fused lasso" regularization term. 6) when mode=6 @@ -301,7 +298,7 @@ def sparseProject(U, thrs=1.0, mode=1, lambda1=0.0, lambda2=0.0, 0.5lamuda3||v||_2^2 <= thrs When pos=true and mode <= 4, - it solves the previous problems with positivity constraints + it solves the previous problems with positivity constraints Args: U: double m x n matrix (input signals) @@ -329,7 +326,7 @@ def sparseProject(U, thrs=1.0, mode=1, lambda1=0.0, lambda2=0.0, Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting + - single precision setting """ @@ -348,7 +345,7 @@ def lasso(X, D=None, Q=None, q=None, return_reg_path=False, L=-1, lambda1=None, max_length_path=-1, verbose=False, cholesky=False): """ lasso is an efficient implementation of the - homotopy-LARS algorithm for solving the Lasso. + homotopy-LARS algorithm for solving the Lasso. If the function is called this way spams.lasso(X,D = D, Q = None,...), @@ -377,7 +374,7 @@ def lasso(X, D=None, Q=None, q=None, return_reg_path=False, L=-1, lambda1=None, Q: p x p double matrix (Q = D'D) q: p x n double matrix (q = D'X) verbose: verbose mode - return_reg_path: + return_reg_path: if true the function will return a tuple of matrices. Kwargs: @@ -392,7 +389,7 @@ def lasso(X, D=None, Q=None, q=None, return_reg_path=False, L=-1, lambda1=None, numThreads: (optional, number of threads for exploiting multi-core / multi-cpus. By default, it takes the value -1, which automatically selects all the available CPUs/cores). - cholesky: (optional, default false), choose between Cholesky + cholesky: (optional, default false), choose between Cholesky implementation or one based on the matrix inversion Lemma ols: (optional, default false), perform an orthogonal projection before returning the solution. @@ -411,7 +408,7 @@ def lasso(X, D=None, Q=None, q=None, return_reg_path=False, L=-1, lambda1=None, Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting (even though the output alpha is double + - single precision setting (even though the output alpha is double precision) Examples: @@ -475,7 +472,7 @@ def lassoMask(X, D, B, L=-1, lambda1=None, lambda2=0., 1) when mode=0 min_{alpha} ||diag(beta)(x-Dalpha)||_2^2 s.t. ||alpha||_1 <= lambda1 2) when mode=1 - min_{alpha} ||alpha||_1 s.t. ||diag(beta)(x-Dalpha)||_2^2 + min_{alpha} ||alpha||_1 s.t. ||diag(beta)(x-Dalpha)||_2^2 <= lambda1*||beta||_0/m 3) when mode=2 min_{alpha} 0.5||diag(beta)(x-Dalpha)||_2^2 + @@ -496,7 +493,7 @@ def lassoMask(X, D, B, L=-1, lambda1=None, lambda2=0., Kwargs: lambda1: (parameter) - L: (optional, maximum number of elements of each + L: (optional, maximum number of elements of each decomposition) pos: (optional, adds positivity constraints on the coefficients, false by default) @@ -517,7 +514,7 @@ def lassoMask(X, D, B, L=-1, lambda1=None, lambda2=0., Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting (even though the output alpha is double + - single precision setting (even though the output alpha is double precision) """ @@ -538,7 +535,7 @@ def lassoWeighted(X, D, W, L=-1, lambda1=None, """ lassoWeighted is an efficient implementation of the LARS algorithm for solving the weighted Lasso. It is optimized - for solving a large number of small or medium-sized + for solving a large number of small or medium-sized decomposition problem (and not for a single large one). It first computes the Gram matrix D'D and then perform @@ -546,14 +543,14 @@ def lassoWeighted(X, D, W, L=-1, lambda1=None, For all columns x of X, and w of W, it computes one column alpha of A which is the solution of 1) when mode=0 - min_{alpha} ||x-Dalpha||_2^2 s.t. + min_{alpha} ||x-Dalpha||_2^2 s.t. ||diag(w)alpha||_1 <= lambda1 2) when mode=1 min_{alpha} ||diag(w)alpha||_1 s.t. ||x-Dalpha||_2^2 <= lambda1 3) when mode=2 - min_{alpha} 0.5||x-Dalpha||_2^2 + - lambda1||diag(w)alpha||_1 + min_{alpha} 0.5||x-Dalpha||_2^2 + + lambda1||diag(w)alpha||_1 Possibly, when pos=true, it solves the previous problems with positivity constraints on the vectors alpha @@ -568,7 +565,7 @@ def lassoWeighted(X, D, W, L=-1, lambda1=None, Kwargs: lambda1: (parameter) - L: (optional, maximum number of elements of each + L: (optional, maximum number of elements of each decomposition) pos: (optional, adds positivity constraints on the coefficients, false by default) @@ -587,7 +584,7 @@ def lassoWeighted(X, D, W, L=-1, lambda1=None, Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting (even though the output alpha is double + - single precision setting (even though the output alpha is double precision) """ @@ -607,7 +604,7 @@ def omp(X, D, L=None, eps=None, lambda1=None, return_reg_path=False, numThreads= """ omp is an efficient implementation of the Orthogonal Matching Pursuit algorithm. It is optimized - for solving a large number of small or medium-sized + for solving a large number of small or medium-sized decomposition problem (and not for a single large one). It first computes the Gram matrix D'D and then perform @@ -615,12 +612,12 @@ def omp(X, D, L=None, eps=None, lambda1=None, return_reg_path=False, numThreads= X=[x^1,...,x^n] is a matrix of signals, and it returns a matrix A=[alpha^1,...,alpha^n] of coefficients. - it addresses for all columns x of X, + it addresses for all columns x of X, min_{alpha} ||alpha||_0 s.t ||x-Dalpha||_2^2 <= eps or min_{alpha} ||x-Dalpha||_2^2 s.t. ||alpha||_0 <= L or - min_{alpha} 0.5||x-Dalpha||_2^2 + lambda1||alpha||_0 + min_{alpha} 0.5||x-Dalpha||_2^2 + lambda1||alpha||_0 Args: X: double m x n matrix (input signals) @@ -629,11 +626,11 @@ def omp(X, D, L=None, eps=None, lambda1=None, return_reg_path=False, numThreads= D: double m x p matrix (dictionary) p is the number of elements in the dictionary All the columns of D should have unit-norm ! - return_reg_path: + return_reg_path: if true the function will return a tuple of matrices. Kwargs: - L: (optional, maximum number of elements in each decomposition, + L: (optional, maximum number of elements in each decomposition, min(m,p) by default) eps: (optional, threshold on the squared l2-norm of the residual, 0 by default @@ -655,11 +652,11 @@ def omp(X, D, L=None, eps=None, lambda1=None, return_reg_path=False, numThreads= Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting (even though the output alpha is double + - single precision setting (even though the output alpha is double precision) - Passing an int32 vector of length n to L provides a different parameter L for each input signal x_i - - Passing a double vector of length n to eps and or lambda1 + - Passing a double vector of length n to eps and or lambda1 provides a different parameter eps (or lambda1) for each input signal x_i """ @@ -705,9 +702,9 @@ def ompMask(X, D, B, L=None, eps=None, lambda1=None, return_reg_path=False, numT a binary mask B - for all columns x of X, and columns beta of B, it computes a column + for all columns x of X, and columns beta of B, it computes a column alpha of A by addressing - min_{alpha} ||alpha||_0 s.t ||diag(beta)*(x-Dalpha)||_2^2 + min_{alpha} ||alpha||_0 s.t ||diag(beta)*(x-Dalpha)||_2^2 <= eps*||beta||_0/m or min_{alpha} ||diag(beta)*(x-Dalpha)||_2^2 s.t. ||alpha||_0 <= L @@ -723,11 +720,11 @@ def ompMask(X, D, B, L=None, eps=None, lambda1=None, return_reg_path=False, numT All the columns of D should have unit-norm ! B: boolean m x n matrix (mask) p is the number of elements in the dictionary - return_reg_path: + return_reg_path: if true the function will return a tuple of matrices. Kwargs: - L: (optional, maximum number of elements in each decomposition, + L: (optional, maximum number of elements in each decomposition, min(m,p) by default) eps: (optional, threshold on the squared l2-norm of the residual, 0 by default @@ -738,7 +735,7 @@ def ompMask(X, D, B, L=None, eps=None, lambda1=None, return_reg_path=False, numT Returns: A: double sparse p x n matrix (output coefficients) - path (optional): double dense p x L matrix + path (optional): double dense p x L matrix (regularization path of the first signal) A = spams.ompMask(X,D,B,L,eps,return_reg_path = False,...) (A,path) = spams.ompMask(X,D,B,L,eps,return_reg_path = True,...) @@ -750,11 +747,11 @@ def ompMask(X, D, B, L=None, eps=None, lambda1=None, return_reg_path=False, numT Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting (even though the output alpha is double + - single precision setting (even though the output alpha is double precision) - Passing an int32 vector of length n to L provides a different parameter L for each input signal x_i - - Passing a double vector of length n to eps and or lambda1 + - Passing a double vector of length n to eps and or lambda1 provides a different parameter eps (or lambda1) for each input signal x_i """ @@ -796,25 +793,25 @@ def ompMask(X, D, B, L=None, eps=None, lambda1=None, return_reg_path=False, numT def cd(X, D, A0, lambda1=None, mode=spams_wrap.PENALTY, itermax=100, tol=0.001, numThreads=-1): """ - cd addresses l1-decomposition problem with a + cd addresses l1-decomposition problem with a coordinate descent type of approach. - It is optimized for solving a large number of small or medium-sized + It is optimized for solving a large number of small or medium-sized decomposition problem (and not for a single large one). It first computes the Gram matrix D'D. - This method is particularly well adapted when there is low - correlation between the dictionary elements and when one can benefit + This method is particularly well adapted when there is low + correlation between the dictionary elements and when one can benefit from a warm restart. It aims at addressing the two following problems for all columns x of X, it computes a column alpha of A such that 2) when mode=1 min_{alpha} ||alpha||_1 s.t. ||x-Dalpha||_2^2 <= lambda1 - For this constraint setting, the method solves a sequence of + For this constraint setting, the method solves a sequence of penalized problems (corresponding to mode=2) and looks for the corresponding Lagrange multplier with a simple but efficient heuristic. 3) when mode=2 - min_{alpha} 0.5||x-Dalpha||_2^2 + lambda1||alpha||_1 + min_{alpha} 0.5||x-Dalpha||_2^2 + lambda1||alpha||_1 Args: X: double m x n matrix (input signals) @@ -844,7 +841,7 @@ def cd(X, D, A0, lambda1=None, mode=spams_wrap.PENALTY, itermax=100, tol=0.001, Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting (even though the output alpha + - single precision setting (even though the output alpha is double precision) """ @@ -861,7 +858,7 @@ def somp(X, D, list_groups, L=None, eps=0., numThreads=-1): """ somp is an efficient implementation of a Simultaneous Orthogonal Matching Pursuit algorithm. It is optimized - for solving a large number of small or medium-sized + for solving a large number of small or medium-sized decomposition problem (and not for a single large one). It first computes the Gram matrix D'D and then perform @@ -871,7 +868,7 @@ def somp(X, D, list_groups, L=None, eps=0., numThreads=-1): X is a matrix structured in groups of signals, which we denote by X=[X_1,...,X_n] - for all matrices X_i of X, + for all matrices X_i of X, min_{A_i} ||A_i||_{0,infty} s.t ||X_i-D A_i||_2^2 <= eps*n_i where n_i is the number of columns of X_i @@ -882,7 +879,7 @@ def somp(X, D, list_groups, L=None, eps=0., numThreads=-1): Args: X: double m x N matrix (input signals) m is the signal size - N is the total number of signals + N is the total number of signals D: double m x p matrix (dictionary) p is the number of elements in the dictionary All the columns of D should have unit-norm ! @@ -906,7 +903,7 @@ def somp(X, D, list_groups, L=None, eps=0., numThreads=-1): Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting (even though the output alpha is double + - single precision setting (even though the output alpha is double precision) """ @@ -921,8 +918,8 @@ def somp(X, D, list_groups, L=None, eps=0., numThreads=-1): def l1L2BCD(X, D, alpha0, list_groups, lambda1=None, mode=spams_wrap.PENALTY, itermax=100, tol=1e-3, numThreads=-1): """ - l1L2BCD is a solver for a - Simultaneous signal decomposition formulation based on block + l1L2BCD is a solver for a + Simultaneous signal decomposition formulation based on block coordinate descent. @@ -930,8 +927,8 @@ def l1L2BCD(X, D, alpha0, list_groups, lambda1=None, mode=spams_wrap.PENALTY, it by X=[X_1,...,X_n] if mode=2, it solves - for all matrices X_i of X, - min_{A_i} 0.5||X_i-D A_i||_2^2 + lambda1/sqrt(n_i)||A_i||_{1,2} + for all matrices X_i of X, + min_{A_i} 0.5||X_i-D A_i||_2^2 + lambda1/sqrt(n_i)||A_i||_{1,2} where n_i is the number of columns of X_i if mode=1, it solves min_{A_i} ||A_i||_{1,2} s.t. ||X_i-D A_i||_2^2 <= n_i lambda1 @@ -939,7 +936,7 @@ def l1L2BCD(X, D, alpha0, list_groups, lambda1=None, mode=spams_wrap.PENALTY, it Args: X: double m x N matrix (input signals) m is the signal size - N is the total number of signals + N is the total number of signals D: double m x p matrix (dictionary) p is the number of elements in the dictionary alpha0: double dense p x N matrix (initial solution) @@ -965,7 +962,7 @@ def l1L2BCD(X, D, alpha0, list_groups, lambda1=None, mode=spams_wrap.PENALTY, it Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting (even though the output alpha is double + - single precision setting (even though the output alpha is double precision) """ @@ -1005,7 +1002,7 @@ def fistaFlat( w = argmin 0.5||y- X w||_2^2 + lambda1 psi(w) - if loss='square' and regul is a regularization function for matrices - the entries of Y are real-valued, W is a matrix of size p x n. + the entries of Y are real-valued, W is a matrix of size p x n. It computes the matrix W such that W = argmin 0.5||Y- X W||_F^2 + lambda1 psi(W) @@ -1035,7 +1032,7 @@ def fistaFlat( W = argmin (1/m)sum_{j=1}^m log(sum_{j=1}^r e^(x^j'(w^j-w^{y_j}))) + lambda1 psi(W) where ww^j is the j-th column of WW. - - loss='cur' useful to perform sparse CUR matrix decompositions, + - loss='cur' useful to perform sparse CUR matrix decompositions, W = argmin 0.5||Y-X*W*X||_F^2 + lambda1 psi(W) @@ -1046,10 +1043,10 @@ def fistaFlat( Args: Y: double dense m x n matrix - X: double dense or sparse m x p matrix + X: double dense or sparse m x p matrix W0: double dense p x n matrix or p x Nn matrix (for multi-logistic loss) initial guess - return_optim_info: + return_optim_info: if true the function will return a tuple of matrices. Kwargs: @@ -1079,13 +1076,13 @@ def fistaFlat( linesearch_mode: (line-search scheme when ista=true: 0: default, monotonic backtracking scheme 1: monotonic backtracking scheme, with restart at each iteration - 2: Barzilai-Borwein step sizes (similar to SparSA by Wright et al.) + 2: Barzilai-Borwein step sizes (similar to SparSA by Wright et al.) 3: non-monotonic backtracking compute_gram: (optional, pre-compute X^TX, false by default). intercept: (optional, do not regularize last row of W, false by default). ista: (optional, use ista instead of fista, false by default). subgrad: (optional, if not ista, use subradient descent instead of fista, false by default). - a: + a: b: (optional, if subgrad, the gradient step is a/(t+b) also similar options as proximalFlat @@ -1184,11 +1181,11 @@ def fistaTree( Args: Y: double dense m x n matrix - X: double dense or sparse m x p matrix + X: double dense or sparse m x p matrix W0: double dense p x n matrix or p x Nn matrix (for multi-logistic loss) initial guess tree: named list (see documentation of proximalTree) - return_optim_info: + return_optim_info: if true the function will return a tuple of matrices. Kwargs: @@ -1217,7 +1214,7 @@ def fistaTree( intercept: (optional, do not regularize last row of W, false by default). ista: (optional, use ista instead of fista, false by default). subgrad: (optional, if not ista, use subradient descent instead of fista, false by default). - a: + a: b: (optional, if subgrad, the gradient step is a/(t+b) also similar options as proximalTree @@ -1306,11 +1303,11 @@ def fistaGraph( Args: Y: double dense m x n matrix - X: double dense or sparse m x p matrix + X: double dense or sparse m x p matrix W0: double dense p x n matrix or p x Nn matrix (for multi-logistic loss) initial guess graph: struct (see documentation of proximalGraph) - return_optim_info: + return_optim_info: if true the function will return a tuple of matrices. Kwargs: @@ -1336,7 +1333,7 @@ def fistaGraph( intercept: (optional, do not regularize last row of W, false by default). ista: (optional, use ista instead of fista, false by default). subgrad: (optional, if not ista, use subradient descent instead of fista, false by default). - a: + a: b: (optional, if subgrad, the gradient step is a/(t+b) also similar options as proximalTree @@ -1405,10 +1402,10 @@ def proximalFlat(U, return_val_loss=False, numThreads=-1, lambda1=1.0, lambda2=0 pos=False, clever=True, size_group=1, groups=None, transpose=False): """ proximalFlat computes proximal operators. Depending - on the value of regul, it computes + on the value of regul, it computes - Given an input matrix U=[u^1,\ldots,u^n], it computes a matrix + Given an input matrix U=[u^1,\ldots,u^n], it computes a matrix V=[v^1,\ldots,v^n] such that if one chooses a regularization functions on vectors, it computes for each column u of U, a column v of V solving @@ -1429,37 +1426,37 @@ def proximalFlat(U, return_val_loss=False, numThreads=-1, lambda1=1.0, lambda2=0 argmin 0.5||u-v||_2^2 s.t. ||v||_1 <= lambda1 if regul='l2-not-squared' argmin 0.5||u-v||_2^2 + lambda1||v||_2 - if regul='group-lasso-l2' - argmin 0.5||u-v||_2^2 + lambda1 sum_g ||v_g||_2 + if regul='group-lasso-l2' + argmin 0.5||u-v||_2^2 + lambda1 sum_g ||v_g||_2 where the groups are either defined by groups or by size_group, if regul='group-lasso-linf' argmin 0.5||u-v||_2^2 + lambda1 sum_g ||v_g||_inf - if regul='sparse-group-lasso-l2' + if regul='sparse-group-lasso-l2' argmin 0.5||u-v||_2^2 + lambda1 sum_g ||v_g||_2 + lambda1_2 ||v||_1 where the groups are either defined by groups or by size_group, if regul='sparse-group-lasso-linf' argmin 0.5||u-v||_2^2 + lambda1 sum_g ||v_g||_inf + lambda1_2 ||v||_1 - if regul='trace-norm-vec' - argmin 0.5||u-v||_2^2 + lambda1 ||mat(v)||_* + if regul='trace-norm-vec' + argmin 0.5||u-v||_2^2 + lambda1 ||mat(v)||_* where mat(v) has size_group rows if one chooses a regularization function on matrices - if regul='l1l2', V= + if regul='l1l2', V= argmin 0.5||U-V||_F^2 + lambda1||V||_{1/2} - if regul='l1linf', V= + if regul='l1linf', V= argmin 0.5||U-V||_F^2 + lambda1||V||_{1/inf} - if regul='l1l2+l1', V= + if regul='l1l2+l1', V= argmin 0.5||U-V||_F^2 + lambda1||V||_{1/2} + lambda1_2||V||_{1/1} - if regul='l1linf+l1', V= + if regul='l1linf+l1', V= argmin 0.5||U-V||_F^2 + lambda1||V||_{1/inf} + lambda1_2||V||_{1/1} - if regul='l1linf+row-column', V= + if regul='l1linf+row-column', V= argmin 0.5||U-V||_F^2 + lambda1||V||_{1/inf} + lambda1_2||V'||_{1/inf} - if regul='trace-norm', V= + if regul='trace-norm', V= argmin 0.5||U-V||_F^2 + lambda1||V||_* - if regul='rank', V= + if regul='rank', V= argmin 0.5||U-V||_F^2 + lambda1 rank(V) - if regul='none', V= - argmin 0.5||U-V||_F^2 + if regul='none', V= + argmin 0.5||U-V||_F^2 for all these regularizations, it is possible to enforce non-negativity constraints with the option pos, and to prevent the last row of U to be regularized, with @@ -1468,7 +1465,7 @@ def proximalFlat(U, return_val_loss=False, numThreads=-1, lambda1=1.0, lambda2=0 Args: U: double m x n matrix (input signals) m is the signal size - return_val_loss: + return_val_loss: if true the function will return a tuple of matrices. Kwargs: @@ -1543,7 +1540,7 @@ def proximalTree(U, tree, return_val_loss=False, numThreads=-1, lambda1=1.0, lam pos=False, clever=True, size_group=1, transpose=False): """ proximalTree computes a proximal operator. Depending - on the value of regul, it computes + on the value of regul, it computes Given an input matrix U=[u^1,\ldots,u^n], and a tree-structured set of groups T, @@ -1554,10 +1551,10 @@ def proximalTree(U, tree, return_val_loss=False, numThreads=-1, lambda1=1.0, lam if regul='tree-l0' argmin 0.5||u-v||_2^2 + lambda1 \sum_{g \in T} \delta^g(v) if regul='tree-l2' - for all i, v^i = + for all i, v^i = argmin 0.5||u-v||_2^2 + lambda1\sum_{g \in T} \eta_g||v_g||_2 if regul='tree-linf' - for all i, v^i = + for all i, v^i = argmin 0.5||u-v||_2^2 + lambda1\sum_{g \in T} \eta_g||v_g||_inf when the regularization function is for matrices: @@ -1574,24 +1571,24 @@ def proximalTree(U, tree, return_val_loss=False, numThreads=-1, lambda1=1.0, lam Args: U: double m x n matrix (input signals) m is the signal size - tree: named list + tree: named list with four fields, eta_g, groups, own_variables and N_own_variables. The tree structure requires a particular organization of groups and variables * Let us denote by N = |T|, the number of groups. the groups should be ordered T={g1,g2,\ldots,gN} such that if gi is included - in gj, then j <= i. g1 should be the group at the root of the tree + in gj, then j <= i. g1 should be the group at the root of the tree and contains every variable. - * Every group is a set of contiguous indices for instance + * Every group is a set of contiguous indices for instance gi={3,4,5} or gi={4,5,6,7} or gi={4}, but not {3,5}; * We define root(gi) as the indices of the variables that are in gi, but not in its descendants. For instance for - T={ g1={1,2,3,4},g2={2,3},g3={4} }, then, root(g1)={1}, + T={ g1={1,2,3,4},g2={2,3},g3={4} }, then, root(g1)={1}, root(g2)={2,3}, root(g3)={4}, We assume that for all i, root(gi) is a set of contigous variables * We assume that the smallest of root(gi) is also the smallest index of gi. - For instance, + For instance, T={ g1={1,2,3,4},g2={2,3},g3={4} }, is a valid set of groups. but we can not have T={ g1={1,2,3,4},g2={1,2},g3={3} }, since root(g1)={4} and 4 is not the @@ -1603,25 +1600,25 @@ def proximalTree(U, tree, return_val_loss=False, numThreads=-1, lambda1=1.0, lam see more examples in test_ProximalTree.m of valid tree-structured sets of groups. The first fields sets the weights for every group - tree['eta_g'] double N vector + tree['eta_g'] double N vector - The next field sets inclusion relations between groups + The next field sets inclusion relations between groups (but not between groups and variables): - tree['groups'] sparse (double or boolean) N x N matrix - the (i,j) entry is non-zero if and only if i is different than j and + tree['groups'] sparse (double or boolean) N x N matrix + the (i,j) entry is non-zero if and only if i is different than j and gi is included in gj. the first column corresponds to the group at the root of the tree. - The next field define the smallest index of each group gi, + The next field define the smallest index of each group gi, which is also the smallest index of root(gi) tree['own_variables'] int32 N vector The next field define for each group gi, the size of root(gi) - tree['N_own_variables'] int32 N vector + tree['N_own_variables'] int32 N vector examples are given in test_ProximalTree.m - return_val_loss: + return_val_loss: if true the function will return a tuple of matrices. Kwargs: @@ -1696,7 +1693,7 @@ def proximalGraph(U, graph, return_val_loss=False, numThreads=-1, lambda1=1.0, l pos=False, clever=True, eval=None, size_group=1, transpose=False): """ proximalGraph computes a proximal operator. Depending - on the value of regul, it computes + on the value of regul, it computes Given an input matrix U=[u^1,\ldots,u^n], and a set of groups G, @@ -1728,22 +1725,22 @@ def proximalGraph(U, graph, return_val_loss=False, numThreads=-1, lambda1=1.0, l with three fields, eta_g, groups, and groups_var The first fields sets the weights for every group - graph.eta_g double N vector + graph.eta_g double N vector - The next field sets inclusion relations between groups + The next field sets inclusion relations between groups (but not between groups and variables): - graph.groups sparse (double or boolean) N x N matrix - the (i,j) entry is non-zero if and only if i is different than j and + graph.groups sparse (double or boolean) N x N matrix + the (i,j) entry is non-zero if and only if i is different than j and gi is included in gj. The next field sets inclusion relations between groups and variables graph.groups_var sparse (double or boolean) p x N matrix - the (i,j) entry is non-zero if and only if the variable i is included + the (i,j) entry is non-zero if and only if the variable i is included in gj, but not in any children of gj. examples are given in test_ProximalGraph.m - return_val_loss: + return_val_loss: if true the function will return a tuple of matrices. Kwargs: @@ -1900,12 +1897,12 @@ def trainDL( by Julien Mairal, Francis Bach, Jean Ponce and Guillermo Sapiro arXiv:0908.0050 - "Online Dictionary Learning for Sparse Coding" + "Online Dictionary Learning for Sparse Coding" by Julien Mairal, Francis Bach, Jean Ponce and Guillermo Sapiro ICML 2009. Note that if you use mode=1 or 2, if the training set has a - reasonable size and you have enough memory on your computer, you + reasonable size and you have enough memory on your computer, you should use trainDL_Memory instead. @@ -1917,29 +1914,29 @@ def trainDL( min_{D in C} (1/n) sum_{i=1}^n ||alpha_i||_1 s.t. ... ||x_i-Dalpha_i||_2^2 <= lambda1 3) if mode=2 - min_{D in C} (1/n) sum_{i=1}^n (1/2)||x_i-Dalpha_i||_2^2 + ... + min_{D in C} (1/n) sum_{i=1}^n (1/2)||x_i-Dalpha_i||_2^2 + ... lambda1||alpha_i||_1 + lambda1_2||alpha_i||_2^2 4) if mode=3, the sparse coding is done with OMP - min_{D in C} (1/n) sum_{i=1}^n (1/2)||x_i-Dalpha_i||_2^2 s.t. ... + min_{D in C} (1/n) sum_{i=1}^n (1/2)||x_i-Dalpha_i||_2^2 s.t. ... ||alpha_i||_0 <= lambda1 5) if mode=4, the sparse coding is done with OMP min_{D in C} (1/n) sum_{i=1}^n ||alpha_i||_0 s.t. ... ||x_i-Dalpha_i||_2^2 <= lambda1 6) if mode=5, the sparse coding is done with OMP - min_{D in C} (1/n) sum_{i=1}^n 0.5||x_i-Dalpha_i||_2^2 +lambda1||alpha_i||_0 + min_{D in C} (1/n) sum_{i=1}^n 0.5||x_i-Dalpha_i||_2^2 +lambda1||alpha_i||_0 C is a convex set verifying 1) if modeD=0 C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 <= 1 } 2) if modeD=1 - C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... + C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... gamma1||d_j||_1 <= 1 } 3) if modeD=2 - C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... + C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... gamma1||d_j||_1 + gamma2 FL(d_j) <= 1 } 4) if modeD=3 - C={ D in Real^{m x p} s.t. forall j, (1-gamma1)||d_j||_2^2 + ... + C={ D in Real^{m x p} s.t. forall j, (1-gamma1)||d_j||_2^2 + ... gamma1||d_j||_1 <= 1 } Potentially, n can be very large with this algorithm. @@ -1948,7 +1945,7 @@ def trainDL( X: double m x n matrix (input signals) m is the signal size n is the number of signals to decompose - return_model: + return_model: if true the function will return the model as a named list ('A' = A, 'B' = B, 'iter' = n) model: None or model (as A,B,iter) to use as initialisation @@ -1956,40 +1953,40 @@ def trainDL( Kwargs: D: (optional) double m x p matrix (dictionary) p is the number of elements in the dictionary - When D is not provided, the dictionary is initialized + When D is not provided, the dictionary is initialized with random elements from the training set. K: (size of the dictionary, optional is D is provided) lambda1: (parameter) lambda2: (optional, by default 0) - iter: (number of iterations). If a negative number is + iter: (number of iterations). If a negative number is provided it will perform the computation during the corresponding number of seconds. For instance iter=-5 learns the dictionary during 5 seconds. - mode: (optional, see above, by default 2) + mode: (optional, see above, by default 2) posAlpha: (optional, adds positivity constraints on the - coefficients, false by default, not compatible with + coefficients, false by default, not compatible with mode =3,4) modeD: (optional, see above, by default 0) - posD: (optional, adds positivity constraints on the - dictionary, false by default, not compatible with + posD: (optional, adds positivity constraints on the + dictionary, false by default, not compatible with modeD=2) gamma1: (optional parameter for modeD >= 1) gamma2: (optional parameter for modeD = 2) - batchsize: (optional, size of the minibatch, by default + batchsize: (optional, size of the minibatch, by default 512) iter_updateD: (optional, number of BCD iterations for the dictionary update step, by default 1) modeParam: (optimization mode). - 1) if modeParam=0, the optimization uses the + 1) if modeParam=0, the optimization uses the parameter free strategy of the ICML paper - 2) if modeParam=1, the optimization uses the + 2) if modeParam=1, the optimization uses the parameters rho as in arXiv:0908.0050 - 3) if modeParam=2, the optimization uses exponential - decay weights with updates of the form + 3) if modeParam=2, the optimization uses exponential + decay weights with updates of the form A_{t} <- rho A_{t-1} + alpha_t alpha_t^T rho: (optional) tuning parameter (see paper arXiv:0908.0050) t0: (optional) tuning parameter (see paper arXiv:0908.0050) - clean: (optional, true by default. prunes + clean: (optional, true by default. prunes automatically the dictionary from unused elements). verbose: (optional, true by default, increase verbosity) numThreads: (optional, number of threads for exploiting @@ -2015,7 +2012,7 @@ def trainDL( Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting + - single precision setting """ @@ -2046,7 +2043,7 @@ def structTrainDL( by Julien Mairal, Francis Bach, Jean Ponce and Guillermo Sapiro arXiv:0908.0050 - "Online Dictionary Learning for Sparse Coding" + "Online Dictionary Learning for Sparse Coding" by Julien Mairal, Francis Bach, Jean Ponce and Guillermo Sapiro ICML 2009. @@ -2061,13 +2058,13 @@ def structTrainDL( 1) if modeD=0 C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 <= 1 } 2) if modeD=1 - C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... + C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... gamma1||d_j||_1 <= 1 } 3) if modeD=2 - C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... + C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... gamma1||d_j||_1 + gamma2 FL(d_j) <= 1 } 4) if modeD=3 - C={ D in Real^{m x p} s.t. forall j, (1-gamma1)||d_j||_2^2 + ... + C={ D in Real^{m x p} s.t. forall j, (1-gamma1)||d_j||_2^2 + ... gamma1||d_j||_1 <= 1 } Potentially, n can be very large with this algorithm. @@ -2076,7 +2073,7 @@ def structTrainDL( X: double m x n matrix (input signals) m is the signal size n is the number of signals to decompose - return_model: + return_model: if true the function will return the model as a named list ('A' = A, 'B' = B, 'iter' = n) model: None or model (as A,B,iter) to use as initialisation @@ -2084,19 +2081,19 @@ def structTrainDL( Kwargs: D: (optional) double m x p matrix (dictionary) p is the number of elements in the dictionary - When D is not provided, the dictionary is initialized + When D is not provided, the dictionary is initialized with random elements from the training set. K: (size of the dictionary, optional is D is provided) lambda1: (parameter) lambda2: (optional, by default 0) lambda3: (optional, regularization parameter, 0 by default) - iter: (number of iterations). If a negative number is + iter: (number of iterations). If a negative number is provided it will perform the computation during the corresponding number of seconds. For instance iter=-5 learns the dictionary during 5 seconds. regul: choice of regularization : one of 'l0' 'l1' 'l2' 'linf' 'none' 'elastic-net' 'fused-lasso' - 'graph' 'graph-ridge' 'graph-l2' 'tree-l0' 'tree-l2' 'tree-linf' + 'graph' 'graph-ridge' 'graph-l2' 'tree-l0' 'tree-l2' 'tree-linf' tree: struct (see documentation of proximalTree); needed for regul of graph kind. graph: struct (see documentation of proximalGraph); @@ -2104,29 +2101,29 @@ def structTrainDL( posAlpha: (optional, adds positivity constraints on the coefficients, false by default. modeD: (optional, see above, by default 0) - posD: (optional, adds positivity constraints on the - dictionary, false by default, not compatible with + posD: (optional, adds positivity constraints on the + dictionary, false by default, not compatible with modeD=2) gamma1: (optional parameter for modeD >= 1) gamma2: (optional parameter for modeD = 2) - batchsize: (optional, size of the minibatch, by default + batchsize: (optional, size of the minibatch, by default 512) iter_updateD: (optional, number of BCD iterations for the dictionary update step, by default 1) modeParam: (optimization mode). - 1) if modeParam=0, the optimization uses the + 1) if modeParam=0, the optimization uses the parameter free strategy of the ICML paper - 2) if modeParam=1, the optimization uses the + 2) if modeParam=1, the optimization uses the parameters rho as in arXiv:0908.0050 - 3) if modeParam=2, the optimization uses exponential - decay weights with updates of the form + 3) if modeParam=2, the optimization uses exponential + decay weights with updates of the form A_{t} <- rho A_{t-1} + alpha_t alpha_t^T ista: (optional, use ista instead of fista, false by default). tol: (optional, tolerance for stopping criteration, which is a relative duality gap fixed_step: (deactive the line search for L in fista and use K instead) rho: (optional) tuning parameter (see paper arXiv:0908.0050) t0: (optional) tuning parameter (see paper arXiv:0908.0050) - clean: (optional, true by default. prunes + clean: (optional, true by default. prunes automatically the dictionary from unused elements). verbose: (optional, true by default, increase verbosity) numThreads: (optional, number of threads for exploiting @@ -2152,7 +2149,7 @@ def structTrainDL( Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting + - single precision setting """ @@ -2170,7 +2167,7 @@ def trainDL_Memory(X, D=None, numThreads=-1, batchsize=-1, K=-1, lambda1=None, iter=-1, t0=1e-5, mode=spams_wrap.PENALTY, posD=False, expand=False, modeD=spams_wrap.L2, whiten=False, clean=True, gamma1=0., gamma2=0., rho=1.0, iter_updateD=1, stochastic_deprecated=False, modeParam=0, batch=False, log_deprecated=False, logName=''): """ - trainDL_Memory is an efficient but memory consuming + trainDL_Memory is an efficient but memory consuming variant of the dictionary learning technique presented in @@ -2178,11 +2175,11 @@ def trainDL_Memory(X, D=None, numThreads=-1, batchsize=-1, by Julien Mairal, Francis Bach, Jean Ponce and Guillermo Sapiro arXiv:0908.0050 - "Online Dictionary Learning for Sparse Coding" + "Online Dictionary Learning for Sparse Coding" by Julien Mairal, Francis Bach, Jean Ponce and Guillermo Sapiro ICML 2009. - Contrary to the approaches above, the algorithm here + Contrary to the approaches above, the algorithm here does require to store all the coefficients from all the training signals. For this reason this variant can not be used with large training sets, but is more efficient than the regular online @@ -2193,17 +2190,17 @@ def trainDL_Memory(X, D=None, numThreads=-1, batchsize=-1, min_{D in C} (1/n) sum_{i=1}^n ||alpha_i||_1 s.t. ... ||x_i-Dalpha_i||_2^2 <= lambda1 2) if mode=2 - min_{D in C} (1/n) sum_{i=1}^n (1/2)||x_i-Dalpha_i||_2^2 + ... - lambda1||alpha_i||_1 + min_{D in C} (1/n) sum_{i=1}^n (1/2)||x_i-Dalpha_i||_2^2 + ... + lambda1||alpha_i||_1 C is a convex set verifying 1) if modeD=0 C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 <= 1 } 1) if modeD=1 - C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... + C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... gamma1||d_j||_1 <= 1 } 1) if modeD=2 - C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... + C={ D in Real^{m x p} s.t. forall j, ||d_j||_2^2 + ... gamma1||d_j||_1 + gamma2 FL(d_j) <= 1 } Potentially, n can be very large with this algorithm. @@ -2216,36 +2213,36 @@ def trainDL_Memory(X, D=None, numThreads=-1, batchsize=-1, Kwargs: D: (optional) double m x p matrix (dictionary) p is the number of elements in the dictionary - When D is not provided, the dictionary is initialized + When D is not provided, the dictionary is initialized with random elements from the training set. K: (size of the dictionary, optional is D is provided) lambda1: (parameter) - iter: (number of iterations). If a negative number is + iter: (number of iterations). If a negative number is provided it will perform the computation during the corresponding number of seconds. For instance iter=-5 learns the dictionary during 5 seconds. - mode: (optional, see above, by default 2) + mode: (optional, see above, by default 2) modeD: (optional, see above, by default 0) - posD: (optional, adds positivity constraints on the - dictionary, false by default, not compatible with + posD: (optional, adds positivity constraints on the + dictionary, false by default, not compatible with modeD=2) gamma1: (optional parameter for modeD >= 1) gamma2: (optional parameter for modeD = 2) - batchsize: (optional, size of the minibatch, by default + batchsize: (optional, size of the minibatch, by default 512) - iter_updateD: (optional, number of BCD iterations for the dictionary + iter_updateD: (optional, number of BCD iterations for the dictionary update step, by default 1) modeParam: (optimization mode). - 1) if modeParam=0, the optimization uses the + 1) if modeParam=0, the optimization uses the parameter free strategy of the ICML paper - 2) if modeParam=1, the optimization uses the + 2) if modeParam=1, the optimization uses the parameters rho as in arXiv:0908.0050 - 3) if modeParam=2, the optimization uses exponential - decay weights with updates of the form + 3) if modeParam=2, the optimization uses exponential + decay weights with updates of the form A_{t} <- rho A_{t-1} + alpha_t alpha_t^T rho: (optional) tuning parameter (see paper arXiv:0908.0050) t0: (optional) tuning parameter (see paper arXiv:0908.0050) - clean: (optional, true by default. prunes + clean: (optional, true by default. prunes automatically the dictionary from unused elements). numThreads: (optional, number of threads for exploiting multi-core / multi-cpus. By default, it takes the value -1, @@ -2269,7 +2266,7 @@ def trainDL_Memory(X, D=None, numThreads=-1, batchsize=-1, Note: this function admits a few experimental usages, which have not been extensively tested: - - single precision setting (even though the output alpha is double + - single precision setting (even though the output alpha is double precision) """ @@ -2287,14 +2284,14 @@ def nmf(X, return_lasso=False, model=None, iter=-1, t0=1e-5, clean=True, rho=1.0, modeParam=0, batch=False): """ trainDL is an efficient implementation of the - non-negative matrix factorization technique presented in + non-negative matrix factorization technique presented in "Online Learning for Matrix Factorization and Sparse Coding" by Julien Mairal, Francis Bach, Jean Ponce and Guillermo Sapiro arXiv:0908.0050 - "Online Dictionary Learning for Sparse Coding" + "Online Dictionary Learning for Sparse Coding" by Julien Mairal, Francis Bach, Jean Ponce and Guillermo Sapiro ICML 2009. @@ -2304,32 +2301,32 @@ def nmf(X, return_lasso=False, model=None, X: double m x n matrix (input signals) m is the signal size n is the number of signals to decompose - return_lasso: + return_lasso: if true the function will return a tuple of matrices. Kwargs: K: (number of required factors) - iter: (number of iterations). If a negative number + iter: (number of iterations). If a negative number is provided it will perform the computation during the corresponding number of seconds. For instance iter=-5 learns the dictionary during 5 seconds. - batchsize: (optional, size of the minibatch, by default + batchsize: (optional, size of the minibatch, by default 512) modeParam: (optimization mode). - 1) if modeParam=0, the optimization uses the + 1) if modeParam=0, the optimization uses the parameter free strategy of the ICML paper - 2) if modeParam=1, the optimization uses the + 2) if modeParam=1, the optimization uses the parameters rho as in arXiv:0908.0050 - 3) if modeParam=2, the optimization uses exponential - decay weights with updates of the form + 3) if modeParam=2, the optimization uses exponential + decay weights with updates of the form A_{t} <- rho A_{t-1} + alpha_t alpha_t^T - rho: (optional) tuning parameter (see paper + rho: (optional) tuning parameter (see paper arXiv:0908.0050) - t0: (optional) tuning parameter (see paper + t0: (optional) tuning parameter (see paper arXiv:0908.0050) - clean: (optional, true by default. prunes automatically + clean: (optional, true by default. prunes automatically the dictionary from unused elements). - batch: (optional, false by default, use batch learning + batch: (optional, false by default, use batch learning instead of online learning) numThreads: (optional, number of threads for exploiting multi-core / multi-cpus. By default, it takes the value -1, @@ -2337,9 +2334,9 @@ def nmf(X, return_lasso=False, model=None, model: struct (optional) learned model for "retraining" the data. Returns: - U: double m x p matrix + U: double m x p matrix V: double p x n matrix (optional) - model: struct (optional) learned model to be used for + model: struct (optional) learned model to be used for "retraining" the data. U = spams.nmf(X,return_lasso = False,...) (U,V) = spams.nmf(X,return_lasso = True,...) @@ -2372,14 +2369,14 @@ def nnsc(X, return_lasso=False, model=None, lambda1=None, iter=-1, t0=1e-5, clean=True, rho=1.0, modeParam=0, batch=False): """ trainDL is an efficient implementation of the - non-negative sparse coding technique presented in + non-negative sparse coding technique presented in "Online Learning for Matrix Factorization and Sparse Coding" by Julien Mairal, Francis Bach, Jean Ponce and Guillermo Sapiro arXiv:0908.0050 - "Online Dictionary Learning for Sparse Coding" + "Online Dictionary Learning for Sparse Coding" by Julien Mairal, Francis Bach, Jean Ponce and Guillermo Sapiro ICML 2009. @@ -2389,33 +2386,33 @@ def nnsc(X, return_lasso=False, model=None, lambda1=None, X: double m x n matrix (input signals) m is the signal size n is the number of signals to decompose - return_lasso: + return_lasso: if true the function will return a tuple of matrices. Kwargs: K: (number of required factors) lambda1: (parameter) - iter: (number of iterations). If a negative number + iter: (number of iterations). If a negative number is provided it will perform the computation during the corresponding number of seconds. For instance iter=-5 learns the dictionary during 5 seconds. - batchsize: (optional, size of the minibatch, by default + batchsize: (optional, size of the minibatch, by default 512) modeParam: (optimization mode). - 1) if modeParam=0, the optimization uses the + 1) if modeParam=0, the optimization uses the parameter free strategy of the ICML paper - 2) if modeParam=1, the optimization uses the + 2) if modeParam=1, the optimization uses the parameters rho as in arXiv:0908.0050 - 3) if modeParam=2, the optimization uses exponential - decay weights with updates of the form + 3) if modeParam=2, the optimization uses exponential + decay weights with updates of the form A_{t} <- rho A_{t-1} + alpha_t alpha_t^T rho: (optional) tuning parameter (see paper arXiv:0908.0050) - t0: (optional) tuning parameter (see paper + t0: (optional) tuning parameter (see paper arXiv:0908.0050) - clean: (optional, true by default. prunes automatically + clean: (optional, true by default. prunes automatically the dictionary from unused elements). - batch: (optional, false by default, use batch learning + batch: (optional, false by default, use batch learning instead of online learning) numThreads: (optional, number of threads for exploiting multi-core / multi-cpus. By default, it takes the value -1, @@ -2423,9 +2420,9 @@ def nnsc(X, return_lasso=False, model=None, lambda1=None, model: struct (optional) learned model for "retraining" the data. Returns: - U: double m x p matrix + U: double m x p matrix V: double p x n matrix (optional) - model: struct (optional) learned model to be used for + model: struct (optional) learned model to be used for "retraining" the data. U = spams.nnsc(X,return_lasso = False,...) (U,V) = spams.nnsc(X,return_lasso = True,...) @@ -2554,8 +2551,8 @@ def displayPatches(D): tmp = np.zeros(((sizeEdge+1)*nBins+1, (sizeEdge+1)*nBins+1, V), order='F') # patch = np.zeros(sizeEdge,sizeEdge) mm = sizeEdge * sizeEdge - for ii in six.moves.xrange(0, nBins): - for jj in six.moves.xrange(0, nBins): + for ii in range(0, nBins): + for jj in range(0, nBins): io = ii jo = jj offsetx = 0 @@ -2612,7 +2609,7 @@ def readGroupStruct(file): but '[' and '] must be present. Numbers in the range (0 - Nv-1) children-list : a space separated list of node-id's If the list is empty, '->' may be omitted. - The data must obey some rules : + The data must obey some rules : - A group contains the variables of the corresponding node and of the whole subtree. - Variables attached to a node are those that are not int the subtree. - If the data destination is a Graph, there may be several independant trees, @@ -2654,7 +2651,7 @@ def groupStructOfString(s): but '[' and '] must be present. Numbers in the range (0 - Nv-1) children-list : a space separated list of node-id's If the list is empty, '->' may be omitted. - The data must obey some rules : + The data must obey some rules : - A group contains the variables of the corresponding node and of the whole subtree. - Variables attached to a node are those that are not int the subtree. - If the data destination is a Graph, there may be several independant trees, diff --git a/tests/test_decomp.py b/tests/test_decomp.py index eb6125a0c0804fc0b068cfa5e21c55f177d8ecc8..8c2c182d171c3b1f42912b719b009e2c28a715b2 100644 --- a/tests/test_decomp.py +++ b/tests/test_decomp.py @@ -1,9 +1,4 @@ -from __future__ import absolute_import, division, print_function -import six.moves - -import sys import numpy as np -import scipy import scipy.sparse as ssp import spams @@ -119,7 +114,7 @@ def test_l1L2BCD(): D = np.asfortranarray( D / np.tile(np.sqrt((D*D).sum(axis=0)), (D.shape[0], 1)), dtype=myfloat) # indices of the first signals in each group - ind_groups = np.array(six.moves.xrange(0, X.shape[1], 10), dtype=np.int32) + ind_groups = np.array(range(0, X.shape[1], 10), dtype=np.int32) # parameters of the optimization procedure are chosen itermax = 100 tol = 1e-3 @@ -293,7 +288,7 @@ def test_somp(): D = np.asfortranarray(np.random.normal(size=(64, 200))) D = np.asfortranarray( D / np.tile(np.sqrt((D*D).sum(axis=0)), (D.shape[0], 1)), dtype=myfloat) - ind_groups = np.array(six.moves.xrange(0, 10000, 10), dtype=np.int32) + ind_groups = np.array(range(0, 10000, 10), dtype=np.int32) tic = time.time() alpha = spams.somp(X, D, ind_groups, L=10, eps=0.1, numThreads=-1) tac = time.time() diff --git a/tests/test_dictLearn.py b/tests/test_dictLearn.py index c6ea44a460f9646ad4da8d02c427e64cd8744374..49096ce62bff90f9c2200b2a0d8c8edfa9940985 100644 --- a/tests/test_dictLearn.py +++ b/tests/test_dictLearn.py @@ -1,9 +1,5 @@ -from __future__ import absolute_import, division, print_function - -import sys import os import numpy as np -import scipy import scipy.sparse as ssp from PIL import Image @@ -11,23 +7,16 @@ import spams import time from test_utils import * -if not ('rand' in ssp.__dict__): - import myscipy_rand - ssprand = myscipy_rand.rand -else: - ssprand = ssp.rand - - def get_img_file_path(img): """Return path to an image file - + Arguments: - img (string): image filename without path among 'boat.png' or + img (string): image filename without path among 'boat.png' or 'lena.png'. - + Output: img_file (string): normalized path to image input filename. - + """ # check input if not img in ["boat.png", "lena.png"]: diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 0bd2bef52364fa7f441a5093ffd706c4118cf84c..af7893efd7378ef3fa86daa2838e5b3f8527bb29 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -1,20 +1,10 @@ -from __future__ import absolute_import, division, print_function -import six.moves - -import sys import numpy as np -import scipy import scipy.sparse as ssp import spams import time from test_utils import * -if not ('rand' in ssp.__dict__): - import myscipy_rand - ssprand = myscipy_rand.rand -else: - ssprand = ssp.rand - +ssprand = ssp.rand def test_sort(): n = 2000000 @@ -81,7 +71,7 @@ def test_conjGrad(): itermax = int(0.5 * len(b)) tic = time.time() - for i in six.moves.xrange(0, 20): + for i in range(0, 20): y1 = np.linalg.solve(A, b) tac = time.time() print(" Time (numpy): ", tac - tic) @@ -89,7 +79,7 @@ def test_conjGrad(): print("Mean error on b : %f" % (x1.sum() / b.shape[0])) tic = time.time() - for i in six.moves.xrange(0, 20): + for i in range(0, 20): y2 = spams.conjGrad(A, b, x0, tol, itermax) # * y2 = spams.conjGrad(A,b) tac = time.time() diff --git a/tests/test_spams.py b/tests/test_spams.py index 6e1a9c0429414beec46b8b369b27e0d2cbfbf0a3..88658709b617e85c38ca14924420e6471d59d8ab 100644 --- a/tests/test_spams.py +++ b/tests/test_spams.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import, division, print_function -import six.moves - import sys import time import test_utils @@ -68,7 +65,7 @@ def main(argv): print("**** %s ****" % testname) # exec('lstm = test_%s.tests' %testname) lstm = locals()['test_%s' % testname].tests - for i in six.moves.xrange(0, len(lstm), 2): + for i in range(0, len(lstm), 2): run_test(lstm[i], lstm[i+1]) continue else: @@ -76,7 +73,7 @@ def main(argv): for m in modules: # exec('lstm = test_%s.tests' %m) lstm = locals()['test_%s' % m].tests - for i in six.moves.xrange(0, len(lstm), 2): + for i in range(0, len(lstm), 2): if (lstm[i] == testname): found = True run_test(lstm[i], lstm[i+1])