Commit 5b8db4e2 authored by MAIRAL Julien's avatar MAIRAL Julien

release of the code

parent 01b744a9
% Build script: writes the run_matlab.sh launcher, then compiles the MEX files
% listed below with the Intel compiler, MKL and (for the GPU files) CUDA/cuDNN.
%%%%%% installation paths (site specific, adapt before running) %%%%
path_mkl='/scratch2/clear/mairal/intel/compilers_and_libraries/linux/mkl/lib/intel64/';
include_mkl='/scratch2/clear/mairal/intel/mkl/include/';
pathlibiomp='/scratch2/clear/mairal/intel/compilers_and_libraries_2017/linux/lib/intel64/';
path_icc='/scratch2/clear/mairal/intel/compilers_and_libraries/linux/';
path_cuda='/scratch2/clear/mairal/cuda-9.0/';
path_matlab='/softs/stow/matlab-2016a/bin/';
path_libstd='/usr/lib/gcc/x86_64-linux-gnu/5/';
debug=false;
cuda8=false; % for cuda 8 and above
%%%%%% list of mex files %%%%
names_mklst={'mex_svm_miso','mex_permutation','mex_create_dataset','mex_centering','mex_normalize','mex_encode_cpu'};
names_mklmt={'mex_exp','mex_eig','mex_kmeans'};
% NOTE(review): the two lists above are emptied right away, so only the CUDA
% mex files are compiled. Delete the next two lines to build everything.
names_mklmt={};
names_mklst={};
names_cuda={'mex_encode_cudnn','mex_train_ckn_cudnn'};
%%%%%% various flags %%%%%
format compact;
compiler_icc=[path_icc '/bin/intel64/icpc'];
% NOTE(review): lib_mkl_sequential is defined but never used below; both MKL
% loops link against lib_mkl_mt. Confirm whether that is intended.
lib_mkl_sequential=sprintf('-Wl,--start-group %slibmkl_intel_ilp64.a %slibmkl_sequential.a %slibmkl_core.a -Wl,--end-group',path_mkl,path_mkl,path_mkl);
lib_mkl_mt=sprintf('-Wl,--start-group %slibmkl_intel_ilp64.a %slibmkl_intel_thread.a %slibmkl_core.a -Wl,--end-group -L%s -liomp5 -ldl',path_mkl,path_mkl,path_mkl,pathlibiomp);
lib_openmp='-liomp5';
% Prepend -DTIMINGS to enable the code guarded by #ifdef TIMINGS in the mex files.
defines='-DNDEBUG -DHAVE_MKL -DINT_64BITS -DAXPBY';
if cuda8
   defines=[defines ' -DCUDA_8'];
end
flags='-O3 -fopenmp -static-intel -fno-alias -align -falign-functions';
lflags='';
includes=sprintf('-I./utils/ -I%s',include_mkl);

%%%%%% launcher script %%%%%
fid=fopen('run_matlab.sh','w+');
if fid < 0
   error('Unable to open run_matlab.sh for writing');
end
fprintf(fid,'#!/bin/sh\n');
% The paths are passed as fprintf arguments (not through an intermediate
% sprintf) so that they are never re-interpreted as format strings.
fprintf(fid,'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:%s:%s:%s\n',[path_icc 'lib/intel64/'],path_mkl,[path_cuda 'lib64/']);
fprintf(fid,'export LIB_INTEL=%s\n',[path_icc 'lib/intel64/']);
fprintf(fid,'export KMP_AFFINITY=verbose,granularity=fine,compact,1,0\n');
fprintf(fid,'export LD_PRELOAD=$LIB_INTEL/libimf.so:$LIB_INTEL/libintlc.so.5:$LIB_INTEL/libiomp5.so:$LIB_INTEL/libsvml.so:%s/libstdc++.so\n',path_libstd);
% A plain double quote is used here: '\"' is not a valid fprintf escape sequence.
fprintf(fid,'%smatlab -nodisplay -singleCompThread -r "addpath(''./mex/''); "\n',path_matlab);
fclose(fid);
!chmod +x run_matlab.sh

%%%%%% mex files linked with the multi-threaded MKL %%%%%
for ii=1:length(names_mklmt)
   name=names_mklmt{ii};
   name
   str=sprintf(' -v -largeArrayDims CXX="%s" DEFINES="\\$DEFINES %s" CXXFLAGS="\\$CXXFLAGS %s" INCLUDE="\\$INCLUDE %s" LDFLAGS="\\$LDFLAGS " LINKLIBS="\\$LINKLIBS -L"%s" %s %s" mex/%s.cpp -output mex/%s.mexa64',compiler_icc,defines,flags,includes,path_mkl,lib_mkl_mt,lib_openmp,name,name);
   % mex expects one argument per token: split on whitespace, drop empty tokens
   args = regexp(str, '\s+', 'split');
   args = args(~cellfun(@isempty, args));
   mex(args{:});
end

%%%%%% mex files of the names_mklst list %%%%%
for ii=1:length(names_mklst)
   name=names_mklst{ii};
   name
   str=sprintf(' -v -largeArrayDims CXX="%s" DEFINES="\\$DEFINES %s" CXXFLAGS="\\$CXXFLAGS %s" LDFLAGS="\\$LDFLAGS " INCLUDE="\\$INCLUDE %s" LINKLIBS="\\$LINKLIBS -L"%s" %s %s" mex/%s.cpp -output mex/%s.mexa64',compiler_icc,defines,flags,includes,path_mkl,lib_mkl_mt,lib_openmp,name,name);
   args = regexp(str, '\s+', 'split');
   args = args(~cellfun(@isempty, args));
   mex(args{:});
end

%%%%%% CUDA / cuDNN mex files %%%%%
includes=[includes ' -I' path_cuda '/include/'];
% compile the CUDA kernels once; the object file is linked into every CUDA mex file
system(sprintf('%s/bin/nvcc -c utils/cuda_kernels.cu -Xcompiler -fpic',path_cuda));
system('mv cuda_kernels.o mex/');
for ii=1:length(names_cuda)
   name=names_cuda{ii};
   name
   str=sprintf(' -v -largeArrayDims CXX="%s" DEFINES="\\$DEFINES %s -DCUDA -DCUDNN" CXXFLAGS="\\$CXXFLAGS %s" INCLUDE="\\$INCLUDE %s" LDFLAGS="\\$LDFLAGS %s" LINKLIBS="\\$LINKLIBS -L"%s" -L"%s" %s -lcudart -lcublas -lcusolver -lcudnn %s mex/cuda_kernels.o" mex/%s.cpp -output mex/%s.mexa64',compiler_icc,defines,flags,includes,lflags,path_mkl,[path_cuda '/lib64/'],lib_mkl_mt,lib_openmp,name,name);
   args = regexp(str, '\s+', 'split');
   args = args(~cellfun(@isempty, args));
   mex(args{:});
end
#include <mexutils.h>
#include <common.h>
/// Reads the matrix given as first MEX input and hands it to centering().
/// The channel count comes from the optional second input and is 1 when
/// only one input was supplied. No MEX output is written.
template <typename T>
inline void callFunction(mxArray* plhs[], const mxArray*prhs[],const int nrhs) {
    Matrix<T> data;
    getMatrix<T>(prhs[0],data);

    int nchannels = 1;
    if (nrhs != 1)
        nchannels = static_cast<int>(mxGetScalar(prhs[1]));

    centering(data,nchannels);
}
/// MEX gateway: expects one or two inputs and no output, then dispatches on
/// the class of the first input (double, anything else is handled as float).
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
    const bool inputs_ok = (nrhs == 1 || nrhs == 2);
    if (!inputs_ok)
        mexErrMsgTxt("Bad number of inputs arguments");
    if (nlhs != 0)
        mexErrMsgTxt("Bad number of output arguments");

    if (mxGetClassID(prhs[0]) == mxDOUBLE_CLASS) {
        callFunction<double>(plhs,prhs,nrhs);
        return;
    }
    callFunction<float>(plhs,prhs,nrhs);
}
#include <linalg.h>
#include <mexutils.h>
#include "common.h"
/// Reads the layer descriptors from the MATLAB cell array, sizes the output
/// patch matrix and calls extract_dataset() to fill it.
///
/// plhs       MEX outputs: plhs[0] receives a ndesc x (per_image*n) matrix;
///            plhs[1] (only when nlhs>=2) an int32 column of per_image*n entries.
/// pr_layers  cell array whose ii-th cell is the struct describing layer ii+1.
/// nlayers    number of layers read from pr_layers; the last one is the layer
///            whose patches are extracted (its learned parameters are not read).
/// ntrain     upper bound on the total number of extracted patches.
/// max_memory memory budget capping the number of patches
///            (NOTE(review): looks like GB with 4 bytes per value - confirm).
/// map        input maps; map.z() is used as the number of items to sample from.
/// nlhs       number of outputs requested by the caller.
template <typename Tin, typename T>
inline void callFunctionAux(mxArray* plhs[], mxArray* pr_layers, const int nlayers, const int ntrain, const T max_memory, const Map<Tin>& map, const int nlhs) {
    Layer<T> layers[nlayers];
    for (int ii=0; ii<nlayers; ++ii) {
        mxArray* layer=mxGetCell(pr_layers,ii);
        layers[ii].num_layer=ii+1;
        layers[ii].npatch=getScalarStruct<int>(layer,"npatch");
        layers[ii].nfilters=getScalarStruct<int>(layer,"nfilters");
        layers[ii].subsampling=getScalarStruct<int>(layer,"subsampling");
        layers[ii].stride=getScalarStructDef<int>(layer,"stride",1);
        layers[ii].zero_padding=getScalarStructDef<bool>(layer,"zero_padding",false);
        layers[ii].type_layer=getScalarStruct<int>(layer,"type_layer");
        layers[ii].type_kernel=getScalarStruct<int>(layer,"type_kernel");
        /// whitening is done subsequently
        // only the layers below the last one carry learned parameters
        if (ii < nlayers-1) {
            layers[ii].sigma=getScalarStruct<T>(layer,"sigma");
            mxArray *pr_W = mxGetField(layer,0,"W");
            getMatrix(pr_W,layers[ii].W);
            mxArray *pr_b = mxGetField(layer,0,"b");
            getVector(pr_b,layers[ii].b);
            mxArray *pr_Wfilt = mxGetField(layer,0,"Wfilt");
            getMatrix(pr_Wfilt,layers[ii].Wfilt);
            mxArray *pr_mu = mxGetField(layer,0,"mu");
            getVector(pr_mu,layers[ii].mu);
            mxArray *pr_W2 = mxGetField(layer,0,"W2");
            getMatrix(pr_W2,layers[ii].W2);
        }
    }
    /// query the descriptor size and initialize the memory
    // encode the first sub-map through the nlayers-1 lower layers to learn the
    // spatial size of the maps the patches are taken from
    Map<Tin> map_zero;
    map.refSubMapZ(0,map_zero);
    Map<T> map_zero_out;
    encode_ckn_map(map_zero,layers,nlayers-1,map_zero_out);
    const INTM e = layers[nlayers-1].npatch;
    const INTM m = (map_zero_out.y()-e+1)*(map_zero_out.z()-e+1);
    const INTM n = map.z();
    const INTM ndesc=e*e*map_zero_out.x();
    const INTM max_ntrain = MIN(ntrain,floor((max_memory*1024*1024*1024)/(4*ndesc)));
    const INTM per_image = MIN(floor(max_ntrain/T(n)),m);
    // INTM may be a 64-bit integer (the build defines INT_64BITS), so it is
    // converted explicitly to match the printf conversion specifier.
    printf("Number of patches to extract: %lld\n",static_cast<long long>(per_image*n));
    plhs[0]=createMatrix<T>(ndesc,per_image*n);
    Matrix<T> X;
    getMatrix(plhs[0],X);
    Vector<int> labels;
    if (nlhs>=2) {
        plhs[1]=mxCreateNumericMatrix(static_cast<mwSize>(per_image*n),1,mxINT32_CLASS,mxREAL);
        getVector<int>(plhs[1],labels);
    }
    /* Vector<T> nrms;
       if (nlhs>=3) {
       plhs[2]=mxCreateMatrix(per_image*n,1);
       getVector<T>(plhs[2],nrms);
       }*/
    extract_dataset(map,layers,nlayers,X,labels);
}
/// Parses the MEX inputs (maps, model struct, parameter struct), configures
/// the random seed and OpenMP, then runs callFunctionAux in the precision
/// selected by the "double_precision" parameter (float by default).
template <typename Tin>
inline void callFunction(mxArray* plhs[], const mxArray*prhs[],const int nlhs) {
    Map<Tin> map;
    getMap(prhs[0],map);
    mxArray *pr_layers = mxGetField(prhs[1],0,"layer");

    const mxArray* params = prhs[2];
    const bool use_double = getScalarStructDef<bool>(params,"double_precision",false);
    const int nlayers=getScalarStruct<int>(params,"current_layer");
    const int seed=getScalarStructDef<int>(params,"seed",-1);
    // a negative seed leaves the random generator untouched
    if (!(seed < 0))
        srandom(seed);
    const double max_memory=getScalarStructDef<double>(params,"max_memory",10000.0);
    const int ntrain=getScalarStruct<int>(params,"ntrain");

    int threads = getScalarStructDef<int>(params,"threads",-1);
    if (threads == -1) {
        // -1 means "pick automatically"
#ifdef _OPENMP
        threads = MIN(MAX_THREADS,omp_get_num_procs());
#else
        threads=1;
#endif
    }
    threads=init_omp(threads);

    if (!use_double) {
        callFunctionAux<Tin,float>(plhs,pr_layers,nlayers,ntrain,max_memory,map,nlhs);
        return;
    }
    callFunctionAux<Tin,double>(plhs,pr_layers,nlayers,ntrain,max_memory,map,nlhs);
}
/// MEX gateway: expects three inputs and one or two outputs, then dispatches
/// on the class of the first input (double, uint8, anything else as float).
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
    if (nrhs != 3)
        mexErrMsgTxt("Bad number of inputs arguments");
    // Only plhs[0] and plhs[1] are ever assigned by the implementation, so a
    // request for a third output is rejected here instead of failing after
    // the whole extraction has run.
    if (nlhs != 1 && nlhs != 2)
        mexErrMsgTxt("Bad number of output arguments");
    if (mxGetClassID(prhs[0]) == mxDOUBLE_CLASS) {
        callFunction<double>(plhs,prhs,nlhs);
    } else if (mxGetClassID(prhs[0]) == mxUINT8_CLASS) {
        callFunction<unsigned char>(plhs,prhs,nlhs);
    } else {
        callFunction<float>(plhs,prhs,nlhs);
    }
}
#include <linalg.h>
#include <mexutils.h>
/// Allocates the two MEX outputs (an m x m matrix and an m x 1 vector, with m
/// the number of rows of the input) and fills them through SymEig(), which is
/// called on a copy of the input matrix.
template <typename T>
inline void callFunction(mxArray* plhs[], const mxArray*prhs[]) {
    Matrix<T> input;
    getMatrix(prhs[0],input);
    const int dim = input.m();

    // first output: dim x dim matrix
    plhs[0]=createMatrix<T>(dim,dim);
    Matrix<T> eigvecs;
    getMatrix(plhs[0],eigvecs);

    // second output: dim x 1 vector
    plhs[1]=createMatrix<T>(dim,1);
    Vector<T> eigvals;
    getVector(plhs[1],eigvals);

    // SymEig is run on a copy rather than on the caller's matrix
    Matrix<T> work;
    work.copy(input);
    work.SymEig(eigvecs,eigvals);
}
/// MEX gateway: expects exactly one input and two outputs, then dispatches on
/// the class of the input (double, anything else is handled as float).
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
    if (!(nrhs == 1))
        mexErrMsgTxt("Bad number of inputs arguments");
    if (!(nlhs == 2))
        mexErrMsgTxt("Bad number of output arguments");

    const bool is_double = (mxGetClassID(prhs[0]) == mxDOUBLE_CLASS);
    if (is_double)
        callFunction<double>(plhs,prhs);
    else
        callFunction<float>(plhs,prhs);
}
#include <linalg.h>
#include <mexutils.h>
#include "common.h"
/// Reads every layer descriptor from the MATLAB cell array, encodes the first
/// sub-map once to learn the output dimensions, allocates plhs[0] as a
/// ndesc x map.z() matrix and fills it with encode_ckn().
template <typename Tin, typename T>
inline void callFunctionAux(mxArray* plhs[], const int nlhs, mxArray* pr_layers, const int nlayers, const Map<Tin>& map, const bool verbose = true) {
    Layer<T> layers[nlayers];
    for (int idx=0; idx<nlayers; ++idx) {
        mxArray* cell=mxGetCell(pr_layers,idx);
        Layer<T>& cur = layers[idx];
        // scalar hyper-parameters
        cur.num_layer=idx+1;
        cur.npatch=getScalarStruct<int>(cell,"npatch");
        cur.nfilters=getScalarStruct<int>(cell,"nfilters");
        cur.subsampling=getScalarStruct<int>(cell,"subsampling");
        cur.stride=getScalarStructDef<int>(cell,"stride",1);
        cur.zero_padding=getScalarStructDef<bool>(cell,"zero_padding",false);
        cur.type_layer=getScalarStruct<int>(cell,"type_layer");
        cur.type_kernel=getScalarStruct<int>(cell,"type_kernel");
        cur.sigma=getScalarStruct<T>(cell,"sigma");
        // matrices and vectors stored in the layer struct
        mxArray *field_W2 = mxGetField(cell,0,"W2");
        getMatrix(field_W2,cur.W2);
        mxArray *field_W = mxGetField(cell,0,"W");
        getMatrix(field_W,cur.W);
        mxArray *field_b = mxGetField(cell,0,"b");
        getVector(field_b,cur.b);
        mxArray *field_Wfilt = mxGetField(cell,0,"Wfilt");
        getMatrix(field_Wfilt,cur.Wfilt);
        mxArray *field_mu = mxGetField(cell,0,"mu");
        getVector(field_mu,cur.mu);
    }

    // probe run on the first sub-map: only its output size is used
    Map<Tin> first_in;
    map.refSubMapZ(0,first_in);
    Map<T> first_out;
    encode_ckn_map(first_in,layers,nlayers,first_out,verbose);
    const INTM ndesc=first_out.x()*first_out.y()*first_out.z();
    if (verbose) {
        PRINT_I(first_out.x())
        PRINT_I(first_out.y())
        PRINT_I(first_out.z())
    }

    // one column of descriptors per entry along map.z()
    plhs[0]=createMatrix<T>(ndesc,map.z());
    Matrix<T> psi;
    getMatrix(plhs[0],psi);
#ifdef TIMINGS
    RESET_TIMERS
#endif
    encode_ckn(map,layers,nlayers,psi);
#ifdef TIMINGS
    PRINT_TIMERS
#endif
}
/// Parses the MEX inputs (maps, model struct, parameter struct), configures
/// OpenMP and the MKL vector-math mode, then runs callFunctionAux in the
/// precision selected by the "double_precision" parameter (float by default).
template <typename Tin>
inline void callFunction(mxArray* plhs[], const mxArray*prhs[],const int nlhs) {
    Map<Tin> map;
    getMap(prhs[0],map);

    // the number of layers is the number of cells of model.layer
    mxArray *pr_layers = mxGetField(prhs[1],0,"layer");
    const mwSize* layer_dims=mxGetDimensions(pr_layers);
    const int nlayers=layer_dims[0]*layer_dims[1];

    const mxArray* params = prhs[2];
    const bool use_double = getScalarStructDef<bool>(params,"double_precision",false);
    int threads = getScalarStructDef<int>(params,"threads",-1);
    const bool verbose = getScalarStructDef<bool>(params,"verbose",true);
    if (threads == -1) {
        // -1 means "pick automatically"
#ifdef _OPENMP
        threads = MIN(MAX_THREADS,omp_get_num_procs());
#else
        threads=1;
#endif
    }
    threads=init_omp(threads);
#ifdef HAVE_MKL
    vmlSetMode(0x00000003 | 0x00280000 | 0x00000100);
#endif

    if (!use_double) {
        callFunctionAux<Tin,float>(plhs,nlhs,pr_layers,nlayers,map,verbose);
        return;
    }
    callFunctionAux<Tin,double>(plhs,nlhs,pr_layers,nlayers,map,verbose);
}
/// MEX gateway: expects three inputs and one output, then dispatches on the
/// class of the first input (double, uint8, anything else is handled as float).
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
    if (nrhs != 3)
        mexErrMsgTxt("Bad number of inputs arguments");
    if (nlhs != 1)
        mexErrMsgTxt("Bad number of output arguments");

    switch (mxGetClassID(prhs[0])) {
        case mxDOUBLE_CLASS:
            callFunction<double>(plhs,prhs,nlhs);
            break;
        case mxUINT8_CLASS:
            callFunction<unsigned char>(plhs,prhs,nlhs);
            break;
        default:
            callFunction<float>(plhs,prhs,nlhs);
            break;
    }
}
#include <linalg.h>
#include <mexutils.h>
#include "common_cudnn.h"
/// Reads every layer descriptor (including the pooling mode) from the MATLAB
/// cell array, encodes the first sub-map once to learn the output dimensions,
/// allocates plhs[0] as a ndesc x map.z() matrix and fills it with
/// encode_ckn_cudnn(), processing batchsize entries at a time.
template <typename Tin, typename T>
inline void callFunctionAux(mxArray* plhs[], const int nlhs, mxArray* pr_layers, const int nlayers, const Map<Tin>& map, const bool verbose, const int batchsize) {
    Layer<T> layers[nlayers];
    for (int idx=0; idx<nlayers; ++idx) {
        mxArray* cell=mxGetCell(pr_layers,idx);
        Layer<T>& cur = layers[idx];
        // scalar hyper-parameters
        cur.num_layer=idx+1;
        cur.npatch=getScalarStruct<int>(cell,"npatch");
        cur.nfilters=getScalarStruct<int>(cell,"nfilters");
        cur.subsampling=getScalarStruct<int>(cell,"subsampling");
        cur.stride=getScalarStructDef<int>(cell,"stride",1);
        cur.zero_padding=getScalarStructDef<bool>(cell,"zero_padding",false);
        cur.type_layer=getScalarStruct<int>(cell,"type_layer");
        cur.type_kernel=getScalarStruct<int>(cell,"type_kernel");
        cur.sigma=getScalarStruct<T>(cell,"sigma");
        cur.pooling_mode=getScalarStructDef<pooling_mode_t>(cell,"pooling_mode",POOL_GAUSSIAN_FILTER);
        // matrices and vectors stored in the layer struct
        mxArray *field_W2 = mxGetField(cell,0,"W2");
        getMatrix(field_W2,cur.W2);
        mxArray *field_W = mxGetField(cell,0,"W");
        getMatrix(field_W,cur.W);
        mxArray *field_b = mxGetField(cell,0,"b");
        getVector(field_b,cur.b);
        mxArray *field_Wfilt = mxGetField(cell,0,"Wfilt");
        getMatrix(field_Wfilt,cur.Wfilt);
        mxArray *field_mu = mxGetField(cell,0,"mu");
        getVector(field_mu,cur.mu);
    }

    // probe run on the first sub-map (always silent): only its output size is used
    Map<Tin> first_in;
    map.refSubMapZ(0,first_in);
    Map<T> first_out;
    encode_ckn_map(first_in,layers,nlayers,first_out,false);
    const INTM ndesc=first_out.x()*first_out.y()*first_out.z();
    if (verbose) {
        PRINT_I(first_out.x())
        PRINT_I(first_out.y())
        PRINT_I(first_out.z())
    }

    // one column of descriptors per entry along map.z()
    plhs[0]=createMatrix<T>(ndesc,map.z());
    Matrix<T> psi;
    getMatrix(plhs[0],psi);
#ifdef TIMINGS
    RESET_TIMERS
#endif
    encode_ckn_cudnn(map,layers,nlayers,psi,batchsize);
#ifdef TIMINGS
    PRINT_TIMERS
#endif
}
/// Parses the MEX inputs (maps, model struct, parameter struct), initialises
/// CUDA on the requested device, runs the single-precision cuDNN encoder and
/// releases the CUDA resources.
///
/// Parameters read from prhs[2]: "double_precision" (default false), "threads"
/// (default -1), "verbose" (default true), "device" (required) and
/// "batchsize" (default 256).
/// Raises a MEX error when double precision is requested, since only the float
/// encoder is instantiated here.
template <typename Tin>
inline void callFunction(mxArray* plhs[], const mxArray*prhs[],const int nlhs) {
    Map<Tin> map;
    getMap(prhs[0],map);
    mxArray *pr_layers = mxGetField(prhs[1],0,"layer");
    const mwSize* dims_layer=mxGetDimensions(pr_layers);
    const int nlayers=dims_layer[0]*dims_layer[1];
    bool double_precision = getScalarStructDef<bool>(prhs[2],"double_precision",false);
    // read like in the CPU encoder, but not used by this GPU code path
    int threads = getScalarStructDef<int>(prhs[2],"threads",-1);
    bool verbose = getScalarStructDef<bool>(prhs[2],"verbose",true);
    const int device = getScalarStruct<int>(prhs[2],"device");
    const int batchsize = getScalarStructDef<int>(prhs[2],"batchsize",256);
    // Fail before touching the GPU: previously this case initialised and
    // destroyed CUDA, computed nothing and left plhs[0] unassigned.
    if (double_precision)
        mexErrMsgTxt("double precision is not supported by the cudnn encoder");
    init_cuda(device,true);
    callFunctionAux<Tin,float>(plhs,nlhs,pr_layers,nlayers,map,verbose,batchsize);
    destroy_cuda(true);
}
/// MEX gateway: expects three inputs and one output, then dispatches on the
/// class of the first input (double, uint8, anything else is handled as float).
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
    if (!(nrhs == 3))
        mexErrMsgTxt("Bad number of inputs arguments");
    if (!(nlhs == 1))
        mexErrMsgTxt("Bad number of output arguments");

    switch (mxGetClassID(prhs[0])) {
        case mxDOUBLE_CLASS:
            callFunction<double>(plhs,prhs,nlhs);
            break;
        case mxUINT8_CLASS:
            callFunction<unsigned char>(plhs,prhs,nlhs);
            break;
        default:
            callFunction<float>(plhs,prhs,nlhs);
            break;
    }
}
#include <mexutils.h>
/// Creates an output matrix of the same size as the input and fills it with
/// vExp() applied to the input's raw buffer, after setting the VML mode.
template <typename T>
inline void callFunction(mxArray* plhs[], const mxArray*prhs[],const int nrhs) {
    Matrix<T> src;
    getMatrix<T>(prhs[0],src);

    // NOTE(review): flags presumably select the VML accuracy, denormal
    // handling and error mode - confirm against the MKL headers
    const int vml_mode = 0x00000003 | 0x00280000 | 0x00000100;
    vmlSetMode(vml_mode);

    plhs[0]=createMatrix<T>(src.m(),src.n());
    Matrix<T> dst;
    getMatrix<T>(plhs[0],dst);

    // element count is taken from the output matrix
    vExp(dst.m()*dst.n(),src.rawX(),dst.rawX());
}
/// MEX gateway: expects one input and one output, then dispatches on the
/// class of the input (double, anything else is handled as float).
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
    if (!(nrhs == 1))
        mexErrMsgTxt("Bad number of inputs arguments");
    if (!(nlhs == 1))
        mexErrMsgTxt("Bad number of output arguments");

    if (mxGetClassID(prhs[0]) != mxDOUBLE_CLASS) {
        callFunction<float>(plhs,prhs,nrhs);
        return;
    }
    callFunction<double>(plhs,prhs,nrhs);
}
#include <mexutils.h>
#include <kmeans.h>
/// Reads the data matrix and the parameter struct ("d" required, "threads"
/// default -1, "num_iter" default 10), configures OpenMP, allocates plhs[0]
/// as an X.m() x d matrix and passes it to fast_kmeans().
template <typename T>
inline void callFunction(mxArray* plhs[], const mxArray*prhs[],const int nrhs) {
    Matrix<T> data;
    getMatrix<T>(prhs[0],data);

    const mxArray* params = prhs[1];
    const int d = getScalarStruct<int>(params,"d");
    int threads = getScalarStructDef<int>(params,"threads",-1);
    const int num_iter = getScalarStructDef<int>(params,"num_iter",10);
    if (threads == -1) {
        // -1 means "pick automatically"
#ifdef _OPENMP
        threads = MIN(MAX_THREADS,omp_get_num_procs());
#else
        threads=1;
#endif
    }
    threads=init_omp(threads);
    // mkl_set_num_threads(threads);

    // output: one column per requested centroid
    plhs[0]=createMatrix<T>(data.m(),d);
    Matrix<T> centroids;
    getMatrix<T>(plhs[0],centroids);
    fast_kmeans(data,centroids,num_iter);
}
/// MEX gateway: expects two inputs and one output, then dispatches on the
/// class of the first input (double, anything else is handled as float).
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
    if (!(nrhs == 2))
        mexErrMsgTxt("Bad number of inputs arguments");
    if (!(nlhs == 1))
        mexErrMsgTxt("Bad number of output arguments");

    const bool is_double = (mxGetClassID(prhs[0]) == mxDOUBLE_CLASS);
    if (is_double)
        callFunction<double>(plhs,prhs,nrhs);
    else
        callFunction<float>(plhs,prhs,nrhs);
}
#include <mexutils.h>
#include <common.h>
/// Calls normalize() on the input matrix. When one output is requested, a
/// 1 x X.n() array is created in plhs[0] and passed to normalize() as second
/// argument; otherwise the single-argument normalize() is used.
template <typename T>
inline void callFunction(mxArray* plhs[], const mxArray*prhs[],const int nrhs, const int nlhs) {
    Matrix<T> data;
    getMatrix<T>(prhs[0],data);

    if (nlhs != 1) {
        // no output requested
        normalize(data);
        return;
    }

    Vector<T> norms;
    plhs[0]=createMatrix<T>(1,data.n());
    getVector<T>(plhs[0],norms);
    normalize(data,norms);
}
/// MEX gateway: expects one input and zero or one output, then dispatches on
/// the class of the input (double, anything else is handled as float).
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
    if (!(nrhs == 1))
        mexErrMsgTxt("Bad number of inputs arguments");
    const bool outputs_ok = (nlhs == 0 || nlhs == 1);
    if (!outputs_ok)
        mexErrMsgTxt("Bad number of output arguments");

    if (mxGetClassID(prhs[0]) != mxDOUBLE_CLASS) {
        callFunction<float>(plhs,prhs,nrhs,nlhs);
        return;
    }
    callFunction<double>(plhs,prhs,nrhs,nlhs);
}
#include <mexutils.h>
/// Exchanges the values of X and Y through a temporary copy.
template <typename T>
inline void swap_scalar(T& X, T& Y) {
    const T saved_y = Y;
    Y = X;
    X = saved_y;
}
/// Exchanges the n elements starting at X with the n elements starting at Y,
/// using buff (room for at least n elements) as scratch space.
template <typename T>
inline void swap(T* X, T* Y, T* buff, const int n) {
    const size_t nbytes = n*sizeof(T);
    memcpy(buff,Y,nbytes);   // save Y
    memcpy(Y,X,nbytes);      // Y <- X
    memcpy(X,buff,nbytes);   // X <- saved Y
}
/// Reorders the columns of the matrix given as first MEX input so that, once
/// done, column i holds what was originally column per[i]; per is the int
/// vector given as second input and uses 0-based indices.
///
/// Raises a MEX error when per has fewer entries than the matrix has columns
/// or when one of its first n entries lies outside [0,n-1]; such input
/// previously caused out-of-bounds accesses.
template <typename T>
inline void callFunction(mxArray* plhs[], const mxArray*prhs[],const int nlhs) {
    Matrix<T> X;
    getMatrix<T>(prhs[0],X);
    const INTM m = X.m();
    const INTM n = X.n();
    T* prX=X.rawX();
    Vector<int> per;
    getVector<int>(prhs[1],per);

    // Validate before allocating anything: mexErrMsgTxt does not return, so
    // an error raised later would leak the work buffers.
    if (per.n() < n)
        mexErrMsgTxt("The permutation should have one entry per column");
    for (INTM i=0; i<n; ++i) {
        if (per[i] < 0 || per[i] >= n)
            mexErrMsgTxt("The permutation indices should be in [0,n-1]");
    }

    T* buff= new T[m];            // scratch space for one column
    INTM* who = new INTM[n];      // who[k]: original index of the column now at position k
    INTM* where = new INTM[n];    // where[c]: current position of original column c
    for (INTM j = 0; j<n; ++j) {
        who[j]=j;
        where[j]=j;
    }
    for (INTM i=0; i<n; ++i) {
        // bring original column per[i] to position i, then update the bookkeeping
        const INTM j=where[per[i]];
        swap<T>(prX+i*m,prX+j*m,buff,m);
        swap_scalar<INTM>(where[who[i]],where[who[j]]);
        swap_scalar<INTM>(who[i],who[j]);
    }
    delete[](buff);
    delete[](who);
    delete[](where);
}
/// MEX gateway: expects two inputs and no output, then dispatches on the
/// class of the first input (double, anything else is handled as float).
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
    if (!(nrhs == 2))
        mexErrMsgTxt("Bad number of inputs arguments");
    if (!(nlhs == 0))
        mexErrMsgTxt("Bad number of output arguments");

    if (mxGetClassID(prhs[0]) != mxDOUBLE_CLASS) {
        callFunction<float>(plhs,prhs,nlhs);
        return;
    }
    callFunction<double>(plhs,prhs,nlhs);
}
/* Software SPAMS v2.1 - Copyright 2009-2011 Julien Mairal
*
* This file is part of SPAMS.
*
* SPAMS is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SPAMS is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SPAMS. If not, see <http://www.gnu.org/licenses/>.
*/
#include <mex.h>
#include <mexutils.h>
#include <svm.h>
// Usage: [W,info]=mex_svm_miso(y,X,param);   (the second output is optional)
/// Trains the classifiers with miso_svm_onevsrest() from MEX inputs.
///
/// prhs[0]  dense label array of type T; all its entries are used as one
///          vector y, and the number of classes is y.maxval()+1.
/// prhs[1]  dense p x n matrix X of type T.
/// prhs[2]  parameter struct: "lambda" and "seed" are required; "max_it"
///          (default 1000*n), "eps" (default 0.001), "threads" (default -1),
///          "reweighted" (default 0), "non_uniform" (default false) and
///          "accelerated" (default false) are optional.
/// plhs[0]  receives the p x nclasses matrix W.
/// plhs[1]  (only when nlhs==2) receives a 1 x nclasses array copied from the
///          info vector filled by miso_svm_onevsrest().
/// Raises a MEX error on inconsistent types, sparse inputs, a non-struct
/// third argument, or a label count different from the number of columns of X.
template <typename T>
inline void callFunction(mxArray* plhs[], const mxArray*prhs[],
        const int nlhs) {
    if (!mexCheckType<T>(prhs[0]))
        mexErrMsgTxt("type of argument 1 is not consistent");
    if (mxIsSparse(prhs[0]))
        mexErrMsgTxt("argument 1 should not be sparse");
    if (!mexCheckType<T>(prhs[1]))
        mexErrMsgTxt("type of argument 2 is not consistent");
    // X is accessed through its raw dense buffer below
    if (mxIsSparse(prhs[1]))
        mexErrMsgTxt("argument 2 should not be sparse");
    if (!mxIsStruct(prhs[2]))
        mexErrMsgTxt("argument 3 should be a struct");

    T* pry = reinterpret_cast<T*>(mxGetPr(prhs[0]));
    const mwSize* dimsy=mxGetDimensions(prhs[0]);
    INTM my=static_cast<INTM>(dimsy[0]);
    INTM ny=static_cast<INTM>(dimsy[1]);
    Vector<T> y(pry,my*ny);

    T* prX = reinterpret_cast<T*>(mxGetPr(prhs[1]));
    const mwSize* dimsX=mxGetDimensions(prhs[1]);
    INTM p=static_cast<INTM>(dimsX[0]);
    INTM n=static_cast<INTM>(dimsX[1]);
    Matrix<T> X(prX,p,n);

    // one label per column of X is required, otherwise the solver would be
    // handed a label vector and a data matrix of inconsistent sizes
    if (my*ny != n)
        mexErrMsgTxt("argument 1 should have one label per column of argument 2");

    const int nclasses=y.maxval()+1;
    plhs[0]=createMatrix<T>(p,nclasses);
    T* prw=reinterpret_cast<T*>(mxGetPr(plhs[0]));
    Matrix<T> W(prw,p,nclasses);
    Vector<int> info;
    Vector<T> objs;

    const int max_it = getScalarStructDef<int>(prhs[2],"max_it",1000*n);
    const T eps = getScalarStructDef<T>(prhs[2],"eps",0.001);
    int threads = getScalarStructDef<int>(prhs[2],"threads",-1);
    const T lambda = getScalarStruct<T>(prhs[2],"lambda");
    const int seed = getScalarStruct<int>(prhs[2],"seed");
    const int reweighted = getScalarStructDef<int>(prhs[2],"reweighted",0);
    const bool non_uniform = getScalarStructDef<bool>(prhs[2],"non_uniform",false);
    // single seeding point (an earlier srandom(0) was always overridden here)
    srandom(seed);
    const bool accelerated = getScalarStructDef<T>(prhs[2],"accelerated",false);
    if (threads == -1) {
        // -1 means "pick automatically"
        threads=1;
#ifdef _OPENMP
        threads = MIN(MAX_THREADS,omp_get_num_procs());
#endif
    }
    threads=init_omp(threads);

    miso_svm_onevsrest(y,X,W,lambda,eps,max_it,info,objs,accelerated,reweighted,non_uniform);

    if (nlhs==2) {
        // copy the integer info vector into an array of type T
        plhs[1]=createMatrix<T>(1,nclasses);
        Vector<T> out;
        getVector(plhs[1],out);
        for (int ii=0; ii<nclasses; ++ii)
            out[ii]=info[ii];
    }
}
/// MEX gateway: expects three inputs and one or two outputs, then dispatches
/// on the class of the first input (double, anything else is handled as float).
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
    if (!(nrhs == 3))
        mexErrMsgTxt("Bad number of inputs arguments");
    const bool outputs_ok = (nlhs == 1 || nlhs == 2);
    if (!outputs_ok)
        mexErrMsgTxt("Bad number of output arguments");

    if (mxGetClassID(prhs[0]) != mxDOUBLE_CLASS) {
        callFunction<float>(plhs,prhs,nlhs);
        return;
    }
    callFunction<double>(plhs,prhs,nlhs);
}
#include <linalg.h>
#include <mexutils.h>
#include "common_cudnn.h"
template <typename T>
inline void getModel(mxArray* pr_model,Layer<T>*& layers, int& nlayers) {
mxArray *pr_layers = mxGetField(pr_model,0,"layer");
const mwSize* dims_layer=mxGetDimensions(pr_layers);
nlayers=dims_layer[0]*dims_layer[1];
layers = new Layer<T>[nlayers];
for (int ii=0; ii<nlayers; ++ii) {
mxArray* layer=mxGetCell(pr_layers,ii);