Commit b5eda6aa authored by MAIRAL Julien

update

parent c30d2b39
@@ -13,7 +13,9 @@ names_mklst={'mex_svm_miso','mex_permutation','mex_create_dataset','mex_centerin
names_mklmt={'mex_exp','mex_eig','mex_kmeans'};
names_mklmt={};
names_mklst={};
%names_mklst={'mex_encode_cpu','mex_create_dataset'}
names_cuda={'mex_encode_cudnn','mex_train_ckn_cudnn'};
%names_cuda={};
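% note: with names_mklmt and names_mklst left empty, this build presumably compiles only the CUDA mex files listed in names_cuda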
%%%%%% various flags %%%%%
format compact;
@@ -63,9 +63,9 @@ inline void callFunctionAux(mxArray* plhs[], const mxArray*prhs[], const int nlh
param.update_Wb=getScalarStructDef<bool>(prhs[7],"update_Wb",true);
param.update_model=getScalarStructDef<bool>(prhs[7],"update_model",true);
param.update_miso=getScalarStructDef<bool>(prhs[7],"update_miso",false);
param.data_augmentation=getScalarStructDef<bool>(prhs[7],"data_augmentation",false);
param.preconditioning_model=getScalarStructDef<bool>(prhs[7],"preconditioning_model",false);
param.learning_rate_mode=getScalarStructDef<int>(prhs[7],"learning_rate_mode",0);
param.data_augmentation=getScalarStructDef<int>(prhs[7],"data_augmentation",0);
param.it_eval=getScalarStructDef<int>(prhs[7],"it_eval",1);
param.it_decrease=getScalarStructDef<int>(prhs[7],"it_decrease",10);
param.active_set=getScalarStructDef<bool>(prhs[7],"active_set",false);
@@ -3,12 +3,14 @@ threads=4;
zero_pad=1; % careful: zero-padding is incompatible with even patch sizes
centering=0;
whitening=0;
type_learning_init=0; % same as type_learning in script_ckn_unsupervised
type_learning_init=8; % fast, cheap initialization
dataset='cifar-10_w'; % pre-centered-and-whitened dataset
lambda2=0.01;
npatches=[3 1 3 1 3 1 3 1 3 1 3 1 3 1];
% ************************* with no data augmentation ********
% produces about 90.5% accuracy on CIFAR-10, with a single model and no data augmentation.
data_augment=0;
npatches= [3 1 3 1 3 1 3 1 3 1 3 1 3 1];
subsampling=[1 1 1 2 1 1 1 2 1 1 1 2 1 2];
nfilters=[256 128 256 128 256 256 256 256 256 256 256 256 256 256]; % the "128" values in the filter sizes are here only to gain a 4x speed-up without losing accuracy
type_kernel=[0 1 0 1 0 1 0 1 0 1 0 1 0 1]
@@ -19,16 +21,14 @@ it_eval=1; % evaluate the validation or test set every it_eval epochs (costs some time);
init_rate=10;
ndecrease_rate=100;
nepochs=105;
train_ckn_supervised_gpu(npatches,subsampling,nfilters,sigmas,type_kernel,zero_pad,centering,whitening,type_learning_init,lambda,lambda2,alt_optim,it_eval,init_rate,ndecrease_rate,nepochs,data_augment,device,threads,dataset);
% note that the initialization part is relatively slow (it is done in a crude but effective fashion).
%train_ckn_supervised_gpu(npatches,subsampling,nfilters,sigmas,type_kernel,zero_pad,centering,whitening,type_learning_init,lambda,lambda2,alt_optim,it_eval,init_rate,ndecrease_rate,nepochs,device,threads,dataset);
% produces about 90.5% accuracy on CIFAR-10, with a single model and no data augmentation.
% npatches=[3 1 3 1 3];
% subsampling=[2 1 2 1 3];
% nfilters=[128 128 128 128 128];
% type_kernel=[0 1 0 1 0]
%ndecrease_rate=50;
% alt_optim=0;
% nepochs=155;
% train_ckn_supervised_gpu(npatches,subsampling,nfilters,sigmas,type_kernel,zero_pad,centering,whitening,type_learning_init,lambda,lambda2,alt_optim,it_eval,init_rate,ndecrease_rate,nepochs,device,threads,dataset);
% ************************* with basic data augmentation ******************
% reaches about 92% accuracy
%data_augment=1;
% npatches= [3 1 3 1 3 1 3 1 3 1 3 1 3 1 3 1];
% subsampling=[1 1 1 2 1 1 1 2 1 1 1 2 1 2 1 2];
% nfilters=[384 384 384 384 384 384 384 384 384 384 384 384 384 384 384 384]; % 16 entries, matching npatches
% type_kernel=[0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1]
% nepochs=110;
% train_ckn_supervised_gpu(npatches,subsampling,nfilters,sigmas,type_kernel,zero_pad,centering,whitening,type_learning_init,lambda,lambda2,alt_optim,it_eval,init_rate,ndecrease_rate,nepochs,data_augment,device,threads,dataset);
@@ -7,7 +7,7 @@ subsampling=[2 6];
nfilters=[256 1024];
% dimension 9216, obtains around 82% accuracy with [3,2]
%nfilters=[512 8192];
% nfilters=[512 8192];
% dimension 73728, obtains around 85% accuracy with [3,2]
%nfilters=[1024 16384];
@@ -26,8 +26,13 @@ whitening=1;
gradients=0;
type_learning=0; % 0: k-means on 1000000 patches
% 1: k-means on 100000 patches
% 2: k-means on 10000 patches
% 3: random patches, no k-means
% 2: k-means on 100000 patches from 10% data
% 3: k-means on 100000 patches from 1% data
% 4: random patches (noise), no k-means
% 5: no k-means, random patches from data.
% 6: no k-means, random patches from 10% data.
% 7: no k-means, random patches from 1% data.
% 8: 2 epochs k-means from 1% data.
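% these modes are translated into (param.ntrain, param.num_iter_kmeans, param.num_train_images); see the if/elseif mappings further below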
device='mkl'; % unsupervised learning is not implemented on GPUs
threads=8;
function [] = main(npatches,subsampling,nfilters,sigmas,type_kernel,zero_pad,centering,whitening,type_init,lambda,lambda2,alt_optim,it_eval,init_rate,ndecrease_rate,nepochs,device,threads,dataset)
function [] = main(npatches,subsampling,nfilters,sigmas,type_kernel,zero_pad,centering,whitening,type_init,lambda,lambda2,alt_optim,it_eval,init_rate,ndecrease_rate,nepochs,data_augment,device,threads,dataset)
if isdeployed
npatches=str2num(npatches)
subsampling=str2num(subsampling)
@@ -16,8 +16,10 @@ if isdeployed
init_rate=str2num(init_rate)
ndecrease_rate=str2num(ndecrease_rate)
nepochs=str2num(nepochs)
data_augment=str2num(data_augment)
threads=str2num(threads)
end
data_augment
if length(sigmas)==1
sigmas=sigmas*ones(1,length(npatches));
end
@@ -65,14 +67,35 @@ elseif strcmp(dataset,'cifar-10_w')
load('data/cifar_white.mat');
end
param.num_iter_kmeans=10;
param.num_train_images=-1;
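% num_train_images=-1 means use all training images; the subsampled modes below use 5000 or 500 images, i.e., 10% or 1% of the 50000 CIFAR-10 training images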
if type_init==0
param.ntrain=1000000
elseif type_init==1
param.ntrain=100000
elseif type_init==2
param.ntrain=10000
param.ntrain=100000
param.num_train_images=5000;
elseif type_init==3
param.ntrain=0
param.ntrain=100000
param.num_train_images=500;
elseif type_init==4
param.ntrain=0;
elseif type_init==5
param.ntrain=1000000;
param.num_iter_kmeans=0;
elseif type_init==6
param.ntrain=1000000;
param.num_iter_kmeans=0;
param.num_train_images=5000;
elseif type_init==7
param.ntrain=1000000;
param.num_iter_kmeans=0;
param.num_train_images=500;
elseif type_init==8
param.ntrain=100000;
param.num_iter_kmeans=2;
param.num_train_images=500;
end
name=get_name(dataset,npatches,subsampling,nfilters,sigmas,type_kernel,zero_pad,centering,whitening,type_init,lambda,lambda2);
@@ -92,14 +115,14 @@ if exist(savename)
else
fprintf('Train the network\n');
model=training_unsupervised(Xtr,Ytr,param,dataset);
save(savename,'model');
% save(savename,'model');
end
param.device=0; % this is where you set up your GPU device
% while isempty(param.device)
% [tmp gpu]=system('gpu_getIDs.sh');
% param.device=str2num(gpu);
% end
while isempty(param.device)
[tmp gpu]=system('gpu_getIDs.sh');
param.device=str2num(gpu);
end
n=size(Xtr,3);
nte=size(Xte,3);
@@ -112,6 +135,7 @@ lambda=lambda*scal_intercept*scal_intercept*size(psi,1);
if true
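% append a constant row to the features so the intercept is learned as an extra weight; scal_intercept sets its scale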
psib=[psi; scal_intercept*ones(1,size(psi,2))];
scal_intercept
%mex_centering(psi);
param_miso.lambda=lambda/n;
param_miso.seed=0;
param_miso.threads=param.threads;
@@ -138,10 +162,11 @@ paramsgd.epochs=nepochs;
paramsgd.loss=1;
paramsgd.lambda=lambda/n;
paramsgd.lambda2=lambda2;
paramsgd.device=str2num(gpu);
paramsgd.device=param.device;
paramsgd.threads=threads;
paramsgd.scal_intercept=scal_intercept^2;
paramsgd.updateWb=true;
paramsgd.data_augmentation=data_augment;
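% build one-vs-all +/-1 targets: (1:nclasses:n*nclasses) are the linear indices of row 1 in each column of Ytr_bin,
% and adding the 0-based labels Ytr' shifts each index to row Ytr(j)+1 of column j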
Ytr_bin=-ones(nclasses,n,'single');
Ytr_bin((1:nclasses:n*nclasses) + Ytr')=1;
Yte_bin=-ones(nclasses,nte,'single');
@@ -58,14 +58,36 @@ elseif strcmp(dataset,'cifar-10_w')
load('data/cifar_white.mat');
end
if type_learning==0
param.num_iter_kmeans=10;
param.num_train_images=-1;
type_init=type_learning;
if type_init==0
param.ntrain=1000000
elseif type_learning==1
elseif type_init==1
param.ntrain=100000
elseif type_learning==2
param.ntrain=10000
elseif type_learning==3
param.ntrain=0
elseif type_init==2
param.ntrain=100000
param.num_train_images=5000;
elseif type_init==3
param.ntrain=100000
param.num_train_images=500;
elseif type_init==4
param.ntrain=0;
elseif type_init==5
param.ntrain=1000000;
param.num_iter_kmeans=0;
elseif type_init==6
param.ntrain=1000000;
param.num_iter_kmeans=0;
param.num_train_images=5000;
elseif type_init==7
param.ntrain=1000000;
param.num_iter_kmeans=0;
param.num_train_images=500;
elseif type_init==8
param.ntrain=100000;
param.num_iter_kmeans=2;
param.num_train_images=500;
end
name=get_name(dataset,npatches,subsampling,nfilters,param.sigmas,param.type_kernel,zero_pad,centering,whitening,gradients,type_learning,lambda2);
@@ -49,7 +49,12 @@ for ii=1:nlayers
model.layer{ii}=model_layer;
if (param.ntrain > 0)
X=mex_create_dataset(Input,model,param_dataset);
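% when num_train_images>0, extract patches from a random subset of the images instead of the full training set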
if param.num_train_images==-1
X=mex_create_dataset(Input,model,param_dataset);
else
per=randperm(size(Input,3));
X=mex_create_dataset(Input(:,:,per(1:param.num_train_images)),model,param_dataset);
end
nrms=mex_normalize(X);
ind=find(nrms);
@@ -63,8 +68,7 @@ for ii=1:nlayers
fprintf('Train the filters\n');
param_approx.d=model_layer.nfilters;
param_approx.threads=param.threads;
param_approx.num_iter=10;
param_approx
param_approx.num_iter=param.num_iter_kmeans;
Z = mex_kmeans(X,param_approx);
ind=find(sum(Z.^2)==0);
if ~isempty(ind)
@@ -443,11 +443,31 @@ inline void convert_image_data_map(const Tin* input, Tout* output, const int n)
};
template <typename Tin, typename Tout>
inline void convert_image_data_map_switch(const Tin* input, Tout* output, const int nc, const int channels, const int nimages) {
for (int ii=0; ii<nimages; ++ii)
for (int jj=0; jj<channels; ++jj)
for (int kk=0; kk<nc; ++kk)
output[ii*nc*channels+kk*channels+jj]=convert_image_data<Tout>(input[ii*nc*channels+jj*nc+kk]);
inline void convert_image_data_map_switch(const Tin* input, Tout* output, const int nc, const int channels, const int nimages, const bool augment = false) {
if (augment) {
const int h = static_cast<int>(sqrt(nc));
for (int ii=0; ii<nimages; ++ii) {
const bool flip = (random() % 2) == 0;
const int offset1 = static_cast<int>((random() % 5))-2;
const int offset2 = static_cast<int>((random() % 5))-2;
for (int jj=0; jj<channels; ++jj)
for (int kk=0; kk<h; ++kk)
for (int ll=0; ll<h; ++ll) {
const int ind_output= flip ? (kk)*h + h-1 -ll : kk*h+ll;
const int ind_input= (kk+offset1)*h+ll+offset2;
if (kk + offset1 < 0 || kk + offset1 >= h || ll + offset2 < 0 || ll + offset2 >= h) {
output[ii*nc*channels+ind_output*channels+jj] = 0;
} else {
output[ii*nc*channels+ind_output*channels+jj]=convert_image_data<Tout>(input[ii*nc*channels+jj*nc+ind_input]);
}
}
}
} else {
for (int ii=0; ii<nimages; ++ii)
for (int jj=0; jj<channels; ++jj)
for (int kk=0; kk<nc; ++kk)
output[ii*nc*channels+kk*channels+jj]=convert_image_data<Tout>(input[ii*nc*channels+jj*nc+kk]);
}
};
template <typename T>
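The augmentation branch above combines a fair-coin horizontal flip with a uniform shift of up to 2 pixels on each axis, zero-padding whatever the shifted window leaves uncovered. As a reading aid, here is a minimal standalone C++ sketch of the same logic for a single h x h grayscale image; augment_image is a hypothetical helper written for this note, not part of the repository, and it uses the portable rand() where the code above uses random():

#include <cstdio>
#include <cstdlib>   // rand()
#include <vector>

// Standalone sketch of the augmentation performed in convert_image_data_map_switch,
// specialized to one h x h single-channel image stored row-major.
void augment_image(const float* input, float* output, int h) {
  const bool flip = (rand() % 2) == 0;   // horizontal mirror with probability 1/2
  const int off_row = rand() % 5 - 2;    // vertical shift in {-2,...,2}
  const int off_col = rand() % 5 - 2;    // horizontal shift in {-2,...,2}
  for (int kk = 0; kk < h; ++kk)
    for (int ll = 0; ll < h; ++ll) {
      const int out_idx = flip ? kk * h + (h - 1 - ll) : kk * h + ll;
      const int in_row = kk + off_row;
      const int in_col = ll + off_col;
      output[out_idx] = (in_row < 0 || in_row >= h || in_col < 0 || in_col >= h)
          ? 0.0f                         // zero-pad outside the shifted window
          : input[in_row * h + in_col];
    }
}

int main() {
  const int h = 4;
  std::vector<float> in(h * h), out(h * h);
  for (int i = 0; i < h * h; ++i) in[i] = (float)i;
  augment_image(in.data(), out.data(), h);
  for (int i = 0; i < h * h; ++i)
    std::printf("%g%c", out[i], (i % h == h - 1) ? '\n' : ' ');
  return 0;
}

The real code additionally interleaves channels: the output index is ii*nc*channels + ind_output*channels + jj, while the input is channel-major, ii*nc*channels + jj*nc + ind_input.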
@@ -717,8 +717,8 @@ template <typename T> class GaussianPoolLayer : public PoolLayer<T> {
checkCUDNN(cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle,
_filters,_output_conv,_conv,_input_conv,
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,0,&_conv_backward_algo));
// if (_ci*_n >= 65536)
// _conv_backward_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
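// NOTE (assumption): forcing ALGO_1 below when _ci*_n >= 65536 appears to work around the
// algorithm selected by cudnnGetConvolutionBackwardDataAlgorithm misbehaving on large inputs.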
if (_ci*_n >= 65536)
_conv_backward_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
PRINT_I(_ci*_n);
PRINT_I(_conv_backward_algo);
checkCUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle,
@@ -1610,9 +1610,9 @@ struct ParamSGD {
bool update_miso;
int learning_rate_mode;
bool active_set;
int data_augmentation;
int it_eval;
int it_decrease;
bool data_augmentation;
};
template <typename Tin, typename T>
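Note that data_augmentation moves from bool to int in this commit (the mex entry point now reads it with getScalarStructDef<int>), and it is later passed to the bool augment argument of convert_image_data_map_switch, so any nonzero mode enables augmentation.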
@@ -1947,8 +1947,7 @@ inline void sgd_solver_supervised(const Map<Tin>& maps, const Matrix<T>& Y, cons
/// get the data and perform the forward pass
for (int kk=0; kk<batch_size; ++kk) {
convert_image_data_map_switch<Tin,T>(maps.rawX()+size_map*ind_active_set[per[jj*batch_size+kk % nactive]],
input_data+kk*size_map,size_map/nchannels,nchannels,1);
// if (param.data_augmentation) TODO
input_data+kk*size_map,size_map/nchannels,nchannels,1,param.data_augmentation);
}
network.forward(input_data,batch_size);
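With this change the augmentation is applied on the fly inside the SGD loop: each image of the minibatch gets a fresh random flip and offset every time it is drawn, so a different perturbation of each example is seen at every epoch.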