Mentions légales du service

Skip to content
Snippets Groups Projects
Commit a72eb4cb authored by GUYET Thomas's avatar GUYET Thomas
Browse files

modification for the experiment dedicated to non-succession regularisation

parent 0ed82141
No related branches found
No related tags found
No related merge requests found
......@@ -2,57 +2,121 @@ import os
import argparse
import pickle
from gen_data import gen_synthetic_data
from gen_data import gen_synthetic_data, gen_phenosuccession_synthetic_data
from sklearn.model_selection import train_test_split
def get_arguments():
    """Build the CLI parser for the synthetic-data generator and parse argv.

    Returns:
        argparse.Namespace with the generation parameters (patients,
        medical_events, time, phenotypes, temporal_window, sliding_window,
        noise, truncate, path, test).
    """
    parser = argparse.ArgumentParser()
    # Integer-valued size parameters, registered in a single table-driven pass.
    int_specs = [
        ("-k", "--patients", "specify the number of patients (default 100)", 100),
        ("-n", "--medical_events", "specify the number of medical events (default 20)", 20),
        ("-t", "--time", "specify the length of the patients stay (default 8)", 8),
        ("-r", "--phenotypes", "specify the number of phenotypes (default 5)", 5),
        ("-tw", "--temporal_window", "specify the length of the temporal window (default 3)", 3),
    ]
    for short_opt, long_opt, help_msg, dflt in int_specs:
        parser.add_argument(short_opt, long_opt, type=int, help=help_msg, default=dflt)
    # NOTE(review): store_true combined with default=True means -sw and -tr can
    # never be turned off from the command line — confirm this is intentional.
    parser.add_argument("-sw", "--sliding_window", action="store_true", default=True,
                        help="generating patient matrices with sliding windows (default T)")
    parser.add_argument("-no", "--noise", type=float, default=0.0,
                        help="add noise (default False)")
    parser.add_argument("-tr", "--truncate", action="store_true", default=True,
                        help="truncate values greater than 1 (default True)")
    parser.add_argument("-p", "--path", default="./data.pickle",
                        help="specify the path to store the generated data")
    parser.add_argument("-tt", "--test", type=float, default=0.0,
                        help="if not nul, it generates a train/test dataset, the specified value (in) indicates the proportion for the test data")
    return parser.parse_args()
if __name__ == '__main__':
    # Script entry point: generate a synthetic tensor dataset and pickle it
    # to args.path, optionally split into train/test parts.
    args = get_arguments()
    W_, Ph_, X, params = gen_synthetic_data(
        args.patients,
        args.medical_events,
        args.time,
        args.phenotypes,
        args.temporal_window,
        sliding_window=args.sliding_window,
        noise=args.noise,
        truncate=args.truncate,
    )
    # Fix: os.path.dirname() returns "" for a bare filename (e.g. -p data.pickle)
    # and os.makedirs("") raises FileNotFoundError; guard the empty case and use
    # exist_ok=True to avoid the exists()/makedirs() race.
    out_dir = os.path.dirname(args.path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    if args.test <= 0.0 or args.test > 1.0:
        # No (valid) test proportion requested: dump the full dataset.
        # Fix: context manager so the pickle file handle is flushed and closed.
        with open(args.path, "wb") as fh:
            pickle.dump((W_, Ph_, X, params), fh)
    else:
        # Split the data into train/test dataset
        X_train, X_test, W_train, W_test = train_test_split(
            X, W_, train_size=1 - args.test, test_size=args.test
        )
        with open(args.path, "wb") as fh:
            pickle.dump((Ph_, W_train, X_train, W_test, X_test, params), fh)
def get_arguments():
    """Build the CLI parser for the synthetic-data generator and parse argv.

    Returns:
        argparse.Namespace with the generation parameters (patients,
        medical_events, time, phenotypes, temporal_window, sliding_window,
        noise, truncate, path, test, pheno).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-k",
        "--patients",
        type=int,
        help="specify the number of patients (default 100)",
        default=100,
    )
    parser.add_argument(
        "-n",
        "--medical_events",
        type=int,
        help="specify the number of medical events (default 20)",
        default=20,
    )
    parser.add_argument(
        "-t",
        "--time",
        type=int,
        help="specify the length of the patients stay (default 8)",
        default=8,
    )
    parser.add_argument(
        "-r",
        "--phenotypes",
        type=int,
        help="specify the number of phenotypes (default 5)",
        default=5,
    )
    parser.add_argument(
        "-tw",
        "--temporal_window",
        type=int,
        help="specify the length of the temporal window (default 3)",
        default=3,
    )
    parser.add_argument(
        "-sw",
        "--sliding_window",
        action="store_true",
        help="generating patient matrices with sliding windows (default T)",
        default=True,
    )
    parser.add_argument(
        "-no", "--noise", type=float, help="add noise (default False)", default=0.0
    )
    parser.add_argument(
        "-tr",
        "--truncate",
        action="store_true",
        help="truncate values greater than 1 (default True)",
        default=True,
    )
    parser.add_argument(
        "-p",
        "--path",
        help="specify the path to store the generated data",
        default="./data.pickle",
    )
    parser.add_argument(
        "-tt",
        "--test",
        default=0.0,
        type=float,
        help="if not nul, it generates a train/test dataset, the specified value (in) indicates the proportion for the test data",
    )
    # BUG FIX: action="store_true" combined with default=True made args.pheno
    # always True (passing -ph was a no-op and the gen_synthetic_data branch of
    # __main__ was unreachable). Default to False so -ph actually opts in to the
    # pheno-non-succession dataset; callers that pass -ph are unaffected.
    parser.add_argument(
        "-ph",
        "--pheno",
        action="store_true",
        default=False,
        help="use specific pheno-non-succession dataset (ignore many parameters)",
    )
    return parser.parse_args()
if __name__ == "__main__":
    # Script entry point: generate a synthetic dataset (either the dedicated
    # pheno-non-succession variant or the generic generator) and pickle it.
    args = get_arguments()
    if args.pheno:
        # Dedicated pheno-non-succession dataset; most size parameters from the
        # CLI are ignored by this generator (see its help text).
        W_, Ph_, X, params = gen_phenosuccession_synthetic_data(
            args.patients,
            args.time,
            truncate=args.truncate,
            eventdensity=0.2,
            nooverlap=True,
        )
    else:
        W_, Ph_, X, params = gen_synthetic_data(
            args.patients,
            args.medical_events,
            args.time,
            args.phenotypes,
            args.temporal_window,
            sliding_window=args.sliding_window,
            noise=args.noise,
            truncate=args.truncate,
        )
    # Fix: os.path.dirname() returns "" for a bare filename (e.g. -p data.pickle)
    # and os.makedirs("") raises FileNotFoundError; guard the empty case and use
    # exist_ok=True to avoid the exists()/makedirs() race.
    out_dir = os.path.dirname(args.path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    if args.test <= 0.0 or args.test > 1.0:
        # No (valid) test proportion requested: dump the full dataset.
        # Fix: context manager so the pickle file handle is flushed and closed.
        with open(args.path, "wb") as fh:
            pickle.dump((W_, Ph_, X, params), fh)
    else:
        # Split the data into train/test dataset
        X_train, X_test, W_train, W_test = train_test_split(
            X, W_, train_size=1 - args.test, test_size=args.test
        )
        with open(args.path, "wb") as fh:
            pickle.dump((Ph_, W_train, X_train, W_test, X_test, params), fh)
This diff is collapsed.
......@@ -21,13 +21,13 @@ normalization='True'
phenotypesuccession=0.0
sparsity=0.25
batchsize=50
epochs=100
epochs=200
for N in 20
do
for T in 30
do
for R in 4 12 36
for R in 12 4 # 4 12 36
do
Rhidden=$R
for it in {1..10}
......@@ -35,18 +35,18 @@ do
dataset="$exp/data_${it}_sw.pickle"
python3 experiments_gen_data.py -k $K -n $N -t $T -r $Rhidden -tw $Twhidden -tr -sw -p $dataset
for model in SWoTTeD
for model in "fastswotted" # "swotted" "fastswotted"
do
for loss in 'Bernoulli' 'Poisson' 'Frobenius'
for loss in 'Bernoulli' #'Poisson' 'Frobenius'
do
for phenotypesuccession in 0.0 0.125 0.25 0.5 0.75 1
do
for sparsity in 0.0 0.125 0.25 0.5 0.75 1
do
for normalization in 'True' 'False'
for normalization in 'True' # 'True' 'False'
do
echo "running SWoTTeD..."
cmd="../competitors/run_swotted.py -it $it -l $loss -p $dataset -r $R -tw $Tw -b $batchsize -e $epochs -sp $sparsity -ps $phenotypesuccession"
cmd="../competitors/run_$model.py -it $it -l $loss -p $dataset -r $R -tw $Tw -b $batchsize -e $epochs -sp $sparsity -ps $phenotypesuccession"
if [ "$normalization" = "True" ]; then
cmd="${cmd} -nr"
fi
......
#!/bin/bash
# Experiment driver for the pheno-non-succession regularisation study:
# generates one synthetic dataset per repetition (-ph variant) and runs the
# selected model(s) over a grid of hyper-parameters, appending one CSV row
# of metrics per run to $file.
exp="EXP_$(date +%F)_$(date +%s)"
mkdir -p "$exp/"
file="$exp/results.csv"
# CSV header: dataset descriptors, model hyper-parameters, then the metrics
# printed by the model script.
echo -e "it,R_hidden,Tw_hidden,K,N,T,R,Tw,loss,model,normalization,sparsity,pheno_nonsuccession,error_Ph,error_W_train,error_X_train,time,error_W_test,error_X_test" >> $file
# default synthetic dataset parameters
K=200
N=20
T=10
Rhidden=4
Twhidden=3 # 1 is mandatory for comparison with LogPar, CNTF and SWIFT in order to be able to compute the FIT
# default models' parameters
R=4
Tw=$Twhidden
loss='Bernoulli'
normalization='True'
phenotypesuccession=0.0
sparsity=0.25
batchsize=50
epochs=200
# Outer loops fix the dataset dimensions; the commented alternatives show the
# full grid used in earlier runs.
for N in 20
do
for T in 10
do
for R in 12 # 4 12 36
do
Rhidden=$R
for it in {1..10}
do
# One fresh dataset per repetition; -ph selects the pheno-non-succession
# generator, -sw/-tr enable sliding windows and value truncation.
dataset="$exp/data_${it}_sw.pickle"
python3 experiments_gen_data.py -k $K -n $N -t $T -r $Rhidden -tw $Twhidden -tr -sw -ph -p $dataset
for model in "fastswotted" # "swotted" "fastswotted"
do
for loss in 'Bernoulli' #'Poisson' 'Frobenius'
do
for phenotypesuccession in 0.0 0.5 1 #0.0 0.125 0.25 0.5 0.75 1
do
for sparsity in 0.0 0.5 1 #0.0 0.125 0.25 0.5 0.75 1
do
for normalization in 'True' # 'True' 'False'
do
echo "running SWoTTeD..."
# $cmd is deliberately expanded unquoted below so it word-splits into argv.
cmd="../competitors/run_$model.py -it $it -l $loss -p $dataset -r $R -tw $Tw -b $batchsize -e $epochs -sp $sparsity -ps $phenotypesuccession"
if [ "$normalization" = "True" ]; then
cmd="${cmd} -nr"
fi
res=$(python3 $cmd)
# Append this run's parameters followed by the metrics the model printed.
echo -e "$it,$Rhidden,$Twhidden,$K,$N,$T,$R,$Tw,$loss,$model,$normalization,$sparsity,$phenotypesuccession, $res" >> $file
done
done
done
done
done
done
done
done
done
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment