Mentions légales du service

Skip to content
Snippets Groups Projects
Commit a72eb4cb authored by GUYET Thomas's avatar GUYET Thomas
Browse files

modification for the experiment dedicated to non-succession regularisation

parent 0ed82141
No related branches found
No related tags found
No related merge requests found
......@@ -2,57 +2,121 @@ import os
import argparse
import pickle
from gen_data import gen_synthetic_data
from gen_data import gen_synthetic_data, gen_phenosuccession_synthetic_data
from sklearn.model_selection import train_test_split
def get_arguments():
    """Build the CLI parser for the synthetic-data generator and parse argv.

    Returns:
        argparse.Namespace with the generation parameters (patients,
        medical_events, time, phenotypes, temporal_window, sliding_window,
        noise, truncate, path, test).
    """
    parser = argparse.ArgumentParser()
    # Integer-valued size parameters, registered in a single table-driven pass.
    int_specs = [
        ("-k", "--patients", "specify the number of patients (default 100)", 100),
        ("-n", "--medical_events", "specify the number of medical events (default 20)", 20),
        ("-t", "--time", "specify the length of the patients stay (default 8)", 8),
        ("-r", "--phenotypes", "specify the number of phenotypes (default 5)", 5),
        ("-tw", "--temporal_window", "specify the length of the temporal window (default 3)", 3),
    ]
    for short_opt, long_opt, help_msg, dflt in int_specs:
        parser.add_argument(short_opt, long_opt, type=int, help=help_msg, default=dflt)
    # NOTE(review): store_true combined with default=True means -sw and -tr can
    # never be turned off from the command line — confirm this is intentional.
    parser.add_argument("-sw", "--sliding_window", action="store_true", default=True,
                        help="generating patient matrices with sliding windows (default T)")
    parser.add_argument("-no", "--noise", type=float, default=0.0,
                        help="add noise (default False)")
    parser.add_argument("-tr", "--truncate", action="store_true", default=True,
                        help="truncate values greater than 1 (default True)")
    parser.add_argument("-p", "--path", default="./data.pickle",
                        help="specify the path to store the generated data")
    parser.add_argument("-tt", "--test", type=float, default=0.0,
                        help="if not nul, it generates a train/test dataset, the specified value (in) indicates the proportion for the test data")
    return parser.parse_args()
if __name__ == '__main__':
    # Script entry point: generate a synthetic tensor dataset and pickle it
    # to args.path, optionally split into train/test parts.
    args = get_arguments()
    W_, Ph_, X, params = gen_synthetic_data(
        args.patients,
        args.medical_events,
        args.time,
        args.phenotypes,
        args.temporal_window,
        sliding_window=args.sliding_window,
        noise=args.noise,
        truncate=args.truncate,
    )
    # Fix: os.path.dirname() returns "" for a bare filename (e.g. -p data.pickle)
    # and os.makedirs("") raises FileNotFoundError; guard the empty case and use
    # exist_ok=True to avoid the exists()/makedirs() race.
    out_dir = os.path.dirname(args.path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    if args.test <= 0.0 or args.test > 1.0:
        # No (valid) test proportion requested: dump the full dataset.
        # Fix: context manager so the pickle file handle is flushed and closed.
        with open(args.path, "wb") as fh:
            pickle.dump((W_, Ph_, X, params), fh)
    else:
        # Split the data into train/test dataset
        X_train, X_test, W_train, W_test = train_test_split(
            X, W_, train_size=1 - args.test, test_size=args.test
        )
        with open(args.path, "wb") as fh:
            pickle.dump((Ph_, W_train, X_train, W_test, X_test, params), fh)
def get_arguments():
    """Build the CLI parser for the synthetic-data generator and parse argv.

    Returns:
        argparse.Namespace with the generation parameters (patients,
        medical_events, time, phenotypes, temporal_window, sliding_window,
        noise, truncate, path, test, pheno).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-k",
        "--patients",
        type=int,
        help="specify the number of patients (default 100)",
        default=100,
    )
    parser.add_argument(
        "-n",
        "--medical_events",
        type=int,
        help="specify the number of medical events (default 20)",
        default=20,
    )
    parser.add_argument(
        "-t",
        "--time",
        type=int,
        help="specify the length of the patients stay (default 8)",
        default=8,
    )
    parser.add_argument(
        "-r",
        "--phenotypes",
        type=int,
        help="specify the number of phenotypes (default 5)",
        default=5,
    )
    parser.add_argument(
        "-tw",
        "--temporal_window",
        type=int,
        help="specify the length of the temporal window (default 3)",
        default=3,
    )
    parser.add_argument(
        "-sw",
        "--sliding_window",
        action="store_true",
        help="generating patient matrices with sliding windows (default T)",
        default=True,
    )
    parser.add_argument(
        "-no", "--noise", type=float, help="add noise (default False)", default=0.0
    )
    parser.add_argument(
        "-tr",
        "--truncate",
        action="store_true",
        help="truncate values greater than 1 (default True)",
        default=True,
    )
    parser.add_argument(
        "-p",
        "--path",
        help="specify the path to store the generated data",
        default="./data.pickle",
    )
    parser.add_argument(
        "-tt",
        "--test",
        default=0.0,
        type=float,
        help="if not nul, it generates a train/test dataset, the specified value (in) indicates the proportion for the test data",
    )
    # BUG FIX: action="store_true" combined with default=True made args.pheno
    # always True (passing -ph was a no-op and the gen_synthetic_data branch of
    # __main__ was unreachable). Default to False so -ph actually opts in to the
    # pheno-non-succession dataset; callers that pass -ph are unaffected.
    parser.add_argument(
        "-ph",
        "--pheno",
        action="store_true",
        default=False,
        help="use specific pheno-non-succession dataset (ignore many parameters)",
    )
    return parser.parse_args()
if __name__ == "__main__":
    # Script entry point: generate a synthetic dataset (either the dedicated
    # pheno-non-succession variant or the generic generator) and pickle it.
    args = get_arguments()
    if args.pheno:
        # Dedicated pheno-non-succession dataset; most size parameters from the
        # CLI are ignored by this generator (see its help text).
        W_, Ph_, X, params = gen_phenosuccession_synthetic_data(
            args.patients,
            args.time,
            truncate=args.truncate,
            eventdensity=0.2,
            nooverlap=True,
        )
    else:
        W_, Ph_, X, params = gen_synthetic_data(
            args.patients,
            args.medical_events,
            args.time,
            args.phenotypes,
            args.temporal_window,
            sliding_window=args.sliding_window,
            noise=args.noise,
            truncate=args.truncate,
        )
    # Fix: os.path.dirname() returns "" for a bare filename (e.g. -p data.pickle)
    # and os.makedirs("") raises FileNotFoundError; guard the empty case and use
    # exist_ok=True to avoid the exists()/makedirs() race.
    out_dir = os.path.dirname(args.path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    if args.test <= 0.0 or args.test > 1.0:
        # No (valid) test proportion requested: dump the full dataset.
        # Fix: context manager so the pickle file handle is flushed and closed.
        with open(args.path, "wb") as fh:
            pickle.dump((W_, Ph_, X, params), fh)
    else:
        # Split the data into train/test dataset
        X_train, X_test, W_train, W_test = train_test_split(
            X, W_, train_size=1 - args.test, test_size=args.test
        )
        with open(args.path, "wb") as fh:
            pickle.dump((Ph_, W_train, X_train, W_test, X_test, params), fh)
This diff is collapsed.
......@@ -21,13 +21,13 @@ normalization='True'
phenotypesuccession=0.0
sparsity=0.25
batchsize=50
epochs=100
epochs=200
for N in 20
do
for T in 30
do
for R in 4 12 36
for R in 12 4 # 4 12 36
do
Rhidden=$R
for it in {1..10}
......@@ -35,18 +35,18 @@ do
dataset="$exp/data_${it}_sw.pickle"
python3 experiments_gen_data.py -k $K -n $N -t $T -r $Rhidden -tw $Twhidden -tr -sw -p $dataset
for model in SWoTTeD
for model in "fastswotted" # "swotted" "fastswotted"
do
for loss in 'Bernoulli' 'Poisson' 'Frobenius'
for loss in 'Bernoulli' #'Poisson' 'Frobenius'
do
for phenotypesuccession in 0.0 0.125 0.25 0.5 0.75 1
do
for sparsity in 0.0 0.125 0.25 0.5 0.75 1
do
for normalization in 'True' 'False'
for normalization in 'True' # 'True' 'False'
do
echo "running SWoTTeD..."
cmd="../competitors/run_swotted.py -it $it -l $loss -p $dataset -r $R -tw $Tw -b $batchsize -e $epochs -sp $sparsity -ps $phenotypesuccession"
cmd="../competitors/run_$model.py -it $it -l $loss -p $dataset -r $R -tw $Tw -b $batchsize -e $epochs -sp $sparsity -ps $phenotypesuccession"
if [ "$normalization" = "True" ]; then
cmd="${cmd} -nr"
fi
......
#!/bin/bash
# Experiment driver for the pheno-non-succession regularisation study:
# generates one synthetic dataset per repetition (-ph variant) and runs the
# selected model(s) over a grid of hyper-parameters, appending one CSV row
# of metrics per run to $file.
exp="EXP_$(date +%F)_$(date +%s)"
mkdir -p "$exp/"
file="$exp/results.csv"
# CSV header: dataset descriptors, model hyper-parameters, then the metrics
# printed by the model script.
echo -e "it,R_hidden,Tw_hidden,K,N,T,R,Tw,loss,model,normalization,sparsity,pheno_nonsuccession,error_Ph,error_W_train,error_X_train,time,error_W_test,error_X_test" >> $file
# default synthetic dataset parameters
K=200
N=20
T=10
Rhidden=4
Twhidden=3 # 1 is mandatory for comparison with LogPar, CNTF and SWIFT in order to be able to compute the FIT
# default models' parameters
R=4
Tw=$Twhidden
loss='Bernoulli'
normalization='True'
phenotypesuccession=0.0
sparsity=0.25
batchsize=50
epochs=200
# Outer loops fix the dataset dimensions; the commented alternatives show the
# full grid used in earlier runs.
for N in 20
do
for T in 10
do
for R in 12 # 4 12 36
do
Rhidden=$R
for it in {1..10}
do
# One fresh dataset per repetition; -ph selects the pheno-non-succession
# generator, -sw/-tr enable sliding windows and value truncation.
dataset="$exp/data_${it}_sw.pickle"
python3 experiments_gen_data.py -k $K -n $N -t $T -r $Rhidden -tw $Twhidden -tr -sw -ph -p $dataset
for model in "fastswotted" # "swotted" "fastswotted"
do
for loss in 'Bernoulli' #'Poisson' 'Frobenius'
do
for phenotypesuccession in 0.0 0.5 1 #0.0 0.125 0.25 0.5 0.75 1
do
for sparsity in 0.0 0.5 1 #0.0 0.125 0.25 0.5 0.75 1
do
for normalization in 'True' # 'True' 'False'
do
echo "running SWoTTeD..."
# $cmd is deliberately expanded unquoted below so it word-splits into argv.
cmd="../competitors/run_$model.py -it $it -l $loss -p $dataset -r $R -tw $Tw -b $batchsize -e $epochs -sp $sparsity -ps $phenotypesuccession"
if [ "$normalization" = "True" ]; then
cmd="${cmd} -nr"
fi
res=$(python3 $cmd)
# Append this run's parameters followed by the metrics the model printed.
echo -e "$it,$Rhidden,$Twhidden,$K,$N,$T,$R,$Tw,$loss,$model,$normalization,$sparsity,$phenotypesuccession, $res" >> $file
done
done
done
done
done
done
done
done
done
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment