Commit a025de42 authored by François Gindraud

Fix run_ori.py

parent 5d061818
......@@ -79,17 +79,25 @@ def get_roc(y_true, y_pred, cutoff):
def compute_metrics(y_true, y_pred):
y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
class TryOrNan:
    """Wrap a callable so that a failing call yields NaN instead of raising.

    Calling an instance forwards the positional arguments to the wrapped
    callable ``f`` and returns its result unchanged. If ``f`` raises an
    ordinary error, NaN is returned in place of the result.

    Parameters
    ----------
    f : callable
        The function to guard.
    n_outputs : int, optional
        Number of values the caller expects back from ``f``. With the
        default of 1 the fallback is a single ``float('nan')``. With a
        larger value the fallback is a tuple of that many NaNs, so that a
        call site unpacking several return values (for example a
        ``(statistic, p_value)`` pair) still works when ``f`` fails.
    """

    def __init__(self, f, n_outputs=1):
        self.f = f
        self.n_outputs = n_outputs

    def __call__(self, *args):
        try:
            return self.f(*args)
        # Only ordinary errors are turned into NaN. KeyboardInterrupt and
        # SystemExit derive from BaseException and must keep propagating so
        # the user can still abort a long run.
        except Exception:
            nan = float('nan')
            if self.n_outputs == 1:
                return nan
            return (nan,) * self.n_outputs
metric = {}
metric['log.loss'] = log_loss(y_true, y_pred)
metric['mse'] = mean_squared_error(y_true, y_pred)
metric['accuracy'] = accuracy_score(y_true, y_pred > 0.5)
metric['auROC'] = roc_auc_score(y_true, y_pred)
metric['auROC50'] = get_roc(y_true, y_pred, 50)
metric['auPRC'] = average_precision_score(y_true, y_pred)
metric['recall_at_10_fdr'] = recall_at_fdr(y_true, y_pred, 0.10)
metric['recall_at_5_fdr'] = recall_at_fdr(y_true, y_pred, 0.05)
metric["pearson.r"], metric["pearson.p"] = stats.pearsonr(y_true, y_pred)
metric["spearman.r"], metric["spearman.p"] = stats.spearmanr(y_true, y_pred)
metric['log.loss'] = TryOrNan(log_loss)(y_true, y_pred)
metric['mse'] = TryOrNan(mean_squared_error)(y_true, y_pred)
metric['accuracy'] = TryOrNan(accuracy_score)(y_true, y_pred > 0.5)
metric['auROC'] = TryOrNan(roc_auc_score)(y_true, y_pred)
metric['auROC50'] = TryOrNan(get_roc)(y_true, y_pred, 50)
metric['auPRC'] = TryOrNan(average_precision_score)(y_true, y_pred)
metric['recall_at_10_fdr'] = TryOrNan(recall_at_fdr)(y_true, y_pred, 0.10)
metric['recall_at_5_fdr'] = TryOrNan(recall_at_fdr)(y_true, y_pred, 0.05)
metric["pearson.r"], metric["pearson.p"] = TryOrNan(stats.pearsonr)(y_true, y_pred)
metric["spearman.r"], metric["spearman.p"] = TryOrNan(stats.spearmanr)(y_true, y_pred)
df = pd.DataFrame.from_dict(metric, orient='index')
df.columns = ['value']
df.sort_index(inplace=True)
......
......@@ -4,9 +4,9 @@ import itertools
import ckn.main
import torch.utils.data
import sklearn.model_selection
import sklearn.metrics
from ckn.models import unsupCKN, supCKN
from ckn.scores import compute_metrics
from ckn.models import supCKN
import torch
from torch import nn
from torch.optim.lr_scheduler import StepLR, MultiStepLR, ReduceLROnPlateau
......@@ -50,22 +50,10 @@ def load_args():
"--penalty", metavar="penal", dest="penalty", default='l2',
type=str, choices=['l2', 'l1'],
help="regularization used in the last layer (default: l2)")
parser.add_argument(
"--outdir", metavar="outdir", dest="outdir",
default='', type=str, help="output path(default: '')")
parser.add_argument(
"--preprocessor", type=str, default='standard_row',
choices=['standard_row', 'standard_col'],
help="preprocessor for unsup CKN (default: standard_row)")
parser.add_argument(
"--use-cuda", action='store_true', default=False,
help="use gpu (default: False)")
parser.add_argument(
"--pooling", default='mean', choices=['mean', 'max'], type=str,
help='mean or max global pooling (default: mean)')
parser.add_argument(
"--logo", action='store_true',
help="generate logo, only useful after setting outdir (default: False)")
parser.add_argument( "--preprocessor", type=str, default='standard_row', choices=['standard_row', 'standard_col'], help="preprocessor for unsup CKN (default: standard_row)")
parser.add_argument( "--use-cuda", action='store_true', default=False, help="use gpu (default: False)")
parser.add_argument( "--pooling", default='mean', choices=['mean', 'max'], type=str, help='mean or max global pooling (default: mean)')
parser.add_argument( "--logo", type=str, dest="outdir", default=None, help="generate logos in dir")
args = parser.parse_args()
args.use_cuda = args.use_cuda and torch.cuda.is_available()
# check shape
......@@ -74,34 +62,6 @@ def load_args():
if args.use_cuda:
torch.cuda.manual_seed(args.seed)
np.random.seed(args.seed)
args.save_logs = False
if args.outdir != "":
args.save_logs = True
outdir = args.outdir
if not os.path.exists(outdir):
try:
os.makedirs(outdir)
except:
pass
outdir = outdir + "/{}".format(name)
if not os.path.exists(outdir):
try:
os.makedirs(outdir)
except:
pass
outdir = outdir + "/{}".format(args.method)
if not os.path.exists(outdir):
try:
os.makedirs(outdir)
except:
pass
outdir = outdir + "/{}".format(args.pooling)
if not os.path.exists(outdir):
try:
os.makedirs(outdir)
except:
pass
return args
......@@ -113,9 +73,6 @@ def main():
if args.use_cuda:
torch.cuda.manual_seed(args.seed)
np.random.seed(args.seed)
if args.save_logs:
if not os.path.exists(args.outdir):
os.makedirs(args.outdir)
loader_args = {}
if args.use_cuda:
loader_args = {'num_workers': 1, 'pin_memory': True}
......@@ -163,61 +120,53 @@ def main():
print("Testing...")
y_pred, y_true = model.predict(test_loader, proba=True, use_cuda=args.use_cuda)
scores = compute_metrics(y_true, y_pred)
scores.loc['training_time'] = training_time
print(scores)
score = sklearn.metrics.mean_squared_error(y_true, y_pred)
print(score)
print("##### Trained for ({}, {}, {})".format(regularization, pattern_size, pattern_number))
return (model, scores)
return (model, score, y_pred)
# Find best config
regularization_values = [1e-3, 1e-4, 1e-5, 1e-6]
pattern_size_values = [4, 6, 8]
regularization_values = [1e-3, 1e-4, 1e-5, 1e-6][:1]
pattern_size_values = [4, 6, 8][:1]
pattern_number_values = [50, 100]
best = None
for config in itertools.product(regularization_values, pattern_size_values, pattern_number_values):
model, scores = do_training(*config)
score = scores['mse']
model, score, y_pred = do_training(*config)
if best is None or best["score"] < score:
best = {
"model": model,
"score": score,
"y_pred": y_pred.numpy(),
"config": config,
}
print("##### Best config: {}".format(best["config"]))
model = best["model"]
if args.save_logs:
scores.to_csv(tfid_outdir + "/metric.csv",
header=['value'], index_label='metric')
np.save(tfid_outdir + "/predict", y_pred.numpy())
torch.save(
{'args': args,
'state_dict': model.state_dict()},
tfid_outdir + '/model.pkl')
if args.logo:
import matplotlib
matplotlib.use('Agg')
from ckn.data.pltlogo import draw_logo, pwm_to_bits
import matplotlib.pyplot as plt
model.cpu()
print("Generating logo...")
logo_outdir = tfid_outdir + "/logo"
if not os.path.exists(logo_outdir):
os.makedirs(logo_outdir)
pwm_all = model.compute_motif()
np.savez_compressed(logo_outdir+'/pwm', pwm=pwm_all)
for i in range(pwm_all.shape[0]):
pwm = pwm_all[i]
bits = pwm_to_bits(pwm, trim=True)
draw_logo(bits, width_scale=0.8, palette='deepbind')
plt.savefig(logo_outdir + "/logo_{}.png".format(i))
bits_rc = np.flip(np.flip(bits, axis=0), axis=1)
draw_logo(bits_rc, width_scale=0.8, palette='deepbind')
plt.savefig(logo_outdir + "/logo_{}_rc.png".format(i))
plt.close("all")
y_pred = best["y_pred"]
if args.outdir is not None:
if not os.path.exists(args.outdir):
os.makedirs(args.outdir)
np.save(args.outdir + "/predict", y_pred)
import matplotlib
matplotlib.use('Agg')
from ckn.data.pltlogo import draw_logo, pwm_to_bits
import matplotlib.pyplot as plt
model.cpu()
print("Generating logo...")
pwm_all = model.compute_motif()
np.savez_compressed(args.outdir+'/pwm', pwm=pwm_all)
for i in range(pwm_all.shape[0]):
pwm = pwm_all[i]
bits = pwm_to_bits(pwm, trim=True)
draw_logo(bits, width_scale=0.8, palette='deepbind')
plt.savefig(args.outdir + "/logo_{}.png".format(i))
bits_rc = np.flip(np.flip(bits, axis=0), axis=1)
draw_logo(bits_rc, width_scale=0.8, palette='deepbind')
plt.savefig(args.outdir + "/logo_{}_rc.png".format(i))
plt.close("all")
if __name__ == "__main__":
main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment