Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 23c77a5b authored by jeanrjc
Browse files

Fix Error in SFS

and change how sumstats files are written and loaded
parent ca870029
No related branches found
No related tags found
No related merge requests found
......@@ -369,22 +369,31 @@ def do_sum_stats(scenario_dir, name_id, size_chr=2e6, circular=True, label="", n
except Exception as e:
logging.error("While computing LD for {}\n>>> Error: {}".format(sim_id, e))
all_sfs2 = all_sfs.groupby("N_indiv").sum() / n_indiv
all_sfs2["i_xi_norm"] = all_sfs2.i_xi / all_sfs2.i_xi.sum()
all_sfs2 = all_sfs.groupby("N_indiv").mean()
all_sfs2["i_xi_norm"] = all_sfs2.i_xi / all_sfs2.i_xi.mean()
all_sfs2["freq_indiv"] = all_sfs2.index / n_indiv
all_sfs2["i_xi_sem_norm"] = all_sfs.groupby("N_indiv").count_SNP.sem() / all_sfs2.i_xi.sum()
all_sfs2["i_xi_sem_norm"] = all_sfs.groupby("N_indiv").i_xi.sem() / all_sfs2.i_xi.mean()
all_sfs2["sim_id"] = sim_id
all_sfs2["scenario"] = scenario
all_sfs2["label"] = label
all_sfs2.reset_index(inplace=True)
all_sfs2.to_csv(os.path.join(outdir, name_id + ".sfs"), sep="\t", index=False, mode="a", header=False)
writing_mode = "w" if overwrite else "a"
with open(os.path.join(outdir, name_id + ".sfs"), writing_mode) as sfsfile:
all_sfs2.to_csv(sfsfile,
sep="\t",
index=False,
header=False if (sfsfile.tell()==0 and not overwrite) else True)
all_ld2 = all_ld.groupby("dist_group").mean()
all_ld2["sim_id"] = sim_id
all_ld2["scenario"] = scenario
all_ld2["label"] = label
all_ld2.reset_index(inplace=True)
all_ld2.to_csv(os.path.join(outdir, name_id + ".ld"), sep="\t", index=False, mode="a", header=False)
with open(os.path.join(outdir, name_id + ".ld"), writing_mode) as ldfile:
all_ld2.to_csv(ldfile,
sep="\t",
index=False,
header=False if (ldfile.tell()==0 and not overwrite) else True)
#
......@@ -417,12 +426,9 @@ def do_sum_stats(scenario_dir, name_id, size_chr=2e6, circular=True, label="", n
def load_sum_stats(name_id, label=""):
"""Load data from name_id/name_id.{sfs|ld|sel} and return the 3 df"""
df_sfs = pd.read_table(os.path.join(name_id+ ".sfs"),
header=None,
names=["N_indiv", "count_SNP", "i_xi", "freq_indiv", "i_xi_norm", "i_xi_sem_norm", "sim_id", "scenario", "label"])
df_ld = pd.read_table(os.path.join(name_id+ ".ld"),
header=None,
names=["dist_group", "mean_dist", "mean_r2", "Count", "sem_r2", "sim_id", "scenario", "run_id", "label"])
df_sfs = pd.read_table(os.path.join(name_id+ ".sfs"))
df_ld = pd.read_table(os.path.join(name_id+ ".ld"))
# df_sel = pd.read_table(os.path.join(name_id+ ".sel"),
# header=None,
# names=["position_percent", "TajD", "IHS", "NSL", "sim_id", "scenario", "run_id", "label"])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment