Commit 25967ec9 authored by MARIJON Pierre's avatar MARIJON Pierre

download in parallel

parent 4eeb67a9
......@@ -7,6 +7,7 @@ import requests
import itertools
import subprocess
import statistics
import multiprocessing
from pathlib import Path
from bs4 import BeautifulSoup
......@@ -100,7 +101,7 @@ def finish(args):
assembly_path.mkdir(parents=True, exist_ok=True)
logging.info("begin finish canu assembly")
canu_call = canu_call_template.format(" ".join(fastq_files),
canu_call = canu_call_template.format(fastq_files,
os.path.abspath(assembly_path),
assembly_len, *canu_parameters)
logging.info(canu_call)
......@@ -171,7 +172,7 @@ def select(args):
assembly_path = args["assembly"] / accession / "canu"
assembly_path.mkdir(parents=True, exist_ok=True)
canu_call = canu_call_template_unitig.format(" ".join(fastq_files),
canu_call = canu_call_template_unitig.format(fastq_files,
os.path.abspath(assembly_path),
assembly_len, *canu_parameters)
logging.info(canu_call)
......@@ -243,31 +244,29 @@ def download_extract_merge(data_path, NCTC_id):
logname = os.path.abspath(str(data_path / "wget_log"))
bax_files = list()
job_list = list()
with open(logname+".out", "w") as outfile:
for file_path in itertools.chain.from_iterable(bact_dict[NCTC_id]["file_paths"].values()):
if file_path.endswith(".bax.h5"):
bax_files.append(str(data_path) + file_path)
bax_files.append(str(data_path / os.path.basename(file_path)))
logging.info("download of file : " + file_path.split("/")[-1])
cmd = ["wget", ftp_url + file_path, "-P", str(data_path)]
subprocess.call(cmd, stdout=outfile, stderr=subprocess.STDOUT,
universal_newlines=True)
cmd = ["wget", "-N", ftp_url + file_path, "-P", str(data_path)]
job_list.append(cmd)
dextract_cmd = ["dextract", "-o", data_path] + [f for f in bax_files]
p = multiprocessing.Pool()
p.map(run_paralelle, job_list)
dextract_cmd = ["dextract", "-o"] + bax_files
logname = os.path.abspath(str(data_path / "dextract_log"))
with open(logname+".out", "w") as outfile:
subprocess.call(dextract_cmd, stdout=outfile,
stderr=subprocess.STDOUT,
with open(logname+".err", "w") as errfile, open(str(data_path / "merged.fasta"), "w") as outfile:
subprocess.call(dextract_cmd, stdout=outfile, stderr=errfile,
universal_newlines=True)
cat_call = ["cat"] + list(fastq_file(data_path))
merged_fastq = str(data_path / "merged.fasta")
merged_fastq_log = merged_fastq + ".log"
with open(merged_fastq_log, "w") as errfile, open(merged_fastq, "w") as outfile:
out = subprocess.call(cat_call,
stdout=outfile, stderr=errfile,
universal_newlines=True)
def run_paralelle(cmd):
    """Run *cmd* in a subprocess, logging start/end and the exit status.

    Intended as a ``multiprocessing.Pool`` worker for the parallel wget
    downloads in ``download_extract_merge``: the command's own output is
    discarded, but a non-zero exit status is now logged instead of being
    silently swallowed, so failed downloads are visible in the main log.

    :param cmd: command and its arguments as a list,
                e.g. ``["wget", "-N", url, "-P", dest_dir]``
    :return: the subprocess's return code (0 on success)
    """
    logging.info("run " + " ".join(cmd))
    # Discard stdout/stderr: many jobs run concurrently and their progress
    # output would interleave uselessly; only the exit status matters here.
    returncode = subprocess.call(cmd, stdout=subprocess.DEVNULL,
                                 stderr=subprocess.DEVNULL,
                                 universal_newlines=True)
    if returncode != 0:
        logging.warning("command failed with status %d: %s",
                        returncode, " ".join(cmd))
    logging.info("end " + " ".join(cmd))
    return returncode
def fastq_file(data_path):
for child in data_path.iterdir():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment