Commit f1f14f4b authored by MARIJON Pierre's avatar MARIJON Pierre

Output contain all pair of read even path isn't found, good choose of read orientation

parent 23c3b22f
...@@ -11,24 +11,20 @@ import subprocess ...@@ -11,24 +11,20 @@ import subprocess
# project import # project import
# Class to call help and snakemake help # print snakemake help
class MyArgumentParser(argparse.ArgumentParser): def snakemake_help(p):
def print_help(self): subprocess.run(["snakemake", "--help"])
super().print_help()
print()
subprocess.run(["snakemake", "--help"])
def main(args = None): def main(args = None):
if args is None: if args is None:
args = sys.argv[1:] args = sys.argv[1:]
parser = MyArgumentParser(prog="KNOT") parser = argparse.ArgumentParser(prog="KNOT")
# avaible # # avaible
# b e g i m o q u w x y z # # b e g i m o q u w x y z
# A B C E G H I J K L M N Q V W X Y Z # # A B C E G H I J K L M N Q V W X Y Z
parser.add_argument("-C", "--contigs", required=True, parser.add_argument("-C", "--contigs", required=True,
help="fasta file than contains contigs") help="fasta file than contains contigs")
parser.add_argument("-g", "--contigs_graph", parser.add_argument("-g", "--contigs_graph",
...@@ -41,24 +37,30 @@ def main(args = None): ...@@ -41,24 +37,30 @@ def main(args = None):
help="output prefix") help="output prefix")
parser.add_argument("--read-type", choices=["pb", "ont"], default="pb", parser.add_argument("--read-type", choices=["pb", "ont"], default="pb",
help="type of input read, default pb") help="type of input read, default pb")
parser.add_argument("--help-all", action='store_true',
help="Show knot help and snakemake help")
args, unknow_arg = parser.parse_known_args(args) args, unknow_arg = parser.parse_known_args(args)
args = vars(args) args = vars(args)
# Check parameter # Check parameter
## raw_reads or correct ## raw_reads or correct
if args["help_all"]:
parser.print_help()
snakemake_help()
return 1
go_out = False go_out = False
if args["raw_reads"] is None and args["correct_reads"] is None: if args["raw_reads"] is None and args["correct_reads"] is None:
print("You need set --raw-reads or --correct-reads\n", file=sys.stderr) print("You need set --raw-reads or --correct-reads\n", file=sys.stderr)
go_out = True go_out = True
if args["raw_reads"] is not None and args["correct_reads"] is not None: if args["raw_reads"] is not None and args["correct_reads"] is not None:
print("You can't set --raw-reads and --correct-reads at same time\n", file=sys.stderr) print("You can't set --raw-reads and --correct-reads at same time\n", file=sys.stderr)
go_out = True go_out = True
if go_out: if go_out:
parser.print_help() parser.print_help()
sys.exit(1) return 1
## if contig graph isn't set generate empty file ## if contig graph isn't set generate empty file
if args["contigs_graph"] is None: if args["contigs_graph"] is None:
...@@ -69,11 +71,11 @@ def main(args = None): ...@@ -69,11 +71,11 @@ def main(args = None):
snakemake_config_path = os.path.join(package_path, "config.yaml") snakemake_config_path = os.path.join(package_path, "config.yaml")
config = [ config = [
"contigs="+args["contigs"], "contigs="+args["contigs"],
"out_prefix="+args["output"], "out_prefix="+args["output"],
"contigs_graph="+args["contigs_graph"], "contigs_graph="+args["contigs_graph"],
"read_type="+args["read_type"], "read_type="+args["read_type"],
"package_path="+package_path, "package_path="+package_path,
] ]
if args["raw_reads"] is not None: if args["raw_reads"] is not None:
...@@ -86,13 +88,15 @@ def main(args = None): ...@@ -86,13 +88,15 @@ def main(args = None):
"--config", "--config",
*config, *config,
"--snakefile", snakemake_rule "--snakefile", snakemake_rule
] ]
call += unknow_arg call += unknow_arg
print(" ".join(call)) print(" ".join(call))
out = subprocess.call(call) out = subprocess.call(call)
return 0
if __name__ == "__main__": if __name__ == "__main__":
import sys import sys
main(sys.argv[1:]) sys.exit(main(sys.argv[1:]))
...@@ -32,12 +32,12 @@ def main(args=None): ...@@ -32,12 +32,12 @@ def main(args=None):
for tig in tig2posread.keys(): for tig in tig2posread.keys():
ext = tig[0]+"_begin" ext = tig[0]+"_begin"
print(ext, tig2posread[tig][0][2], tig2posread[tig][0][3], print(ext, tig2posread[tig][1][2], tig2posread[tig][1][3],
sep=",", file=args["output"]) sep=",", file=args["output"])
ext = tig[0]+"_end" ext = tig[0]+"_end"
tig2posread[tig].sort(key=lambda x: x[1]) tig2posread[tig].sort(key=lambda x: x[1])
print(ext, tig2posread[tig][-1][2], tig2posread[tig][-1][3], print(ext, tig2posread[tig][-2][2], tig2posread[tig][-2][3],
sep=",", file=args["output"]) sep=",", file=args["output"])
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -26,3 +26,34 @@ def __gfa_line_parse(row): ...@@ -26,3 +26,34 @@ def __gfa_line_parse(row):
second_suffix = "_end" second_suffix = "_end"
return (row[1]+first_suffix, row[3]+second_suffix) return (row[1]+first_suffix, row[3]+second_suffix)
def choose_read_ori(read, ori, pos, source):
"""
To much magique her:
read: read id
ori: orientation of read on contig
pos: at begin or end of contig
source: this read are source of search or not
"""
if source:
if pos == "begin":
if ori == "+":
return read + "-"
else:
return read + "+"
else:
if ori == "+":
return read + "+"
else:
return read + "-"
else:
if pos == "begin":
if ori == "+":
return read + "+"
else:
return read + "-"
else:
if ori == "+":
return read + "-"
else:
return read + "+"
...@@ -8,7 +8,7 @@ import argparse ...@@ -8,7 +8,7 @@ import argparse
import itertools import itertools
# project import # project import
from knot import path_search from knot import path_search
from knot import extremity_search from knot import extremity_search
from knot.path_search import paths from knot.path_search import paths
from knot.path_search import gfa2sg from knot.path_search import gfa2sg
...@@ -55,30 +55,32 @@ def main(args=None): ...@@ -55,30 +55,32 @@ def main(args=None):
for row in reader: for row in reader:
tig_ext.append(tuple(row.values())) tig_ext.append(tuple(row.values()))
print("tig1", "read1", "tig2", "read2", "path_len", "nbread_contig", print("tig1", "read1", "tig2", "read2", "nb_read", "nb_base", "paths", "nbread_contig",
sep=",", file=args["result"]) sep=",", file=args["result"])
no_search = path_search.get_ext_ovl(args["asm_graph"], args["tig2tig"]) no_search = path_search.get_ext_ovl(args["asm_graph"], args["tig2tig"])
for ext1, ext2 in itertools.permutations(tig_ext, 2): for ext1, ext2 in itertools.permutations(tig_ext, 2):
if ext1[0][:-6] == ext2[0][:-4] or ext1[0][:-4] == ext2[0][:-6]: if ext1[0][:-6] == ext2[0][:-4] or ext1[0][:-4] == ext2[0][:-6]:
continue # don't search path between same contig ext continue # don't search path between same contig ext
if(ext1, ext2) in no_search:
print(ext1,"no",ext2,"no",0,"", sep=",", file=args["result"]) node1 = path_search.choose_read_ori(ext1[1], ext1[2], ext1[0].split("_")[-1], True)
node2 = path_search.choose_read_ori(ext2[1], ext2[2], ext2[0].split("_")[-1], False)
node1 = ext1[1] + "-" if ext1[2] == "+" else ext1[1] + "+" if(ext1[0], ext2[0]) in no_search:
node2 = ext2[1] + "-" if ext2[2] == "+" else ext2[1] + "+" print(ext1[0], node1, ext2[0], node2, 0, 0, "not_search", "not_search", sep=",", file=args["result"])
continue
path = paths.get_path(sg, node1, node2) (path, weight) = paths.get_path(sg, node1, node2)
if path: if path:
nbread_contig = paths.format_node_contig_counter( nbread_contig = paths.format_node_contig_counter(
paths.path_through_contig(tig2reads, path), paths.path_through_contig(tig2reads, path),
tig2reads) tig2reads)
print(ext1[0], node1, ext2[0], node2, len(path), nbread_contig, print(ext1[0], node1, ext2[0], node2, len(path), weight, ";".join(path), nbread_contig, sep=",", file=args["result"])
sep=",", file=args["result"]) else:
print(ext1[0], node1, ext2[0], node2, 0, 0, "not_found", "not_found", sep=",", file=args["result"])
if __name__ == "__main__": if __name__ == "__main__":
main() main()
...@@ -62,8 +62,7 @@ def __add_dovetails(d, G, is_contain): ...@@ -62,8 +62,7 @@ def __add_dovetails(d, G, is_contain):
qual=qual_val, qual=qual_val,
len=d.alignment.length_on_query(), len=d.alignment.length_on_query(),
overhang_len=d.to_segment.length - d.alignment.length_on_query(), overhang_len=d.to_segment.length - d.alignment.length_on_query(),
overhang_maplen=d.get("om"), weight=int(d.to.LN-d.alignment.length_on_query()))
weight=1)
class isContain: class isContain:
......
...@@ -8,7 +8,7 @@ import networkx as nx ...@@ -8,7 +8,7 @@ import networkx as nx
def get_path(graph, n1, n2): def get_path(graph, n1, n2):
try: try:
path = nx.shortest_path(graph, n1, n2) path = nx.shortest_path(graph, n1, n2, weight="weight")
except nx.exception.NetworkXError as e: except nx.exception.NetworkXError as e:
logging.debug("Networkx exception"+str(e)) logging.debug("Networkx exception"+str(e))
path = [] path = []
...@@ -19,7 +19,7 @@ def get_path(graph, n1, n2): ...@@ -19,7 +19,7 @@ def get_path(graph, n1, n2):
logging.debug("Node not found exception "+str(e)) logging.debug("Node not found exception "+str(e))
path = [] path = []
return path return path, sum([graph.edges[x, y]["weight"] for x, y in zip(path, path[1:])])
def path_through_contig(tig2reads, path): def path_through_contig(tig2reads, path):
tig2nb_read = Counter() tig2nb_read = Counter()
......
...@@ -27,7 +27,7 @@ def main(args=None): ...@@ -27,7 +27,7 @@ def main(args=None):
skip_read = set() skip_read = set()
for tig, val in tig2readspos.items(): for tig, val in tig2readspos.items():
t = max(tig[1]*0.2, 100000) t = max(tig[1]*0.2, 10000)
for v in val: for v in val:
if v[0] > t and v[1] < tig[1] - t: if v[0] > t and v[1] < tig[1] - t:
skip_read.add(v[2]) skip_read.add(v[2])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment