Commit 1e1151e6 authored by MARIJON Pierre

Add the overhang/maplen ratio to each GFA link line as the optional tag om

parent 638c05d5
......@@ -117,14 +117,16 @@ class Parser:
self._add_segment(l.read_a, l.len_a)
self._add_segment(l.read_b, l.len_b)
self._add_link(l.read_a, l.read_b, l.strand, l.nb_base, l.nb_match)
self._add_link(l.read_a, l.read_b, l.strand, l.nb_base, l.nb_match,
overhang/maplen)
else:
# B overlap A
logging.debug("B overlap A")
self._add_segment(l.read_a, l.len_a)
self._add_segment(l.read_b, l.len_b)
self._add_link(l.read_b, l.read_a, l.strand, l.nb_base, l.nb_match)
self._add_link(l.read_b, l.read_a, l.strand, l.nb_base, l.nb_match,
overhang/maplen)
def parse_lines(self, lines):
for line in lines:
......@@ -137,7 +139,7 @@ class Parser:
def generate_gfa(self):
for seq, length in self.__segments.items():
if not self.__containment and seq in self.__containments:
if not self.containment and seq in self.__containments:
continue
yield "S\t{}\t*\tLN:i:{}".format(seq, length)
......@@ -156,6 +158,7 @@ class Parser:
data = self.__links.get_edge_data(node, child)
link.append(data["ov_len"] + "M")
link.append("NM:i:" + str(int(data["ov_len"]) - int(data["nb_match"])))
link.append("om:f:{:.2f}".format(data["overhang_len"]))
yield "\t".join(link)
for conted, list_conter in self.__containments.items():
......@@ -173,26 +176,30 @@ class Parser:
# Map segment `name` to `length` in self.__segments; a later call with the
# same name overwrites the earlier entry. generate_gfa reads these pairs back
# to emit the "S" lines.
def _add_segment(self, name, length):
self.__segments[name] = length
def _add_link(self, name_a, name_b, strand, ov_len, nb_match, overhang_maplen):
    """Add one overlap as a directed edge of the link graph.

    Each end of the edge is a read name, either plain or suffixed with "_".
    Which form is used depends on ``strand`` and on the node names already
    present in the graph:

    * ``strand == "+"``: ``name_a_ -> name_b_`` when either suffixed name is
      already a node, otherwise ``name_a -> name_b``.
    * any other strand: ``name_a -> name_b_`` when ``name_a`` or ``name_b_``
      is already a node, otherwise ``name_a_ -> name_b``.

    NOTE(review): the "_" suffix presumably denotes the reverse orientation
    of a read -- confirm against the code that writes the "L" lines.

    Args:
        name_a: name of the read the edge starts from.
        name_b: name of the read the edge points to.
        strand: relative strand of the overlap; only "+" is special-cased.
        ov_len: overlap length, stored as edge attribute ``ov_len``.
        nb_match: number of matches, stored as edge attribute ``nb_match``.
        overhang_maplen: overhang/maplen ratio, stored as edge attribute
            ``overhang_len`` (written out later as the ``om:f:`` tag).
    """
    known_nodes = self.__links.nodes()

    if strand == "+":
        if name_a + "_" in known_nodes or name_b + "_" in known_nodes:
            source, target = name_a + "_", name_b + "_"
        else:
            source, target = name_a, name_b
    else:
        if name_a in known_nodes or name_b + "_" in known_nodes:
            source, target = name_a, name_b + "_"
        else:
            source, target = name_a + "_", name_b

    # Use %s placeholders: passing extra positional arguments without them
    # makes the logging module fail while formatting the record.
    logging.debug("add %s %s", source, target)
    self.__links.add_edge(source, target,
                          ov_len=ov_len,
                          nb_match=nb_match,
                          overhang_len=overhang_maplen)
def _add_containment(self, container, strand_ner, contained, strand_ned, pos, length):
if contained not in self.__containments:
......@@ -235,7 +242,7 @@ def main(args):
rm_all_internal = arg["remove_all_internal"]
parser = Parser(not rm_all_contain, not rm_all_internal)
for line in arg["paf"]:
parser.parse_line(line)
......
......@@ -16,10 +16,10 @@ S 2 * LN:i:10000
S 5 * LN:i:12000
S 3 * LN:i:10000
S 4 * LN:i:10000
L 1 - 2 + 8001M NM:i:0
L 5 + 1 - 7999M NM:i:0
L 2 + 3 - 8000M NM:i:0
L 3 - 4 - 8000M NM:i:0
L 1 - 2 + 8001M NM:i:0 om:f:0.25
L 5 + 1 - 7999M NM:i:0 om:f:0.25
L 2 + 3 - 8000M NM:i:0 om:f:0.25
L 3 - 4 - 8000M NM:i:0 om:f:0.00
"""
p = paf2gfa.Parser()
......@@ -50,15 +50,15 @@ S 9 * LN:i:10000
S 7 * LN:i:10000
S 8 * LN:i:10000
S 6 * LN:i:10000
L 2 + 3 - 8000M NM:i:0
L 1 - 2 + 8001M NM:i:0
L 4 - 7 + 8000M NM:i:0
L 4 - 9 - 8000M NM:i:0
L 7 + 6 + 8000M NM:i:0
L 3 - 4 - 8000M NM:i:0
L 5 + 1 - 8000M NM:i:0
L 6 + 5 - 8000M NM:i:0
L 8 + 7 + 8000M NM:i:0
L 2 + 3 - 8000M NM:i:0 om:f:0.25
L 1 - 2 + 8001M NM:i:0 om:f:0.25
L 4 - 7 + 8000M NM:i:0 om:f:0.00
L 4 - 9 - 8000M NM:i:0 om:f:0.00
L 7 + 6 + 8000M NM:i:0 om:f:0.00
L 3 - 4 - 8000M NM:i:0 om:f:0.00
L 5 + 1 - 8000M NM:i:0 om:f:0.25
L 6 + 5 - 8000M NM:i:0 om:f:0.25
L 8 + 7 + 8000M NM:i:0 om:f:0.00
"""
p = paf2gfa.Parser()
......
......@@ -11,7 +11,7 @@ def test_minimap_3contain():
S 63107 * LN:i:23535
S 59727 * LN:i:27255
S 59847 * LN:i:18269
L 59727 - 63107 + 12461M NM:i:11163
L 59727 - 63107 + 12461M NM:i:11163 om:f:0.90
C 63107 + 56001 + 5870 3275M
C 63107 + 59847 - 4047 18235M
"""
......
......@@ -7,7 +7,7 @@ import paf2gfa
# ------------->
def test_A_3_same():
line = "1\t1000\t20\t1000\t+\t2\t1000\t0\t980\t30\t980\t255"
resu = "S\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\nL\t1\t+\t2\t+\t980M\tNM:i:950\n"
resu = "S\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\nL\t1\t+\t2\t+\t980M\tNM:i:950\tom:f:0.00\n"
p = paf2gfa.Parser()
p.parse_line(line)
......@@ -19,7 +19,7 @@ def test_A_3_same():
# <-------------
def test_A_3_diff():
line = "1\t1000\t10\t1000\t-\t2\t1000\t10\t1000\t30\t980\t255"
resu = "S\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\nL\t2\t-\t1\t+\t980M\tNM:i:950\n"
resu = "S\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\nL\t2\t-\t1\t+\t980M\tNM:i:950\tom:f:0.01\n"
p = paf2gfa.Parser()
p.parse_line(line)
......@@ -31,7 +31,7 @@ def test_A_3_diff():
# ---------------->
def test_A_5_same():
line = "1\t1000\t0\t980\t+\t2\t1000\t20\t1000\t30\t980\t255"
resu = "S\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\nL\t2\t+\t1\t+\t980M\tNM:i:950\n"
resu = "S\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\nL\t2\t+\t1\t+\t980M\tNM:i:950\tom:f:0.00\n"
p = paf2gfa.Parser()
p.parse_line(line)
......@@ -43,7 +43,7 @@ def test_A_5_same():
# <----------------
def test_A_5_diff():
line = "1\t1000\t0\t980\t-\t2\t1000\t0\t980\t30\t960\t255"
resu = "S\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\nL\t2\t-\t1\t+\t960M\tNM:i:930\n"
resu = "S\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\nL\t2\t-\t1\t+\t960M\tNM:i:930\tom:f:0.02\n"
p = paf2gfa.Parser()
p.parse_lines(line.split("\n"))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment