Commit 94dbe403 authored by MARIJON Pierre's avatar MARIJON Pierre

Replace gfapy with networkx DiGraph to reduce runtime

parent 6b486e49
This diff is collapsed.
......@@ -17,10 +17,10 @@ S 2 * LN:i:10000
S 5 * LN:i:10000
S 3 * LN:i:10000
S 4 * LN:i:10000
L 1 + 2 - 8001M NM:i:0 om:f:0.0
L 1 - 5 + 7999M NM:i:0 om:f:0.0
L 2 - 3 + 8000M NM:i:0 om:f:0.0
L 3 + 4 + 8000M NM:i:0 om:f:0.0
L 1 + 2 - 8001M NM:i:0 om:f:0.00
L 1 - 5 + 7999M NM:i:0 om:f:0.00
L 2 - 3 + 8000M NM:i:0 om:f:0.00
L 3 + 4 + 8000M NM:i:0 om:f:0.00
"""
p = paf2gfa.Parser()
......@@ -52,17 +52,21 @@ S 9 * LN:i:10000
S 7 * LN:i:10000
S 8 * LN:i:10000
S 6 * LN:i:10000
L 1 + 2 - 8001M NM:i:0 om:f:0.0
L 5 - 1 + 8000M NM:i:0 om:f:0.0
L 2 - 3 + 8000M NM:i:0 om:f:0.0
L 3 + 4 + 8000M NM:i:0 om:f:0.0
L 4 + 9 + 8000M NM:i:0 om:f:0.0
L 4 + 7 - 8000M NM:i:0 om:f:0.0
L 8 + 7 + 8000M NM:i:0 om:f:0.0
L 7 + 6 + 8000M NM:i:0 om:f:0.0
L 6 + 5 - 8000M NM:i:0 om:f:0.0
L 1 + 2 - 8001M NM:i:0 om:f:0.00
L 5 - 1 + 8000M NM:i:0 om:f:0.00
L 2 - 3 + 8000M NM:i:0 om:f:0.00
L 3 + 4 + 8000M NM:i:0 om:f:0.00
L 4 + 9 + 8000M NM:i:0 om:f:0.00
L 4 + 7 - 8000M NM:i:0 om:f:0.00
L 8 + 7 + 8000M NM:i:0 om:f:0.00
L 7 + 6 + 8000M NM:i:0 om:f:0.00
L 6 + 5 - 8000M NM:i:0 om:f:0.00
"""
p = paf2gfa.Parser()
p.parse_lines(line.split("\n"))
print(resu)
print()
print(p.get_gfa())
assert set(resu.strip().split("\n")) == set(p.get_gfa().strip().split("\n"))
......@@ -17,7 +17,7 @@ C 1 + 2 + 500 1000M
p = paf2gfa.Parser()
p.parse_line(line)
assert resu.strip() == p.get_gfa().strip()
assert set(resu.strip().split("\n")) == set(p.get_gfa().strip().split("\n"))
# B is contained in A
# --------------------------->
......@@ -29,7 +29,7 @@ def test_A_contain_B_keep_contain_diff():
p = paf2gfa.Parser()
p.parse_line(line)
assert resu.strip() == p.get_gfa().strip()
assert set(resu.strip().split("\n")) == set(p.get_gfa().strip().split("\n"))
# A is contained in B
# --------->
......@@ -41,7 +41,7 @@ def test_A_contained_B_keep_contain_same():
p = paf2gfa.Parser()
p.parse_line(line)
assert resu.strip() == p.get_gfa().strip()
assert set(resu.strip().split("\n")) == set(p.get_gfa().strip().split("\n"))
# A is contained in B
# --------->
......@@ -53,7 +53,7 @@ def test_A_contained_B_keep_contain_diff():
p = paf2gfa.Parser()
p.parse_line(line)
assert resu.strip() == p.get_gfa().strip()
assert set(resu.strip().split("\n")) == set(p.get_gfa().strip().split("\n"))
# B is contained in A
# --------------------------->
......@@ -65,7 +65,7 @@ def test_A_contain_B_leave_contain_same():
p = paf2gfa.Parser(False)
p.parse_line(line)
assert resu.strip() == p.get_gfa().strip()
assert set(resu.strip().split("\n")) == set(p.get_gfa().strip().split("\n"))
# B is contained in A
# --------------------------->
......@@ -77,7 +77,7 @@ def test_A_contain_B_leave_contain_diff():
p = paf2gfa.Parser(False)
p.parse_line(line)
assert resu.strip() == p.get_gfa().strip()
assert set(resu.strip().split("\n")) == set(p.get_gfa().strip().split("\n"))
# A is contained in B
# --------->
......@@ -89,7 +89,7 @@ def test_A_contained_B_leave_contain_same():
p = paf2gfa.Parser(False)
p.parse_line(line)
assert resu.strip() == p.get_gfa().strip()
assert set(resu.strip().split("\n")) == set(p.get_gfa().strip().split("\n"))
# A is contained in B
# --------->
......@@ -101,7 +101,7 @@ def test_A_contained_B_leave_contain_diff():
p = paf2gfa.Parser(False)
p.parse_line(line)
assert resu.strip() == p.get_gfa().strip()
assert set(resu.strip().split("\n")) == set(p.get_gfa().strip().split("\n"))
# A contains B
# B contains A
......@@ -123,5 +123,5 @@ C\tA\t+\tB\t+\t1\t1000M
p = paf2gfa.Parser()
p.parse_lines(line.split("\n"))
assert resu.strip() == p.get_gfa().strip()
assert set(resu.strip().split("\n")) == set(p.get_gfa().strip().split("\n"))
......@@ -15,7 +15,7 @@ B\t1000\t0\t800\t+\tA\t1000\t200\t1000\t800\t800\t255
H\tVN:Z:1.0
S\tA\t*\tLN:i:1000
S\tB\t*\tLN:i:1000
L\tA\t+\tB\t+\t800M\tNM:i:0\tom:f:0.0
L\tA\t+\tB\t+\t800M\tNM:i:0\tom:f:0.00
"""
p = paf2gfa.Parser()
......
......@@ -20,5 +20,5 @@ C 63107 + 59847 - 4047 18235M
p = paf2gfa.Parser()
p.parse_lines(line.split("\n"))
assert resu.strip() == p.get_gfa().strip()
assert set(resu.strip().split("\n")) == set(p.get_gfa().strip().split("\n"))
......@@ -7,43 +7,43 @@ import paf2gfa
# ------------->
def test_A_3_same():
    """Parse one PAF record with strand ``+`` and compare the GFA text.

    The expected output is a header, segments ``1`` and ``2`` and a
    single ``1 + 2 +`` link with a ``980M`` overlap.
    """
    paf_record = "1\t1000\t20\t1000\t+\t2\t1000\t0\t980\t30\t980\t255"
    expected_gfa = (
        "H\tVN:Z:1.0\n"
        "S\t1\t*\tLN:i:1000\n"
        "S\t2\t*\tLN:i:1000\n"
        "L\t1\t+\t2\t+\t980M\tNM:i:950\tom:f:0.0\n"
    )

    parser = paf2gfa.Parser()
    parser.parse_line(paf_record)
    produced_gfa = parser.get_gfa()

    # Surrounding whitespace (e.g. the trailing newline) is not compared.
    assert expected_gfa.strip() == produced_gfa.strip()
# overlap at A 3' orientation is different
# ---------------->
# <-------------
def test_A_3_diff():
    """Parse one PAF record with strand ``-`` and compare the GFA text.

    The expected output links segment ``1`` forward to segment ``2``
    reversed (``1 + 2 -``) with a ``980M`` overlap.
    """
    header = "H\tVN:Z:1.0\n"
    segments = "S\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\n"
    link = "L\t1\t+\t2\t-\t980M\tNM:i:950\tom:f:0.0\n"
    wanted = header + segments + link

    converter = paf2gfa.Parser()
    converter.parse_line(
        "1\t1000\t10\t1000\t-\t2\t1000\t10\t1000\t30\t980\t255"
    )

    # Leading/trailing whitespace is ignored on both sides.
    assert wanted.strip() == converter.get_gfa().strip()
# overlap at A 5' orientation is the same
# ------------->
# ---------------->
def test_A_5_same():
    """Parse one PAF record with strand ``+`` and compare the GFA text.

    Here the expected link runs from segment ``2`` to segment ``1``
    (``2 + 1 +``) with a ``980M`` overlap.
    """
    record = "1\t1000\t0\t980\t+\t2\t1000\t20\t1000\t30\t980\t255"
    gfa_lines = [
        "H\tVN:Z:1.0",
        "S\t1\t*\tLN:i:1000",
        "S\t2\t*\tLN:i:1000",
        "L\t2\t+\t1\t+\t980M\tNM:i:950\tom:f:0.0",
    ]
    # Every expected line, including the last, ends with a newline.
    expected = "".join(entry + "\n" for entry in gfa_lines)

    gfa_builder = paf2gfa.Parser()
    gfa_builder.parse_line(record)

    assert expected.strip() == gfa_builder.get_gfa().strip()
# overlap at A 5' orientation is different
# ------------->
# <----------------
def test_A_5_diff():
line = "1\t1000\t0\t980\t-\t2\t1000\t0\t980\t30\t960\t255"
resu = "H\tVN:Z:1.0\nS\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\nL\t1\t-\t2\t+\t960M\tNM:i:930\tom:f:0.0\n"
resu = "H\tVN:Z:1.0\nS\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\nL\t1\t+\t2\t+\t980M\tNM:i:950\tom:f:0.00\n"
p = paf2gfa.Parser()
p.parse_line(line)
assert resu.strip() == p.get_gfa().strip()
# overlap at A 3' orientation is different
# ---------------->
# <-------------
def test_A_3_diff():
    """Check the GFA text produced for a single strand ``-`` PAF record.

    The expected link is ``1 + 2 -`` with a ``980M`` overlap and an
    ``om`` tag written with two decimals.
    """
    paf_record = "1\t1000\t10\t1000\t-\t2\t1000\t10\t1000\t30\t980\t255"
    expected_gfa = (
        "H\tVN:Z:1.0\n"
        "S\t1\t*\tLN:i:1000\n"
        "S\t2\t*\tLN:i:1000\n"
        "L\t1\t+\t2\t-\t980M\tNM:i:950\tom:f:0.00\n"
    )

    parser = paf2gfa.Parser()
    parser.parse_line(paf_record)
    actual_gfa = parser.get_gfa()

    # Compare without surrounding whitespace such as the final newline.
    assert expected_gfa.strip() == actual_gfa.strip()
# overlap at A 5' orientation is the same
# ------------->
# ---------------->
def test_A_5_same():
    """Check the GFA text produced for a single strand ``+`` PAF record.

    The expected link is ``2 + 1 +`` with a ``980M`` overlap and an
    ``om`` tag written with two decimals.
    """
    header = "H\tVN:Z:1.0\n"
    segments = "S\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\n"
    link = "L\t2\t+\t1\t+\t980M\tNM:i:950\tom:f:0.00\n"
    wanted = header + segments + link

    converter = paf2gfa.Parser()
    converter.parse_line(
        "1\t1000\t0\t980\t+\t2\t1000\t20\t1000\t30\t980\t255"
    )

    # Leading/trailing whitespace is ignored on both sides.
    assert wanted.strip() == converter.get_gfa().strip()
# overlap at A 5' orientation is different
# ------------->
# <----------------
def test_A_5_diff():
line = "1\t1000\t0\t980\t-\t2\t1000\t0\t980\t30\t960\t255"
resu = "H\tVN:Z:1.0\nS\t1\t*\tLN:i:1000\nS\t2\t*\tLN:i:1000\nL\t1\t-\t2\t+\t960M\tNM:i:930\tom:f:0.00\n"
p = paf2gfa.Parser()
p.parse_lines(line.split("\n"))
......
import pytest
import logging
import paf2gfa
def test_overlap_poped():
    """Check two parser configurations agree on the cleaned GFA output.

    The same eight PAF records are fed to a default ``Parser()`` and to
    ``Parser(False, False)``. After ``clean_data()`` on both, the GFA
    texts are compared as unordered sets of lines, so line order does
    not matter.
    """
    # NOTE(review): the fields below are space-separated as seen in this
    # view; PAF is normally tab-delimited -- confirm against the real file.
    paf = """
57569 26221 1475 20050 - 60137 26950 8 18545 2156 18575 255 cm:i:258
57569 26221 195 18981 - 58292 33025 73 18781 1881 18786 255 cm:i:210
57569 26221 8737 25698 + 59880 37403 338 17320 1878 16982 255 cm:i:209
58292 33025 79 25731 + 60137 26950 1097 26798 2715 25701 255 cm:i:295
58292 33025 8282 32847 - 58572 32242 7553 32158 2364 24605 255 cm:i:260
58292 33025 83 10607 - 59880 37403 35 10589 985 10554 255 cm:i:102
58292 33025 26451 32902 + 60730 26036 56 6495 526 6451 255 cm:i:58
59880 37403 10 11577 - 60137 26950 109 11633 1100 11567 255 cm:i:122
"""

    all_ov = paf2gfa.Parser()
    selected_ov = paf2gfa.Parser(False, False)

    # Splitting on "\n" also yields empty first/last entries; they are
    # passed through unchanged, exactly as before.
    all_ov.parse_lines(paf.split("\n"))
    selected_ov.parse_lines(paf.split("\n"))

    all_ov.clean_data()
    selected_ov.clean_data()

    # Dump both graphs; pytest shows captured stdout when a test fails.
    print(all_ov.get_gfa())
    print()
    print(selected_ov.get_gfa())

    all_lines = set(all_ov.get_gfa().strip().split("\n"))
    selected_lines = set(selected_ov.get_gfa().strip().split("\n"))
    assert all_lines == selected_lines
def test_overlap_poped_short():
    """Check two parser configurations agree on a single PAF record.

    One record is fed to a default ``Parser()`` and to
    ``Parser(False, False)``; after ``clean_data()`` on both, the two
    GFA texts must be identical once surrounding whitespace is stripped.

    DEBUG logging is sent to a stream handler for the duration of the
    test. The root logger's previous level is restored and the handler
    removed afterwards so the setup does not leak into other tests.
    """
    logger = logging.getLogger()
    previous_level = logger.level
    logger.setLevel(logging.DEBUG)

    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.DEBUG)
    logger.addHandler(stream_handler)

    try:
        # NOTE(review): fields are space-separated as seen in this view;
        # PAF is normally tab-delimited -- confirm against the real file.
        paf = """
57569 26221 1475 20050 - 60137 26950 8 18545 2156 18575 255 cm:i:258
"""

        all_ov = paf2gfa.Parser()
        selected_ov = paf2gfa.Parser(False, False)

        all_ov.parse_lines(paf.split("\n"))
        selected_ov.parse_lines(paf.split("\n"))

        all_ov.clean_data()
        selected_ov.clean_data()

        # Dump both graphs; pytest shows captured stdout on failure.
        print(all_ov.get_gfa())
        print()
        print(selected_ov.get_gfa())

        assert all_ov.get_gfa().strip() == selected_ov.get_gfa().strip()
    finally:
        # Undo the global logging changes even when the assertion fails.
        logger.removeHandler(stream_handler)
        logger.setLevel(previous_level)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment