Commit 555918ba authored by Pierre Marijon

Tmp commit

parent 61652746
......@@ -4,9 +4,6 @@
import re
import sys
import pandas
import logging
import argparse
......@@ -14,102 +11,6 @@ import argparse
from collections import OrderedDict, defaultdict
from types import SimpleNamespace
def add_overlap_if_possible(links, contains, read_contained, name_a, len_a, beg_a, end_a, flip,
                            name_b, len_b, beg_b, end_b,
                            nb_base, nb_match):
    """Classify one alignment between reads A and B and record the result.

    The alignment is recorded either as a containment (one read lies inside
    the other) or as a dovetail overlap at one end of A.

    Args:
        links: dict updated in place. Keyed by
            ``frozenset((name_a, name_b, flip))``; the value is ``False``
            for a containment, or a 6-tuple
            ``(from, strand, to, strand, "<nb_base>M", "NM:i:<n>")`` for an
            overlap. (The tuple layout resembles a GFA link record; confirm
            against the writer that consumes it.)
        contains: dict updated in place with a 6-tuple
            ``(inner, "+", outer, strand, start_on_outer, "<nb_base>M")``
            under the same key when a containment is detected.
        read_contained: set that receives the name of the contained read.
        name_a, name_b: read identifiers.
        len_a, len_b: read lengths (numeric).
        beg_a, end_a, beg_b, end_b: alignment coordinates on each read
            (numeric).
        flip: relative strand, ``"+"`` or ``"-"``.
        nb_base: alignment length as a string (it is both converted with
            ``int()`` and concatenated with ``"M"``).
        nb_match: number of matching bases, convertible with ``int()``.

    Returns:
        ``True`` when something was recorded. ``False`` when the alignment
        is neither a containment nor an overlap at either end of A: the gap
        after the alignment on A equals ``beg_a``, or ``flip`` is neither
        ``"+"`` nor ``"-"``.
    """
    aligned_bases = int(nb_base)
    # Mismatch + gap count: aligned columns that are not matches.
    num_misgap = str(abs(aligned_bases - int(nb_match)))

    logging.debug("name a {} name b {} nb_base {} nb_match {}".format(
        name_a, name_b, nb_base, nb_match))

    span_a = abs(beg_a - end_a)
    span_b = abs(beg_b - end_b)
    logging.debug("len_b {} beg_a - end_a {} len_a {} beg_b - end_b {}".format(
        len_b, span_a, len_a, span_b))

    # Containment: a read is treated as contained as soon as its whole
    # length fits in either aligned span or in the alignment length.
    # B-in-A is tested first and wins when both would qualify.
    #   A --------------------------->
    #   B        --------->     (or <---------)
    containment = None
    if len_b <= span_a or len_b <= aligned_bases or len_b <= span_b:
        logging.debug("B is read_containedement in A")
        containment = (name_b, name_a, beg_a)
    elif len_a <= span_b or len_a <= aligned_bases or len_a <= span_a:
        logging.debug("A is read_containedement in B")
        containment = (name_a, name_b, beg_b)

    if containment is not None:
        inner, outer, start_on_outer = containment
        pair_key = frozenset((name_a, name_b, flip))
        # False marks the pair as "seen, but not a link".
        links[pair_key] = False
        read_contained.add(inner)
        outer_strand = "+" if flip == "+" else "-"
        contains[pair_key] = (inner, "+", outer, outer_strand,
                              str(start_on_outer), nb_base + "M")
        return True

    # Dovetail overlap: compare the unaligned tail of A with its unaligned
    # head to decide which end of A takes part in the overlap.
    tail_a = abs(end_a - len_a)
    if tail_a < beg_a:
        # Overlap at A 3':   A ---------------->
        #                    B        ------------->  (or reversed)
        if flip == "+":
            message = "overlap at A 3' same orientation"
            edge = (name_a, "+", name_b, "+")
        elif flip == "-":
            message = "overlap at A 3' orientation is different"
            edge = (name_a, "+", name_b, "-")
        else:
            return False
    elif beg_a < tail_a:
        # Overlap at A 5':   A        -------------->
        #                    B --------------->       (or reversed)
        if flip == "+":
            message = "overlap at A 5' same orientation"
            edge = (name_b, "+", name_a, "+")
        elif flip == "-":
            message = "overlap at A 5' orientation is different"
            edge = (name_b, "-", name_a, "+")
        else:
            return False
    else:
        # Head and tail are equal: no end can be chosen.
        return False

    logging.debug(message)
    record = edge + (nb_base + "M", "NM:i:" + num_misgap)
    links[frozenset((name_a, name_b, flip))] = record
    return True
class Parser:
def __init__(self, remove=False):
......@@ -154,12 +55,15 @@ class Parser:
overhang = min(l.beg_a, l.beg_b) + min(l.len_a - l.end_a, l.len_b - l.end_b)
maplen = max(l.end_a - l.beg_a, l.end_b - l.beg_b)
logging.critical("name A {} name B {}, beg_A {} beg_B {}".format(l.read_a, l.read_b, l.beg_a, l.beg_b))
strange_ov = False
if overhang > min(1000, maplen*0.8):
if overhang > maplen*0.8:
# Strange overlap
#logging.critical("Strange Overlap between {} {}".format(l.read_a, l.read_b))
#logging.critical("overhang {} maplen*x {}".format(overhang, maplen*0.8))
strange_ov = True
if not strange_ov and l.beg_a <= l.beg_b and l.len_a - l.end_a < l.len_b - l.end_b:
if overhang < 1000 and l.beg_a <= l.beg_b and l.len_a - l.end_a < l.len_b - l.end_b:
# B contains A
self._add_segment(l.read_a, l.len_a)
self._add_segment(l.read_b, l.len_b)
......@@ -170,7 +74,7 @@ class Parser:
else:
self._add_containment(l.read_b, "-", l.read_a, "+", l.beg_b,
l.nb_base)
elif not strange_ov and l.beg_a >= l.beg_b and l.len_a -l.end_a > l.len_b - l.end_b:
elif overhang < 1000 and l.beg_a >= l.beg_b and l.len_a - l.end_a > l.len_b - l.end_b:
# A contains B
self._add_segment(l.read_a, l.len_a)
self._add_segment(l.read_b, l.len_b)
......@@ -183,6 +87,7 @@ class Parser:
l.nb_base)
elif l.beg_a > l.beg_b:
# A overlap B
logging.critical("A overlap B")
self._add_segment(l.read_a, l.len_a)
self._add_segment(l.read_b, l.len_b)
......@@ -194,6 +99,7 @@ class Parser:
l.nb_match)
else:
# B overlap A
logging.critical("B overlap A")
self._add_segment(l.read_a, l.len_a)
self._add_segment(l.read_b, l.len_b)
......
1 10000 8000 10000 - 2 10000 8000 10000 2000 2000 255
1 10000 0 8000 - 5 12000 0 8000 8000 8000 255
2 10000 0 2000 - 3 10000 0 2000 2000 2000 255
3 10000 8000 10000 + 4 10000 8000 10000 2000 2000 255
1 10000 2000 10000 - 2 10000 1999 8001 0 8000 255
1 10000 0 8000 - 5 12000 1 8000 8000 8000 255
2 10000 0 8000 - 3 10000 0 8000 2000 2000 255
3 10000 2000 10000 + 4 10000 0 8000 2000 2000 255
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment