Commit 063c084f authored by MARIJON Pierre's avatar MARIJON Pierre

Correct biais in path weight computation

parent fbf6d321
......@@ -24,5 +24,5 @@ knot -r reads_raw.fasta -c assembly_contig.fasta -g assembly_graph.gfa -o raw
On corrected reads:
```
knot -C reads_raw.fasta -c assembly_contig.fasta -g assembly_graph.gfa -o corrected
knot -C reads_corrected.fasta -c assembly_contig.fasta -g assembly_graph.gfa -o corrected
```
......@@ -28,7 +28,7 @@ def get_tig2posread(read2tig, valid_read):
if valid_read and row[0] not in valid_read:
continue
if int(row[3]) - int(row[2]) > 0.7 * int(row[1]):
result[(row[5], int(row[6]))].append((int(row[7]), int(row[8]), row[0], row[4]))
result[(row[5], int(row[6]))].append((int(row[7]), int(row[8]), row[0], int(row[2]), int(row[1]) - int(row[3]), row[4]))
return result
......@@ -24,21 +24,43 @@ def main(args=None):
valid_read = get_valid_read(args["read2read"]) # read name
tig2posread = get_tig2posread(args["read2tig"], valid_read) # tig -> (begin, end, name)
# (tig, tig_len) -> (tig_begin, tig_end, name, read_begin, read_dist_to_end, read_ori)
tig2posread = get_tig2posread(args["read2tig"], valid_read)
for k in tig2posread:
tig2posread[k].sort()
print("tig","read","strand_to_tig", sep=",", file=args["output"])
print("tig","read","strand_to_tig","dist2ext", sep=",", file=args["output"])
for tig in tig2posread.keys():
ext = tig[0]+"_begin"
print(ext, tig2posread[tig][1][2], tig2posread[tig][1][3],
sep=",", file=args["output"])
print(tig, tig2posread[tig][1])
if tig2posread[tig][1][5] == "+":
print(ext,
tig2posread[tig][1][2],
tig2posread[tig][1][5],
(tig2posread[tig][1][0] - tig2posread[tig][1][3]) * -1,
sep=",", file=args["output"])
else:
print(ext,
tig2posread[tig][1][2],
tig2posread[tig][1][5],
(tig2posread[tig][1][0] - tig2posread[tig][1][4]) * -1,
sep=",", file=args["output"])
ext = tig[0]+"_end"
tig2posread[tig].sort(key=lambda x: x[1])
print(ext, tig2posread[tig][-2][2], tig2posread[tig][-2][3],
sep=",", file=args["output"])
if tig2posread[tig][-2][5] == "+":
print(ext,
tig2posread[tig][-2][2],
tig2posread[tig][-2][5],
((tig[1] - tig2posread[tig][-2][1]) - tig2posread[tig][-2][4]) * -1,
sep=",", file=args["output"])
else:
print(ext,
tig2posread[tig][-2][2],
tig2posread[tig][-2][5],
((tig[1] - tig2posread[tig][-2][1]) - tig2posread[tig][-2][3]) * -1,
sep=",", file=args["output"])
if __name__ == '__main__':
main(sys.argv[1:])
......
......@@ -74,6 +74,10 @@ def main(args=None):
continue
(path, weight) = paths.get_path(sg, node1, node2, args["search_mode"])
if len(path) <= 2:
weight = 0
else:
weight -= int(ext1[3]) - int(ext2[3])
if path:
nbread_contig = paths.format_node_contig_counter(
paths.path_through_contig(tig2reads, path),
......
......@@ -62,7 +62,9 @@ def __add_dovetails(d, G, is_contain):
qual=qual_val,
len=d.alignment.length_on_query(),
overhang_len=d.to_segment.length - d.alignment.length_on_query(),
weight=int(d.to.LN-d.alignment.length_on_query()))
weight=int(d.to_segment.length-d.alignment.length_on_query()),
to_length = d.to_segment.length
)
class isContain:
......
......@@ -22,7 +22,14 @@ def get_path(graph, n1, n2, mode="base"):
logging.debug("Node not found exception "+str(e))
path = []
return path, sum([graph.edges[x, y]["weight"] for x, y in zip(path, path[1:])])
if not path:
return path, 0
if len(path) == 2:
edge = graph.edges[path[0], path[1]]
return path, 0
return path, sum([graph.edges[x, y]["weight"] for x, y in zip(path, path[1:])]) - graph.edges[path[-2], path[-1]]["to_length"]
def path_through_contig(tig2reads, path):
tig2nb_read = Counter()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment