Commit 8aa8169b authored by MARIJON Pierre's avatar MARIJON Pierre

Add a script to convert a miniasm GFA into a GFA containing all reads that participate in each contig

parent fed4f795
#!/usr/bin/env python3
import re
import sys
import argparse
from types import SimpleNamespace
from collections import defaultdict
# Matches a miniasm "a" line and captures: the contig name, the read name
# (digits only), the begin/end coordinates that follow the colon, the strand,
# and the trailing integer that main() subtracts from the read length to get
# the overlap with the next read of the same contig.
a_regex = re.compile(r"^a\s+(?P<tig_name>[^\s]+)\s+.+\s+(?P<name>\d+):(?P<begin>\d+)-(?P<end>\d+)\s+(?P<strand>[+-])\s+(?P<begin_next>\d+)")


def main(args=None):
    """Rewrite the "a" lines of a miniasm GFA as read-level S and L lines.

    Every line matching ``a_regex`` produces one ``S`` line for the read
    (sequence given as ``*`` plus an ``LN:i:`` length tag).  Once the whole
    input has been read, each pair of consecutive reads belonging to the same
    contig produces one ``L`` line whose overlap is the first read's length
    minus its ``begin_next`` value.

    Lines starting with ``S`` or ``x`` are skipped silently.  Any other line
    that does not match ``a_regex`` is echoed on stderr and skipped.

    Args:
        args: Command-line arguments without the program name, i.e.
            ``[input_gfa, output_gfa]``.  Defaults to ``sys.argv[1:]``.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(
        prog="miniasm2gfa",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('gfa', type=argparse.FileType('r'))
    parser.add_argument('out', type=argparse.FileType('w'))
    args = vars(parser.parse_args(args))

    gfa, out = args["gfa"], args["out"]

    # Reads of each contig, in the order their "a" lines appear in the input.
    tig2read = defaultdict(list)

    try:
        for line in gfa:
            if line.startswith(("S", "x")):
                continue

            m = a_regex.match(line)
            if m is None:
                # Not an "a" line: report it and move on.  Written to stderr
                # so the diagnostic cannot end up inside the GFA when the
                # output argument is "-" (stdout).
                print(line.rstrip("\n"), file=sys.stderr)
                continue

            read = SimpleNamespace(**m.groupdict())
            read.length = int(read.end) - int(read.begin)
            tig2read[read.tig_name].append(read)
            out.write("S\t{}\t*\tLN:i:{}\n".format(read.name, read.length))

        for reads in tig2read.values():
            # Link each read to the one that follows it in the same contig.
            for read_a, read_b in zip(reads, reads[1:]):
                out.write("L\t{}\t{}\t{}\t{}\t{}M\n".format(
                    read_a.name, read_a.strand, read_b.name, read_b.strand,
                    read_a.length - int(read_a.begin_next)))
    finally:
        # argparse.FileType opened these handles and nothing else will close
        # them.  "-" maps to the process-wide stdin/stdout, which must stay
        # open for the caller.
        for handle in (gfa, out):
            if handle is not sys.stdin and handle is not sys.stdout:
                handle.close()
# Run the converter only when this file is executed as a script, handing
# main() the command-line arguments without the program name.
if __name__ == "__main__":
    main(
        sys.argv[1:]
    )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment