Commit 23c3b22f authored by MARIJON Pierre's avatar MARIJON Pierre

Add module to select unassigned reads and reads at contig extremities to build SG

parent 440c256e
......@@ -21,7 +21,6 @@ class MyArgumentParser(argparse.ArgumentParser):
def main(args = None):
if args is None:
args = sys.argv[1:]
......@@ -70,11 +69,11 @@ def main(args = None):
snakemake_config_path = os.path.join(package_path, "config.yaml")
config = [
"contigs="+args["contigs"],
"out_prefix="+args["output"],
"contigs_graph="+args["contigs_graph"],
"read_type="+args["read_type"],
"package_path="+package_path
"contigs="+args["contigs"],
"out_prefix="+args["output"],
"contigs_graph="+args["contigs_graph"],
"read_type="+args["read_type"],
"package_path="+package_path,
]
if args["raw_reads"] is not None:
......
......@@ -25,10 +25,10 @@ def get_tig2posread(read2tig, valid_read):
reader = csv.reader(read2tig, delimiter="\t")
for row in reader:
if row[0] not in valid_read:
if valid_read and row[0] not in valid_read:
continue
if int(row[3]) - int(row[2]) > 0.7 * int(row[1]):
result[row[5]].append((int(row[7]), int(row[8]), row[0], row[4]))
result[(row[5], int(row[6]))].append((int(row[7]), int(row[8]), row[0], row[4]))
return result
......@@ -31,11 +31,11 @@ def main(args=None):
print("tig","read","strand_to_tig", sep=",", file=args["output"])
for tig in tig2posread.keys():
ext = tig+"_begin"
ext = tig[0]+"_begin"
print(ext, tig2posread[tig][0][2], tig2posread[tig][0][3],
sep=",", file=args["output"])
ext = tig+"_end"
ext = tig[0]+"_end"
tig2posread[tig].sort(key=lambda x: x[1])
print(ext, tig2posread[tig][-1][2], tig2posread[tig][-1][3],
sep=",", file=args["output"])
......
......@@ -46,7 +46,7 @@ def main(args=None):
# get info about contig
valid_read = extremity_search.get_valid_read(args["ovl_graph"])
tig2reads = {tig: {v[2] for v in val} for tig, val in extremity_search.get_tig2posread(args["read2asm"], valid_read).items()}
tig2reads = {tig[0]: {v[2] for v in val} for tig, val in extremity_search.get_tig2posread(args["read2asm"], valid_read).items()}
# build list of search
......
#!/usr/bin/env python3
# std import
import sys
import argparse
# pip import
from Bio import SeqIO
# project import
from knot import extremity_search
def main(args=None):
    """Copy to the output FASTA every read that is not internal to a contig.

    Reads listed by ``extremity_search.get_tig2posread`` whose mapping lies
    strictly inside a contig, away from both ends by more than a margin, are
    dropped. Every other record of the input FASTA (including reads that do
    not appear in the mapping at all) is written to the output.

    Command line (positional arguments):
        reads2contig  mapping file handed to ``get_tig2posread``
        input         FASTA file of reads to filter
        output        FASTA file receiving the kept reads

    Args:
        args: list of command-line tokens; defaults to ``sys.argv[1:]``.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(prog="knot.sg_generation")
    parser.add_argument("reads2contig", type=argparse.FileType('r'))
    parser.add_argument("input", type=argparse.FileType('r'))
    parser.add_argument("output", type=argparse.FileType('w'))

    args = vars(parser.parse_args(args))

    # argparse.FileType opens the files eagerly and never closes them, so
    # they are released explicitly once the work is done (or has failed).
    handles = (args["reads2contig"], args["input"], args["output"])

    # Margin = the larger of this fraction of tig[1] and this absolute floor.
    margin_ratio = 0.2
    margin_floor = 100000

    try:
        # An empty valid_read set is passed: no read is filtered out by name.
        tig2readspos = extremity_search.get_tig2posread(args["reads2contig"],
                                                        set())

        skip_read = set()
        for tig, val in tig2readspos.items():
            # Keys are (contig, int) pairs; tig[1] is presumably the contig
            # length -- TODO confirm against get_tig2posread's input format.
            tig_len = tig[1]
            margin = max(tig_len * margin_ratio, margin_floor)
            for v in val:
                # v[0], v[1] are presumably the begin/end of the read on the
                # contig and v[2] is the read name. A read is skipped only
                # when it sits entirely between the two margins.
                if v[0] > margin and v[1] < tig_len - margin:
                    skip_read.add(v[2])

        # Single write call fed by a generator: records are streamed, not
        # accumulated in memory.
        kept = (record for record in SeqIO.parse(args["input"], "fasta")
                if record.id not in skip_read)
        SeqIO.write(kept, args["output"], "fasta")
    finally:
        for handle in handles:
            # FileType maps "-" to the standard streams; those must stay
            # open for the rest of the process.
            if handle is not sys.stdin and handle is not sys.stdout:
                handle.close()


if __name__ == "__main__":
    main()
......@@ -41,6 +41,7 @@ setup(
'console_scripts': [
'knot = knot.__main__:main',
'knot.path_search = knot.path_search.__main__:main',
'knot.sg_generation = knot.sg_generation.__main__:main',
'knot.extremity_search = knot.extremity_search.__main__:main'
]
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment