Mentions légales du service

Skip to content
Snippets Groups Projects
Commit b1330e44 authored by BOULLE Olivier
Browse files

removed duplicate script dna file reader

parent 0aa90a14
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 23 09:26:22 2020
@author: oboulle
"""
import os
import sys
def read_fasta(fasta_file_path):
    """
    read every sequence of a .fasta file
    :param fasta_file_path: path to the .fasta file
    :return: a dictionary containing the name and the content of the sequences in the file,
             or None if the path is not an existing file
    the program exits with status 1 when a line that should be a '>' header is not one
    """
    if not os.path.isfile(fasta_file_path):
        return None
    sequence_dict = {}
    # the context manager closes the file on every exit path, including the format-error exit
    with open(fasta_file_path) as fasta_file:
        line = fasta_file.readline()
        while line != "":
            if not line.startswith(">"):
                print("fasta format error :", line)
                # sys.exit instead of the bare exit(), which only exists when the site module is loaded
                sys.exit(1)
            sequence_name = line[1:].replace("\n", "")
            # collect the lines of the sequence and join them once (avoids quadratic string concatenation)
            fragments = []
            line = fasta_file.readline()
            while line != "" and not line.startswith(">"):
                fragments.append(line.replace("\n", ""))
                line = fasta_file.readline()
            # a name that appears twice keeps only its last sequence
            sequence_dict[sequence_name] = "".join(fragments)
    return sequence_dict
def read_single_sequence_fasta(fasta_file_path):
    """
    read the first sequence of a .fasta file
    :param fasta_file_path: path to the .fasta file
    :return: the name and the value of the sequence in the file,
             (None, None) if the path is not an existing file,
             ("", "") if the file does not begin with a '>' header line
    """
    if not os.path.isfile(fasta_file_path):
        return None, None
    # the context manager closes the file even if reading raises
    with open(fasta_file_path) as fasta_file:
        line = fasta_file.readline()
        if not line.startswith(">"):
            print("error, could not read the sequence from the file :", fasta_file_path)
            return "", ""
        sequence_name = line[1:].replace("\n", "")
        # only the first record is read: stop at the next header or at the end of the file
        fragments = []
        line = fasta_file.readline()
        while line != "" and not line.startswith(">"):
            fragments.append(line.replace("\n", ""))
            line = fasta_file.readline()
    return sequence_name, "".join(fragments)
def read_fastq(fastq_file_path):
    """
    read every record of a .fastq file
    :param fastq_file_path: path to the .fastq file
    :return: a list containing the name, content, description and score of the sequences in the file
             (each element is a list [name, value, description, score]; the name keeps its leading '@')
    the program exits with status 1 when a line that should start a record does not begin with '@'
    """
    sequence_list = []
    # the context manager closes the file on every exit path, including the format-error exit
    with open(fastq_file_path) as fastq_file:
        line = fastq_file.readline()
        while line != "":
            if not line.startswith("@"):
                print("fastq format error :", line)
                # sys.exit instead of the bare exit(), which only exists when the site module is loaded
                sys.exit(1)
            # a record is read as four consecutive lines, so a score line that
            # itself starts with '@' is never mistaken for a new record
            record = [line.replace("\n", "")]
            for _ in range(3):
                record.append(fastq_file.readline().replace("\n", ""))
            sequence_list.append(record)
            line = fastq_file.readline()
    return sequence_list
def reverse_complement(sequence):
    """
    return the reverse complement of a dna sequence (AGTT -> AACT)
    :param sequence: the input sequence, made only of the letters A, C, G and T
    :return: the reverse complement of the input sequence
    a KeyError is raised for any letter other than A, C, G or T
    """
    complement = {"A": "T", "T": "A", "C": "G", "G": "C"}
    # walk the sequence backwards and join once, instead of prepending to a
    # string on every letter (which copies the whole result each time)
    return "".join(complement[letter] for letter in reversed(sequence))
def get_sequence_size(fasta_file_path):
    """
    :param fasta_file_path: path of the .fasta file
    :return: the length of the dna sequence (the first one of the file), line breaks not counted
    """
    size = 0
    # the context manager closes the file, which was previously left open
    with open(fasta_file_path) as fasta_file:
        fasta_file.readline()  # skip the first line
        for line in fasta_file:
            if line.startswith(">"):
                # the next record begins here, the first sequence is complete
                break
            # the line break is not part of the sequence; a sequence wrapped
            # over several lines is summed, as read_single_sequence_fasta does
            size += len(line.replace("\n", ""))
    return size
def fasta_to_fastq(fasta_file_path, output_path):
    """
    convert a fasta file into a fastq file
    :param fasta_file_path: path of the .fasta file
    :param output_path: path of the .fastq output
    a FileNotFoundError is raised when the fasta file does not exist
    """
    sequences = read_fasta(fasta_file_path)
    if sequences is None:
        # read_fasta returns None for a missing file; fail before the output
        # file is created or truncated instead of crashing on None.items()
        raise FileNotFoundError("fasta file not found : " + str(fasta_file_path))
    # the context manager closes the output even if a write fails
    with open(output_path, "w+") as file_output:
        for name, value in sequences.items():
            file_output.write("@" + name + "\n")
            file_output.write(value + "\n")
            file_output.write("+\n")
            # NOTE(review): the score line is a fixed 3-character placeholder, whatever
            # the sequence length is - confirm that the consumers of this file accept it
            file_output.write("---\n")
# =================== main ======================= #
if __name__ == '__main__':
    # both paths are required: print the usage instead of failing with an IndexError
    if len(sys.argv) < 3:
        print("usage :", sys.argv[0], "fasta_input_path fastq_output_path")
        sys.exit(1)
    input_path = sys.argv[1]  # file to read the sequences
    output_path = sys.argv[2]  # file to save the sequences
    fasta_to_fastq(input_path, output_path)
...@@ -3,7 +3,6 @@ import numpy as np ...@@ -3,7 +3,6 @@ import numpy as np
import random import random
from PIL import Image from PIL import Image
import dna_file_reader as dfr
""" """
convert images to dna sequences convert images to dna sequences
...@@ -266,7 +265,6 @@ if __name__ == "__main__": ...@@ -266,7 +265,6 @@ if __name__ == "__main__":
pass pass
matrix_to_png(mat2, 'RGB', "img_decoded.png") matrix_to_png(mat2, 'RGB', "img_decoded.png")
""" """
seq_path = sys.argv[1] sequence = sys.argv[1]
name, sequence = dfr.read_single_sequence_fasta(seq_path)
decode_png(sequence, "test.png", "RGB;118;106") decode_png(sequence, "test.png", "RGB;118;106")
import sys import sys
import math import math
import dna_file_reader as dfr
import hashing import hashing
import source_encoding import source_encoding
import source_decoding import source_decoding
...@@ -165,9 +164,7 @@ if __name__ == '__main__': ...@@ -165,9 +164,7 @@ if __name__ == '__main__':
print("usage : sequence_control.py sequence_path") print("usage : sequence_control.py sequence_path")
sys.exit(1) sys.exit(1)
sequence_path = sys.argv[1] sequence = sys.argv[1]
_, sequence = dfr.read_single_sequence_fasta(sequence_path)
#find_hash_keys(sequence) #find_hash_keys(sequence)
decode_sequence(sequence) decode_sequence(sequence)
......
import sys import sys
import os.path import os.path
import hashing import inspect
import argparse import argparse
import hashing
import image_conversion as img_conv import image_conversion as img_conv
import file_conversion as file_conv import file_conversion as file_conv
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
sys.path.insert(0, os.path.dirname(currentdir)+"/synthesis_simulation")
import dna_file_reader as dfr import dna_file_reader as dfr
import dna_numbering as dnbr import dna_numbering as dnbr
......
import sys import sys
import os.path import os.path
import inspect
import argparse import argparse
import hashing import hashing
import image_conversion as img_conv import image_conversion as img_conv
import file_conversion as file_conv import file_conversion as file_conv
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
sys.path.insert(0, os.path.dirname(currentdir)+"/synthesis_simulation")
import dna_file_reader as dfr import dna_file_reader as dfr
import dna_numbering as dnbr import dna_numbering as dnbr
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment