From 1fceaea0c1fc55405b3ff5b51d706bd1cdb4a414 Mon Sep 17 00:00:00 2001 From: oboulle <olivier.boulle@inria.fr> Date: Thu, 26 Jan 2023 09:54:40 +0100 Subject: [PATCH] case for unexisting input --- reads_consensus.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/reads_consensus.py b/reads_consensus.py index eb9e929..0a58291 100755 --- a/reads_consensus.py +++ b/reads_consensus.py @@ -14,7 +14,7 @@ import dna_file_reader as dfr # used to get the consensus sequence from a .fastq reads file of an ordered assembly -KMER_SIZE = 30 # arbitrary constant, increase the risk of finding loop if too short +KMER_SIZE = 35 # arbitrary constant, increase the risk of finding loop if too short def pre_filter(sequence: str) -> bool: @@ -50,7 +50,7 @@ def count_kmers_dsk(input_path: str, min_occ: int) -> dict: returns the dict of kmer occurrences filtered with a threshold of minimum occurrence """ #start = time.time() - if os.stat(input_path).st_size == 0: + if not os.path.isfile(input_path) or os.stat(input_path).st_size == 0: #print(input_path,"is an empty file") return {} @@ -129,10 +129,10 @@ def build_result_sequence(total_path, current_kmer, stop_kmer, kmer_occurrences_ return path_list # try the second path if it has a non negligible weight compared to best path and is not a loop - if second_next_weight > best_next_weight/2 and not overlap_kmers_dict.get(second_next_kmer, False): #best_next_weight/2 - overlap_kmers_dict[second_next_kmer] = True - path_list = build_result_sequence(total_path + second_next_kmer[-1], second_next_kmer, stop_kmer, kmer_occurrences_dict, overlap_kmers_dict.copy(), path_list) - overlap_kmers_dict[second_next_kmer] = False + #if second_next_weight > best_next_weight/2 and not overlap_kmers_dict.get(second_next_kmer, False): + # overlap_kmers_dict[second_next_kmer] = True + # path_list = build_result_sequence(total_path + second_next_kmer[-1], second_next_kmer, stop_kmer, kmer_occurrences_dict, overlap_kmers_dict.copy(), path_list) + # overlap_kmers_dict[second_next_kmer] = False # the chosen next base is placed at the right of the result sequence @@ -351,6 +351,7 @@ def kmer_consensus(input_path: str, output_path: str, start_sequence: str, stop_ # build the resulting sequence from the dictionary of following bases result_sequence_list = build_with_known_extremities(start_sequence, stop_sequence, kmer_occurrences_dict) #result_sequence = build_from_start(start_sequence, kmer_occurrences_dict) + print(result_sequence_list) #result_sequence = build_without_extremities(kmer_occurrences_dict) #build_time = time.time() - start -- GitLab