From 1fceaea0c1fc55405b3ff5b51d706bd1cdb4a414 Mon Sep 17 00:00:00 2001
From: oboulle <olivier.boulle@inria.fr>
Date: Thu, 26 Jan 2023 09:54:40 +0100
Subject: [PATCH] handle nonexistent input file

---
 reads_consensus.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/reads_consensus.py b/reads_consensus.py
index eb9e929..0a58291 100755
--- a/reads_consensus.py
+++ b/reads_consensus.py
@@ -14,7 +14,7 @@ import dna_file_reader as dfr
 
 # used to get the consensus sequence from a .fastq reads file of an ordered assembly
 
-KMER_SIZE = 30  # arbitrary constant, increase the risk of finding loop if too short
+KMER_SIZE = 35  # arbitrary constant; a value that is too short increases the risk of finding loops
 
 
 def pre_filter(sequence: str) -> bool:
@@ -50,7 +50,7 @@ def count_kmers_dsk(input_path: str, min_occ: int) -> dict:
     returns the dict of kmer occurrences filtered with a threshold of minimum occurrence
     """
     #start = time.time()
-    if os.stat(input_path).st_size == 0:
+    if not os.path.isfile(input_path) or os.stat(input_path).st_size == 0:
         #print(input_path,"is an empty file")
         return {}
     
@@ -129,10 +129,10 @@ def build_result_sequence(total_path, current_kmer, stop_kmer, kmer_occurrences_
             return path_list
         
         # try the second path if it has a non negligible weight compared to best path and is not a loop
-        if second_next_weight > best_next_weight/2 and not overlap_kmers_dict.get(second_next_kmer, False): #best_next_weight/2
-            overlap_kmers_dict[second_next_kmer] = True
-            path_list = build_result_sequence(total_path + second_next_kmer[-1], second_next_kmer, stop_kmer, kmer_occurrences_dict, overlap_kmers_dict.copy(), path_list)
-            overlap_kmers_dict[second_next_kmer] = False
+        #if second_next_weight > best_next_weight/2 and not overlap_kmers_dict.get(second_next_kmer, False):
+        #    overlap_kmers_dict[second_next_kmer] = True
+        #    path_list = build_result_sequence(total_path + second_next_kmer[-1], second_next_kmer, stop_kmer, kmer_occurrences_dict, overlap_kmers_dict.copy(), path_list)
+        #    overlap_kmers_dict[second_next_kmer] = False
         
         
         # the chosen next base is placed at the right of the result sequence 
@@ -351,6 +351,7 @@ def kmer_consensus(input_path: str, output_path: str, start_sequence: str, stop_
     # build the resulting sequence from the dictionary of following bases
     result_sequence_list = build_with_known_extremities(start_sequence, stop_sequence, kmer_occurrences_dict) 
     #result_sequence = build_from_start(start_sequence, kmer_occurrences_dict)
+    print(result_sequence_list)
     
     #result_sequence = build_without_extremities(kmer_occurrences_dict)
     #build_time = time.time() - start
-- 
GitLab