diff --git a/partitioning/fast_clustering_t.cpp b/partitioning/fast_clustering_t.cpp index 62c36b9dec736ace410022593d3e0439db6e3a95..dfbad00f88353968caeed32f1aa8a4ecb9821380 100755 --- a/partitioning/fast_clustering_t.cpp +++ b/partitioning/fast_clustering_t.cpp @@ -126,29 +126,26 @@ void process_file_part(const std::string& input_filename, int part_id, int start int main(int argc, char* argv[]) { // check if the input and output file paths are provided as arguments - if (argc != 3) { - std::cerr << "Usage: " << argv[0] << " <input_fastq> <output_dir>" << std::endl; + if (argc < 3 || argc > 4) { + std::cerr << "Usage: " << argv[0] << " <input_fastq> <output_dir> <n_thread>" << std::endl; return 1; } // ./fast_clustering reads.fastq clusters_dir_path // get the input and output paths from the arguments std::string input_fastq = argv[1]; std::string output_dir = argv[2]; + int n_thread; // number of threads used, default = maximum possible + if (argc == 3) { + n_thread = std::thread::hardware_concurrency(); + } else { + n_thread = std::stoi(argv[3]); + } std::string start_primer = "GTTCAGAGTTCTACAGTCCGACGATCC"; int seq_to_find_size = std::min(static_cast<int>(start_primer.size()), 8); std::string seq_to_find = start_primer.substr(start_primer.size() - seq_to_find_size); // search for a smaller sequence than the full primer if its len is > 10 - std::string data_name = "100k"; - - input_fastq = "new_tests/"+data_name+"/shuffled_reads_"+data_name+".fastq"; - output_dir = "new_tests/"+data_name+"/clusters_"+data_name; - - // number of threads used - int num_parts = 4;//std::thread::hardware_concurrency(); - - // start a timer auto start = std::chrono::high_resolution_clock::now(); @@ -182,12 +179,12 @@ int main(int argc, char* argv[]) { input_read_file.close(); // Calculate the number of lines in each part - int lines_per_part = (num_lines + num_parts - 1) / num_parts; + int lines_per_part = (num_lines + n_thread - 1) / n_thread; lines_per_part = (lines_per_part + 3) / 4 * 4; // Round up to the nearest multiple of 4 // Create a thread for each file part std::vector<std::thread> threads; - for (int i = 0; i < num_parts; i++) { + for (int i = 0; i < n_thread; i++) { // Calculate the start and end line numbers for this part int start_line = i * lines_per_part;