Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 7bb150f3 authored by BOULLE Olivier's avatar BOULLE Olivier
Browse files

added comments, removed fixed params, optimise reverse comp usage

parent 56f91625
No related branches found
No related tags found
No related merge requests found
......@@ -57,32 +57,31 @@ std::vector<std::string> read_fastq(const std::string& filename) {
std::string get_cluster_id(const std::string& read, const std::string& seq_to_find, int split_level) {
/*
get the <split_level> bases following the primer in the read, check if the read is in reverse complement
get the <split_level> bases following the primer in the read, also try to find the primer in the reverse complement sequence
*/
std::string read_rc = reverse_complement(read); // reverse complement of the read sequence
double score_fw, score_rc; // alignment score with the forward and reverse complement sequence
int x_fw, y_fw, x_rc, y_rc; //x_fw and x_rc not used
int x_fw, y_fw, x_rc, y_rc; // positions start and stop of the alignments found, x_fw and x_rc not used
// call local alignment algorithm on the forward sequence
SmithWaterman(read, seq_to_find, score_fw, x_fw, y_fw);
// don't test for the reverse complement if near perfect alignment already found
// don't test for the reverse complement if near perfect alignment already found (maximum possible score = 10)
if (score_fw >= 9.5 && read.size() > y_fw + split_level) {
return read.substr(y_fw, split_level);
return read.substr(y_fw, split_level); // extract the cluster id
}
std::string read_rc = reverse_complement(read); // get reverse complement of the read sequence
// test local alignement with the reverse complement sequence
SmithWaterman(read_rc, seq_to_find, score_rc, x_rc, y_rc);
if (score_fw >= score_rc) { // primer found in forward sequence
if (score_fw >= 8 && read.size() > y_fw + split_level) {
if (score_fw >= 7.5 && read.size() > y_fw + split_level) {
return read.substr(y_fw, split_level);
}
} else { // primer found in reverse complement sequence
if (score_rc >= 8 && read_rc.size() > y_rc + split_level) {
if (score_rc >= 7.5 && read_rc.size() > y_rc + split_level) {
return read_rc.substr(y_rc, split_level);
}
}
......@@ -93,7 +92,7 @@ std::string get_cluster_id(const std::string& read, const std::string& seq_to_fi
void clustering(const std::string& reads_path, const std::string& start_primer, int split_level, const std::string& output_dir) {
/*
/* split_lvel = nbr of bases to use for the cluster id
*/
//delete all fasta files in the clusters dir
......@@ -127,7 +126,7 @@ void clustering(const std::string& reads_path, const std::string& start_primer,
std::getline(input_read_file, line);
std::getline(input_read_file, line);
// find the position fo the primer and get the bases following it
// find the position for the primer and get the bases following it
std::string cluster_id = get_cluster_id(sequence, seq_to_find, split_level);
if (cluster_id == "None") {
......@@ -157,16 +156,11 @@ int main(int argc, char* argv[]) {
// get the input and output paths from the arguments
std::string input_fastq = argv[1];
std::string output_dir = argv[2];
std::string output_dir = argv[2]; // must be an existing dir
// start a timer
auto start = std::chrono::high_resolution_clock::now();
std::string data_name = "100k";
input_fastq = "new_tests/"+data_name+"/shuffled_reads_"+data_name+".fastq";
output_dir = "new_tests/"+data_name+"/clusters_"+data_name;
std::string start_primer = "GTTCAGAGTTCTACAGTCCGACGATCC";
clustering(input_fastq, start_primer, 3, output_dir);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment