Commit 1509b803 authored by Mathieu Giraud

vidjil.cpp, core/labels.h: remove normalization (-Z), cleaning the code

This feature was not documented and not used.
The browser offers better normalization features.
parent 26d4dff3
......@@ -172,30 +172,3 @@ string JsonArray::toString(){
return stream.str();
}
/*
  Returns a JSON array holding the names of the normalizations,
  in this order: "none", "highest standard", "all standards".
*/
JsonArray json_normalization_names()
{
  static const char *const names[] = {
    "none",
    "highest standard",
    "all standards"
  };
  const int nb_names = sizeof(names) / sizeof(names[0]);

  JsonArray result;
  for (int k = 0; k < nb_names; k++)
    result.add(names[k]);
  return result;
}
/*
  Returns a JSON array with normalization data for one data set.

  The three entries are, in order:
    - nb_reads / nb_segmented (no normalization),
    - the same ratio multiplied by compute_normalization_one(norm_list, nb_reads),
    - the same ratio multiplied by compute_normalization(norm_list, nb_reads).
*/
JsonArray json_normalization( list< pair <float, int> > norm_list, int nb_reads, int nb_segmented)
{
  const float reads = static_cast<float>(nb_reads);
  const float norm_highest = compute_normalization_one(norm_list, nb_reads);
  const float norm_all = compute_normalization(norm_list, nb_reads);

  JsonArray result;
  result.add( reads / nb_segmented );
  result.add( reads * norm_highest / nb_segmented );
  result.add( reads * norm_all / nb_segmented );
  return result;
}
......@@ -65,135 +65,3 @@ map <string, string> load_map(string map_file)
}
/*
  Loads a "key value normalization" file into a map.

  Each useful line has three fields separated by single spaces:
      <key> <value> <normalization>
  Lines holding fewer than two spaces are ignored.

  When a key appears several times, its values are concatenated (most
  recent first, each one followed by a space) and its normalizations are
  summed.

  Progress is reported on cout. An empty file name yields an empty map
  without any output; a file that cannot be opened yields an empty map
  after printing " [failed] ".
*/
map <string, pair <string, float> > load_map_norm(string map_file)
{
  map <string, pair <string, float> > the_map ;

  // No file requested: nothing to load
  if (map_file.empty())
    return the_map ;

  cout << " <== " << map_file ;

  ifstream f(map_file.c_str());
  if (!f.is_open())
    {
      cout << " [failed] " << endl ;
      return the_map ;
    }

  int nb_keys = 0 ;
  string line ;

  // Testing the result of getline() stops the loop as soon as a read fails
  while (getline(f, line))
    {
      // Positions are kept as size_type so that npos is compared without truncation
      const string::size_type i = line.find(" ");
      if (i == string::npos)
        continue ;

      const string::size_type j = line.find(" ", i+1);
      if (j == string::npos)
        continue ;

      string key = line.substr(0, i);
      // substr() takes a length, not an end position: the value spans ]i, j[
      string value = line.substr(i+1, j-i-1);
      float norm = atof(line.substr(j+1).c_str());

      nb_keys++ ;

      // Merge with what was already stored for this key (empty string and 0 for a new key)
      pair <string, float> &entry = the_map[key];
      entry = make_pair(value + " " + entry.first, norm + entry.second);

      cout << key << " " << value << " " << norm << endl ;
    }

  cout << ": " << nb_keys << " elements" << endl ;

  return the_map ;
}
list< pair <float, int> > compute_normalization_list(map<string, list<Sequence> > &seqs_by_window,
map <string, pair <string, float> > normalization,
int total
)
{
list< pair <float, int> > result;
for (map <string, pair <string, float> >::iterator it = normalization.begin();
it != normalization.end();
it++) {
int nb_occs = seqs_by_window[it->first].size();
// If a normalized window is not found
if (!nb_occs)
continue ;
float norm = it->second.second / (nb_occs*1. / total);
// PRINT_VAR(nb_occs);
// PRINT_VAR(norm);
result.push_back(make_pair(norm, nb_occs));
}
result.sort(pair_occurrence_sort<float>);
return result;
}
/*
  Normalization with the largest standard.

  Returns the factor stored in the first element of norm_list,
  or 1 when the list is empty. nb_reads is not used.
  NOTE(review): this presumably relies on norm_list being sorted with the
  largest standard first -- confirm against the code building the list.
*/
float compute_normalization_one(list< pair <float, int> > norm_list, int nb_reads)
{
  return norm_list.empty() ? 1.f : norm_list.front().first;
}
/*
  Full normalization.

  norm_list holds (factor, number of reads) pairs. The list is walked from
  its beginning while nb_reads <= number of reads of the current element:

    - empty list: returns 1;
    - the walk stops on the first element (nb_reads is above it):
      returns the factor of that first element;
    - the walk reaches the end of the list (nb_reads is not above any
      element): returns the factor of the last element;
    - otherwise nb_reads lies between two consecutive elements: returns
      their factors interpolated on a logarithmic scale of the read counts.

  NOTE(review): the "above the highest / below the lowest standard" reading
  assumes norm_list is sorted by decreasing number of reads -- confirm.
*/
float compute_normalization(list< pair <float, int> > norm_list, int nb_reads)
{
  if (norm_list.empty())
    return 1.;

  // 'upper' trails one step behind 'lower'; it stays at end() until the walk moves
  list<pair <float, int> >::const_iterator upper = norm_list.end();
  list<pair <float, int> >::const_iterator lower = norm_list.begin();

  while (lower != norm_list.end() && nb_reads <= lower->second) {
    upper = lower;
    ++lower;
  }

  // The walk never moved: nb_reads is above the first standard
  if (upper == norm_list.end())
    return lower->first;

  // The walk went through the whole list: use the last standard
  if (lower == norm_list.end())
    return upper->first;

  // In-between two standards: logarithmic interpolation
  const float higher_norm = upper->first;
  const int higher_value = upper->second;
  const float lower_norm = lower->first;
  const int lower_value = lower->second;

  float ratio;
  if (higher_value == lower_value)
    ratio = 0.5;
  else
    ratio = (log(nb_reads) - log(lower_value)) / (log(higher_value) - log(lower_value));

  return lower_norm + (higher_norm - lower_norm) * ratio;
}
......@@ -7,13 +7,4 @@ using namespace std;
#include "fasta.h"
// Loads a map from the file 'map_file'.
// NOTE(review): the definition is not visible here -- presumably one key/value pair per line; confirm.
map <string, string> load_map(string map_file);

// Loads a "key value normalization" file: each key is mapped to a (value, normalization) pair.
// Returns an empty map when 'map_file' is empty or cannot be opened.
map <string, pair <string, float> > load_map_norm(string map_file);

// Builds the sorted list of (normalization factor, number of occurrences) pairs
// for the entries of 'normalization' that have sequences in 'seqs_by_window'.
// 'total' is the denominator turning a number of occurrences into a ratio.
list< pair <float, int> > compute_normalization_list(map<string, list<Sequence> > &seqs_by_window,
map <string, pair <string, float> > normalization,
int total
);

// Normalization with the largest standard: factor of the first element of 'norm_list', or 1 if the list is empty.
float compute_normalization_one(list< pair <float, int> > norm_list, int nb_reads);

// Full normalization: factor interpolated between the two elements of 'norm_list'
// surrounding 'nb_reads', or 1 if the list is empty.
float compute_normalization(list< pair <float, int> > norm_list, int nb_reads);
......@@ -153,7 +153,6 @@ ostream &WindowsStorage::printSortedWindows(ostream &os) {
}
JsonArray WindowsStorage::sortedWindowsToJsonArray(map <junction, JsonList> json_data_segment,
list< pair <float, int> > norm_list,
int nb_segmented) {
JsonArray windowsArray;
int top = 1;
......
......@@ -131,7 +131,6 @@ class WindowsStorage {
ostream &printSortedWindows(ostream &os);
JsonArray sortedWindowsToJsonArray(map<junction, JsonList> json_data_segment,
list< pair <float, int> > norm_list,
int nb_segmented);
/**
......
......@@ -287,7 +287,6 @@ int main (int argc, char **argv)
string forced_edges = "" ;
string windows_labels_file = "" ;
string normalization_file = "" ;
char c ;
......@@ -298,7 +297,7 @@ int main (int argc, char **argv)
//$$ options: getopt
while ((c = getopt(argc, argv, "Ahag:G:V:D:J:k:r:vw:e:C:t:l:dc:m:M:N:s:p:Sn:o:L%:Z:y:z:uU")) != EOF)
while ((c = getopt(argc, argv, "Ahag:G:V:D:J:k:r:vw:e:C:t:l:dc:m:M:N:s:p:Sn:o:L%:y:z:uU")) != EOF)
switch (c)
{
......@@ -319,9 +318,6 @@ int main (int argc, char **argv)
case 'l':
windows_labels_file = optarg;
break;
case 'Z':
normalization_file = optarg;
break;
case 'c':
if (!strcmp(COMMAND_CLONES,optarg))
......@@ -569,9 +565,6 @@ int main (int argc, char **argv)
/// Load labels ;
map <string, string> windows_labels = load_map(windows_labels_file);
map <string, pair <string, float> > normalization = load_map_norm(normalization_file);
switch(command) {
case CMD_WINDOWS: cout << "Extracting windows" << endl;
break;
......@@ -852,7 +845,7 @@ int main (int argc, char **argv)
<< " (" << setprecision(3) << 100 * (float) nb_segmented_including_too_short / nb_total_reads << "%)"
<< endl ;
// nb_segmented is the main denominator for the following (but will be normalized)
// nb_segmented is the main denominator for the following
int nb_segmented = we.getNbSegmented(TOTAL_SEG_AND_WINDOW);
float ratio_segmented = 100 * (float) nb_segmented / nb_total_reads ;
......@@ -894,10 +887,6 @@ int main (int argc, char **argv)
ofstream out_all_windows(f_all_windows.c_str());
windowsStorage->printSortedWindows(out_all_windows);
//$$ Normalization
list< pair <float, int> > norm_list = compute_normalization_list(windowsStorage->getMap(), normalization, nb_segmented);
//////////////////////////////////
//$$ min_reads_clone (ou label)
......@@ -1034,8 +1023,7 @@ int main (int argc, char **argv)
oss_human << "#### Clone #" << right << setfill('0') << setw(WIDTH_NB_CLONES) << num_clone
<< " – " << setfill(' ') << setw(WIDTH_NB_READS) << clone_nb_reads << " reads"
<< " – " << setprecision(3) << 100 * (float) clone_nb_reads / nb_segmented << "% "
<< " – " << 100 * (float) clone_nb_reads * compute_normalization_one(norm_list, clone_nb_reads) / nb_segmented << "% "
<< compute_normalization_one(norm_list, clone_nb_reads) << " " ;
;
string clone_id_human = oss_human.str();
// Window label
......@@ -1245,7 +1233,6 @@ int main (int argc, char **argv)
json_cmdline.add(stream_cmdline.str());// TODO: escape "s in argv
JsonArray jsonSortedWindows = windowsStorage->sortedWindowsToJsonArray(json_data_segment,
norm_list,
nb_segmented);
//samples field
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment