Commit 5bef468b authored by Marc Duez's avatar Marc Duez

cluster-junctions.cpp : use sorted clones to compute clusters

parent 9ac1b0d5
......@@ -24,9 +24,10 @@
return lh.second>rh.second;
}
comp_matrix::comp_matrix(WindowsStorage &ws):windows(ws)
comp_matrix::comp_matrix(list<pair <junction, int> > sc)
{
matrix_size = windows.size();
sort_clones = sc;
matrix_size = sort_clones.size();
if (matrix_size > JUNCTION_LIMIT) matrix_size = JUNCTION_LIMIT;
}
......@@ -41,33 +42,33 @@ void comp_matrix::compare(ostream &out, Cost cluster_cost)
out << " Using cost " << compareCost << endl ;
string j1, j2;
m = alloc_matrix(windows.size());
m = alloc_matrix(sort_clones.size());
int c=0;
int c1=0;
int c2=0;
size_t i = 0;
for (mjs::const_iterator it0 = windows.getMap().begin();
(it0 != windows.getMap().end()) & (i<matrix_size); ++it0 )
for (list <pair<junction,int> >::const_iterator it0 = sort_clones.begin();
(it0 != sort_clones.end()) & (i<matrix_size); ++it0)
{
i++;
j1=it0->first;
size_t k = 0;
for (mjs::const_iterator it1 = it0;
(it1 != windows.getMap().end()) & (k<matrix_size); ++it1 )
{
k++;
j2=it1->first;
DynProg dp = DynProg(j1, j2, DynProg::Local, compareCost);
int score=dp.compute();
int distance = max(j1.size(), j2.size())-score;
m[c2][c1]=distance;
m[c1][c2]=distance;
c1++;
c++;
}//fin it1
for (list <pair<junction,int> >::const_iterator it1 = it0;
(it1 != sort_clones.end()) & (k<matrix_size); ++it1)
{
k++;
j2=it1->first;
DynProg dp = DynProg(j1, j2, DynProg::Local, compareCost);
int score=dp.compute();
int distance = max(j1.size(), j2.size())-score;
m[c2][c1]=distance;
m[c1][c2]=distance;
c1++;
c++;
}//fin it1
c2++;
c1=c2;
}//fin it0
......@@ -78,13 +79,13 @@ void comp_matrix::compare(ostream &out, Cost cluster_cost)
void comp_matrix::load(string file){
m = alloc_matrix(windows.size());
m = alloc_matrix(sort_clones.size());
char* tampon=(char*)malloc(windows.size()*sizeof(char));
char* tampon=(char*)malloc(sort_clones.size()*sizeof(char));
ifstream in_comp(file.c_str());
for(unsigned int i=0; i<windows.size();i++){
in_comp.read(tampon, windows.size()*sizeof(char));
for(unsigned int j=0;j<windows.size(); j++){
for(unsigned int i=0; i<sort_clones.size();i++){
in_comp.read(tampon, sort_clones.size()*sizeof(char));
for(unsigned int j=0;j<sort_clones.size(); j++){
m[i][j]=tampon[j];
}
}
......@@ -98,8 +99,8 @@ void comp_matrix::save(string file){
ofstream out_comp(file.c_str());
for(unsigned int i=0; i<windows.size();i++){
out_comp.write((char *)m[i],windows.size()*sizeof(char));
for(unsigned int i=0; i<sort_clones.size();i++){
out_comp.write((char *)m[i],sort_clones.size()*sizeof(char));
}
out_comp.close();
......@@ -107,7 +108,7 @@ void comp_matrix::save(string file){
void comp_matrix::del(){
for (unsigned int i=0;i<windows.size();i++){
for (unsigned int i=0;i<sort_clones.size();i++){
free(m[i]);
}
free(m);
......@@ -134,8 +135,8 @@ list<list<junction> > comp_matrix::cluster(string forced_edges, int w, ostream
int c2=0;
size_t i = 0;
for (mjs::const_iterator ite = windows.getMap().begin();
(ite != windows.getMap().end()) & (i<matrix_size); ++ite )
for (list <pair<junction,int> >::const_iterator ite = sort_clones.begin();
(ite != sort_clones.end()) & (i<matrix_size); ++ite)
{
i++;
n_j++;
......@@ -145,33 +146,32 @@ list<list<junction> > comp_matrix::cluster(string forced_edges, int w, ostream
}
i = 0;
for (mjs::const_iterator it0 = windows.getMap().begin();
(it0 != windows.getMap().end()) & (i<matrix_size); ++it0 )
{
for (list <pair<junction,int> >::const_iterator it0 = sort_clones.begin();
(it0 != sort_clones.end()) & (i<matrix_size); ++it0)
{
i++;
j1=it0->first;
int k=it0->second.size();
int k=it0->second;
count[j1]=k;
n_j2+=k;
size_t j = 0;
for (mjs::const_iterator it1 = windows.getMap().begin();
(it1 != windows.getMap().end()) & (j<matrix_size); ++it1 )
for (list <pair<junction,int> >::const_iterator it1 = sort_clones.begin();
(it1 != sort_clones.end()) & (j<matrix_size); ++it1)
{
j++;
j2=it1->first;
int distance = (int)m[c2][c1];
if (distance <= epsilon){
neighbor[j1].push_back(j2);
}
c1++;
c++;
}//fin it1
c2++;
c1=0;
}//fin it0
j++;
j2=it1->first;
int distance = (int)m[c2][c1];
if (distance <= epsilon){
neighbor[j1].push_back(j2);
}
c1++;
c++;
}//fin it1
c2++;
c1=0;
}//fin it0
/////////////////////////
//Forced - edges
......@@ -227,8 +227,8 @@ list<list<junction> > comp_matrix::cluster(string forced_edges, int w, ostream
int noise = 0;
int nb_comp = 0 ;
i = 0;
for (mjs::const_iterator it0 = windows.getMap().begin();
(it0 != windows.getMap().end()) & (i<matrix_size); ++it0 )
for (list <pair<junction,int> >::const_iterator it0 = sort_clones.begin();
(it0 != sort_clones.end()) & (i<matrix_size); ++it0)
{
i++;
j1=it0->first;
......
......@@ -14,13 +14,13 @@
using namespace std ;
#define SIMILAR_JUNCTIONS_THRESHOLD 1
#define JUNCTION_LIMIT 500
#define JUNCTION_LIMIT 200
class comp_matrix {
public:
char ** m;
WindowsStorage &windows;
list<pair <junction, int> > sort_clones;
map <string, int> count;
int n_j;
int n_j2;
......@@ -29,7 +29,7 @@ class comp_matrix {
/**
* create new distance matrix
*/
comp_matrix(WindowsStorage &windows);
comp_matrix(list<pair <junction, int> > sc);
/**
* init matrix with a KmerStore and compute distance value between sequences
......
......@@ -909,12 +909,16 @@ int main (int argc, char **argv)
cout << " ! No windows with current parameters." << endl;
}
//////////////////////////////////
//$$ Clustering
windowsStorage->sort();
list<pair <junction, int> > sort_clones = windowsStorage->getSortedList();
cout << " ==> " << sort_clones.size() << " clones" << endl ;
list <list <junction> > clones_windows;
comp_matrix comp=comp_matrix(*windowsStorage);
comp_matrix comp=comp_matrix(sort_clones);
if (command == CMD_CLONES) {
//////////////////////////////////
//$$ Clustering
if (epsilon || forced_edges.size())
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment