Commit 4a11da92 authored by Mathieu Giraud's avatar Mathieu Giraud
Browse files

merge -- some refactorizations in fuse.py, new diff.py

Merge branch 'rework_data' of git+ssh://scm.gforge.inria.fr//gitroot//vidjil/vidjil into rework_data
parents 5748a1bf 54a565cc
......@@ -24,7 +24,12 @@
return lh.second>rh.second;
}
comp_matrix::comp_matrix(WindowsStorage &ws):windows(ws){}
comp_matrix::comp_matrix(list<pair <junction, int> > sc)
{
sort_clones = sc;
matrix_size = sort_clones.size();
if (matrix_size > JUNCTION_LIMIT) matrix_size = JUNCTION_LIMIT;
}
void comp_matrix::compare(ostream &out, Cost cluster_cost)
{
......@@ -37,30 +42,33 @@ void comp_matrix::compare(ostream &out, Cost cluster_cost)
out << " Using cost " << compareCost << endl ;
string j1, j2;
m = alloc_matrix(windows.size());
m = alloc_matrix(sort_clones.size());
int c=0;
int c1=0;
int c2=0;
for (mjs::const_iterator it0 = windows.getMap().begin();
it0 != windows.getMap().end(); ++it0 )
size_t i = 0;
for (list <pair<junction,int> >::const_iterator it0 = sort_clones.begin();
(it0 != sort_clones.end()) & (i<matrix_size); ++it0)
{
i++;
j1=it0->first;
for (mjs::const_iterator it1 = it0;
it1 != windows.getMap().end(); ++it1 )
{
j2=it1->first;
DynProg dp = DynProg(j1, j2, DynProg::Local, compareCost);
int score=dp.compute();
int distance = max(j1.size(), j2.size())-score;
m[c2][c1]=distance;
m[c1][c2]=distance;
c1++;
c++;
}//fin it1
size_t k = 0;
for (list <pair<junction,int> >::const_iterator it1 = it0;
(it1 != sort_clones.end()) & (k<matrix_size); ++it1)
{
k++;
j2=it1->first;
DynProg dp = DynProg(j1, j2, DynProg::Local, compareCost);
int score=dp.compute();
int distance = max(j1.size(), j2.size())-score;
m[c2][c1]=distance;
m[c1][c2]=distance;
c1++;
c++;
}//fin it1
c2++;
c1=c2;
}//fin it0
......@@ -69,14 +77,15 @@ void comp_matrix::compare(ostream &out, Cost cluster_cost)
}
void comp_matrix::load(string file){
m = alloc_matrix(windows.size());
char* tampon=(char*)malloc(windows.size()*sizeof(char));
m = alloc_matrix(sort_clones.size());
char* tampon=(char*)malloc(sort_clones.size()*sizeof(char));
ifstream in_comp(file.c_str());
for(unsigned int i=0; i<windows.size();i++){
in_comp.read(tampon, windows.size()*sizeof(char));
for(unsigned int j=0;j<windows.size(); j++){
for(unsigned int i=0; i<sort_clones.size();i++){
in_comp.read(tampon, sort_clones.size()*sizeof(char));
for(unsigned int j=0;j<sort_clones.size(); j++){
m[i][j]=tampon[j];
}
}
......@@ -90,8 +99,8 @@ void comp_matrix::save(string file){
ofstream out_comp(file.c_str());
for(unsigned int i=0; i<windows.size();i++){
out_comp.write((char *)m[i],windows.size()*sizeof(char));
for(unsigned int i=0; i<sort_clones.size();i++){
out_comp.write((char *)m[i],sort_clones.size()*sizeof(char));
}
out_comp.close();
......@@ -99,7 +108,7 @@ void comp_matrix::save(string file){
void comp_matrix::del(){
for (unsigned int i=0;i<windows.size();i++){
for (unsigned int i=0;i<sort_clones.size();i++){
free(m[i]);
}
free(m);
......@@ -125,40 +134,44 @@ list<list<junction> > comp_matrix::cluster(string forced_edges, int w, ostream
int c1=0;
int c2=0;
for (mjs::const_iterator ite = windows.getMap().begin();
ite != windows.getMap().end(); ++ite )
size_t i = 0;
for (list <pair<junction,int> >::const_iterator ite = sort_clones.begin();
(ite != sort_clones.end()) & (i<matrix_size); ++ite)
{
i++;
n_j++;
list <string> voisins ;
j1=ite->first;
neighbor[j1]=voisins;
}
for (mjs::const_iterator it0 = windows.getMap().begin();
it0 != windows.getMap().end(); ++it0 )
{
i = 0;
for (list <pair<junction,int> >::const_iterator it0 = sort_clones.begin();
(it0 != sort_clones.end()) & (i<matrix_size); ++it0)
{
i++;
j1=it0->first;
int k=it0->second.size();
int k=it0->second;
count[j1]=k;
n_j2+=k;
for (mjs::const_iterator it1 = windows.getMap().begin();
it1 != windows.getMap().end(); ++it1 )
size_t j = 0;
for (list <pair<junction,int> >::const_iterator it1 = sort_clones.begin();
(it1 != sort_clones.end()) & (j<matrix_size); ++it1)
{
j2=it1->first;
int distance = (int)m[c2][c1];
if (distance <= epsilon){
neighbor[j1].push_back(j2);
}
c1++;
c++;
}//fin it1
c2++;
c1=0;
}//fin it0
j++;
j2=it1->first;
int distance = (int)m[c2][c1];
if (distance <= epsilon){
neighbor[j1].push_back(j2);
}
c1++;
c++;
}//fin it1
c2++;
c1=0;
}//fin it0
/////////////////////////
//Forced - edges
......@@ -213,9 +226,11 @@ list<list<junction> > comp_matrix::cluster(string forced_edges, int w, ostream
int noise = 0;
int nb_comp = 0 ;
for (mjs::const_iterator it0 = windows.getMap().begin();
it0 != windows.getMap().end(); ++it0 )
i = 0;
for (list <pair<junction,int> >::const_iterator it0 = sort_clones.begin();
(it0 != sort_clones.end()) & (i<matrix_size); ++it0)
{
i++;
j1=it0->first;
if(visit[j1]==false && clust[j1]==false){
......@@ -382,6 +397,27 @@ void comp_matrix::stat_cluster( list<list<junction> > cluster, ostream &out )
}
JsonArray comp_matrix::toJson(list<list<junction> > clusterList)
{
JsonArray result;
for (list <list <string> >::iterator it = clusterList.begin();
it != clusterList.end(); ++it )
{
JsonArray cluster;
list<string> li=*it;
for (list<string>::iterator it2 = li.begin();
it2 != li.end(); ++it2 )
{
cluster.add(*it2);
}
result.add(cluster);
}
return result;
}
char **comp_matrix::alloc_matrix(size_t s) {
char **matrix = (char**)malloc(s*sizeof(char*));
for (size_t i=0;i<s;i++){
......
......@@ -9,24 +9,27 @@
#include <ctime>
#include "dynprog.h"
#include "windows.h"
#include "json.h"
using namespace std ;
#define SIMILAR_JUNCTIONS_THRESHOLD 1
#define JUNCTION_LIMIT 200
class comp_matrix {
public:
char ** m;
WindowsStorage &windows;
list<pair <junction, int> > sort_clones;
map <string, int> count;
int n_j;
int n_j2;
size_t matrix_size;
/**
* create new distance matrix
*/
comp_matrix(WindowsStorage &windows);
comp_matrix(list<pair <junction, int> > sc);
/**
* init matrix with a KmerStore and compute distance value between sequences
......@@ -65,6 +68,9 @@ class comp_matrix {
void del();
void stat_cluster( list<list<junction> > cluster, ostream &out=cout);
JsonArray toJson( list<list<junction> > cluster);
private:
......
......@@ -164,7 +164,7 @@ string JsonArray::toString(){
stream << " [ ";
for ( list<string>::iterator i=l.begin(); i!= l.end(); ++i){
if (i!=l.begin()) stream << ", ";
if (i!=l.begin()) stream << ", " << endl;
stream << (*i);
}
......
......@@ -727,25 +727,17 @@ JsonList FineSegmenter::toJsonList(Germline *germline){
result.add("name", code_short);
JsonList seg;
// TODO: what is going on if some list is smaller than JSON_REMEMBER_BEST ?
JsonArray jsonV;
for (int i=0; i<JSON_REMEMBER_BEST; i++) jsonV.add( germline->rep_5.label(score_V[i].second) ) ;
seg.add("5", jsonV);
seg.add("5", germline->rep_5.label(best_V));
seg.add("5start", 0);
seg.add("5end", Vend);
if (score_D.size()>0){
JsonArray jsonD;
for (int i=0; i<JSON_REMEMBER_BEST; i++) jsonD.add( germline->rep_4.label(score_D[i].second) ) ;
result.add("4", jsonD);
seg.add("4", germline->rep_4.label(best_D));
result.add("4start", Dstart);
result.add("4end", Dend);
}
JsonArray jsonJ;
for (int i=0; i<JSON_REMEMBER_BEST; i++) jsonJ.add( germline->rep_3.label(score_J[i].second) ) ;
seg.add("3", jsonJ);
seg.add("3", germline->rep_3.label(best_J));
seg.add("3start", Jstart);
result.add("seg", seg);
......
......@@ -909,13 +909,16 @@ int main (int argc, char **argv)
cout << " ! No windows with current parameters." << endl;
}
if (command == CMD_CLONES) {
//////////////////////////////////
//$$ Clustering
windowsStorage->sort();
list<pair <junction, int> > sort_clones = windowsStorage->getSortedList();
cout << " ==> " << sort_clones.size() << " clones" << endl ;
list <list <junction> > clones_windows;
comp_matrix comp=comp_matrix(*windowsStorage);
comp_matrix comp=comp_matrix(sort_clones);
if (command == CMD_CLONES) {
if (epsilon || forced_edges.size())
{
......@@ -953,10 +956,6 @@ int main (int argc, char **argv)
// || ((clone_nb_reads >= min_reads_clone)
// && (clone_nb_reads * 100.0 / nb_segmented >= ratio_reads_clone)))
windowsStorage->sort();
list<pair <junction, int> > sort_clones = windowsStorage->getSortedList();
cout << " ==> " << sort_clones.size() << " clones" << endl ;
if (sort_clones.size() == 0)
{
cout << " ! No clones with current parameters." << endl;
......@@ -1278,6 +1277,11 @@ int main (int argc, char **argv)
json->add("reads", *json_reads);
json->add("germline", germline_system);
json->add("clones", jsonSortedWindows);
if (epsilon || forced_edges.size()){
JsonArray json_clusters = comp.toJson(clones_windows);
json->add("clusters", json_clusters );
}
//Added edges in the json output file
//json->add("links", jsonLevenshtein);
......
......@@ -72,10 +72,10 @@ Axis.prototype = {
var clone = self.m.clone(cloneID)
if (typeof clone.seg != "undefined"
&& typeof clone.seg[type2] != "undefined"
&& typeof gene_list[clone.seg[type2][0].split("*")[0]] != "undefined")
&& typeof gene_list[clone.seg[type2].split("*")[0]] != "undefined")
{
var allele = clone.seg[type2][0]
var gene = clone.seg[type2][0].split("*")[0]
var allele = clone.seg[type2]
var gene = clone.seg[type2].split("*")[0]
var pos = ((gene_list[gene].rank+0.5)/(total_gene+1))
if (displayAllele){
......
......@@ -136,9 +136,9 @@ Clone.prototype = {
withAllele = typeof withAllele !== 'undefined' ? withAllele : true;
if (typeof (this.seg) != 'undefined' && typeof (this.seg["5"]) != 'undefined') {
if (withAllele) {
return this.seg["5"][0]
return this.seg["5"]
}else{
return this.seg["5"][0].split('*')[0];
return this.seg["5"].split('*')[0];
}
}
return "undefined V";
......@@ -148,9 +148,9 @@ Clone.prototype = {
withAllele = typeof withAllele !== 'undefined' ? withAllele : true;
if (typeof (this.seg) != 'undefined' && typeof (this.seg["4"]) != 'undefined') {
if (withAllele) {
return this.seg["4"][0]
return this.seg["4"]
}else{
return this.seg["4"][0].split('*')[0];
return this.seg["4"].split('*')[0];
}
}
return "undefined D";
......@@ -160,9 +160,9 @@ Clone.prototype = {
withAllele = typeof withAllele !== 'undefined' ? withAllele : true;
if (typeof (this.seg) != 'undefined' && typeof (this.seg["3"]) != 'undefined') {
if (withAllele) {
return this.seg["3"][0]
return this.seg["3"]
}else{
return this.seg["3"][0].split('*')[0];
return this.seg["3"].split('*')[0];
}
}
return "undefined J";
......@@ -172,9 +172,9 @@ Clone.prototype = {
withAllele = typeof withAllele !== 'undefined' ? withAllele : true;
if (typeof (this.seg) != 'undefined' && typeof (this.seg[type]) != 'undefined') {
if (withAllele) {
return this.seg[type][0];
return this.seg[type];
}else{
return this.seg[type][0].split('*')[0];
return this.seg[type].split('*')[0];
}
}
return "undefined";
......
......@@ -59,7 +59,7 @@ Germline.prototype = {
if (typeof this.m.clone(i).seg != "undefined" &&
typeof this.m.clone(i).seg[type2] != "undefined"
){
var gene=this.m.clone(i).seg[type2][0];
var gene=this.m.clone(i).seg[type2];
if (this.m.system != "multi" || this.m.clone(i).getSystem() == system){
if ( typeof this.allele[gene] != "undefined"){
g[gene] = this.allele[gene]
......
......@@ -66,7 +66,7 @@ Model.prototype = {
reset: function () {
this.analysis = {
clones: [],
cluster: [],
clusters: [],
date: []
};
this.t = 0;
......@@ -233,8 +233,9 @@ Model.prototype = {
//copy .data file in model
for (var key in data){
self[key] = data[key]
if (key != "clusters") self[key] = data[key]
}
this.data_clusters = data.clusters;
//filter clones
self.clones = [];
......@@ -581,23 +582,34 @@ Model.prototype = {
}
}
}
this.loadCluster(this.data_clusters)
this.loadCluster(analysis.clusters)
}
this.init()
},
loadCluster: function (clusters) {
for (var i = 0; i < clusters.length; i++) {
// CUSTOM CLUSTER
var clusters = analysis.clusters;
for (var i = 0; i < clusters.length; i++) {
var new_cluster = [];
var l = this.mapID[clusters[i][0]]
for (var j=0; j<clusters[i].length;j++){
var new_cluster = [];
for (var j=0; j<clusters[i].length;j++){
if (typeof this.mapID[clusters[i][j]] != 'undefined'){
var cloneID = this.mapID[clusters[i][j]]
new_cluster = new_cluster.concat(this.clusters[cloneID]);
console.log(cloneID + " +++ "+new_cluster)
this.clusters[cloneID] = [];
}
}
if (new_cluster.length != 0){
var l = new_cluster[0]
for (var j=0; j<new_cluster.length;j++){
if (m.clone(new_cluster[j]).top < m.clone(l).top) l = new_cluster[j]
}
this.clusters[l] = new_cluster;
}
}
this.init()
},
/*
......@@ -806,7 +818,7 @@ Model.prototype = {
this.norm = false
for (var i=0; i<this.samples.number; i++){
this.normalization.A[i] = this.getSize(cloneID, i)
this.normalization.A[i] = this.clone(cloneID).getSize(i)
}
this.norm = tmp
......
......@@ -173,15 +173,15 @@ In the .analysis file, this section is intended to describe some specific clones
// positions are related to the 'sequence'
// names of V/D/J genes should match the ones in files referenced in germline/germline.data
{
"5": [],
"5": "IGHV5*01",
"5start": 0,
"5end": 0,
"4": [],
"4": "IGHD1*01",
"4start": 0,
"4end": 0,
"3": [],
"3": "IGHJ3*02",
"3start": 0,
"3end": 0,
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment