Commit 0cde045b authored by Mathieu Giraud's avatar Mathieu Giraud
Browse files

merge: A brand new way to visualize clone similarity

Developed by @duez, using tsne.js by @karpathy (https://github.com/karpathy/tsnejs).
Loads the similarity matrix computed in C++ with a code started last year by @k0pernicus.
parents 5cbefd86 236f38df
......@@ -30,15 +30,18 @@ LIBCORE=core/vidjil.a lib/lib.a
BINDIR=../
CGIDIR=../browser/cgi/
TOOLDIR=tools/
VIDJIL=$(BINDIR)$(EXEC)
ALIGN_CGI=$(CGIDIR)align.cgi
SIMILARITY_CGI=$(CGIDIR)similarity.cgi
SIMILARITY_TOOL=$(TOOLDIR)similarity
CREATE_VERSION_GIT_H := $(shell test -x ./create-git-version-h.sh && ./create-git-version-h.sh)
.PHONY: all core lib clean forcedep
all: $(VIDJIL) $(ALIGN_CGI)
all: $(VIDJIL) $(ALIGN_CGI) $(SIMILARITY_CGI) $(SIMILARITY_TOOL)
###
......@@ -73,6 +76,15 @@ $(ALIGN_CGI): cgi/align.o $(LIBCORE)
mkdir -p $(CGIDIR)
$(MAKE) -C core OPTIM="$(OPTIM)"
$(CXX) -o $@ $^ $(LDFLAGS) $(LDLIBS) $(CXXFLAGS)
$(SIMILARITY_CGI): cgi/similarity.o $(LIBCORE)
make -C core OPTIM="$(OPTIM)"
$(CXX) -o $@ $^ $(LDFLAGS) $(LDLIBS) $(CXXFLAGS)
$(SIMILARITY_TOOL): cgi/similarity.o $(LIBCORE)
make -C core OPTIM="$(OPTIM)"
$(CXX) -o $@ $^ $(LDFLAGS) $(LDLIBS) $(CXXFLAGS)
###
debug:
......
......@@ -6,35 +6,36 @@
#include "../core/dynprog.h"
#include "../core/fasta.h"
#include "../core/lazy_msa.h"
#include "../core/json.h"
#include "../lib/json.hpp"
using namespace std;
using namespace std ;
using json = nlohmann::json;
bool check_cgi_parameters(JsonList &result) {
bool check_cgi_parameters(json &result) {
char * requestMethod = getenv("REQUEST_METHOD");
if(!requestMethod) {
result.add("Error", "requestMethod");
result["Error"] = "requestMethod";
return false;
} else if(strcmp(requestMethod, "POST") != 0) {
result.add("Error", "requestMethod =/= post");
result["Error"] = "requestMethod =/= post";
return false;
}
char * contentType = getenv("CONTENT_TYPE");
if(!contentType) {
result.add("Error", "content_type");
result["Error"] = "content_type";
return false;
}
result.add("requestMethod",requestMethod);
result.add("contentType", contentType);
result["requestMethod"] = requestMethod;
result["contentType"] = contentType;
return true;
}
bool create_fasta_file(JsonList &result, int fd) {
bool create_fasta_file(json &result, int fd) {
char temp[1024];
FILE *f;
f = fdopen(fd,"w");
if(f == NULL){
result.add("Error", "opening tempfile");
result["Error"] = "opening tempfile";
return false;
}else{
while(cin) {
......@@ -54,7 +55,7 @@ int main(int argc, char* argv[])
const char* fdata;
// ostringstream ost;
char filename[] = "/tmp/VidjilAlignXXXXXX";
JsonList result;
json result;
bool error = false;
bool cgi_mode;
......@@ -67,7 +68,7 @@ int main(int argc, char* argv[])
if (! error) {
int fd = mkstemp(filename);
if (fd == -1) {
result.add("Error", "Temporary file");
result["Error"] = "Temporary file";
error = true;
} else {
error = ! create_fasta_file(result, fd);
......@@ -101,16 +102,16 @@ int main(int argc, char* argv[])
if (cgi_mode) {
if (! error) {
JsonArray alignment;
json alignment;
for (int i=0; i<lm.sizeUsed+2; i++){
alignment.add(align_str[i]);
alignment.push_back(align_str[i]);
}
result.add("seq", alignment);
result["seq"] = alignment;
}
remove(filename);
cout << result.toString();
cout << result.dump(2);
}else{
int length=60;
int n=align_str[0].size();
......
#include <fstream>
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <cstdlib>
#include "../core/dynprog.h"
#include "../core/fasta.h"
#include "../core/lazy_msa.h"
#include "../core/similarityMatrix.h"
#include "../core/compare-all.h"
#include "../lib/json.hpp"
using namespace std ;
using json = nlohmann::json;
bool check_cgi_parameters(json &result) {
char * requestMethod = getenv("REQUEST_METHOD");
if(!requestMethod) {
result["Error"] = "requestMethod";
return false;
} else if(strcmp(requestMethod, "POST") != 0) {
result["Error"] = "requestMethod =/= post";
return false;
}
char * contentType = getenv("CONTENT_TYPE");
if(!contentType) {
result["Error"] = "content_type";
return false;
}
result["requestMethod"] = requestMethod;
result["contentType"] = contentType;
return true;
}
bool create_fasta_file(json &result, int fd) {
char temp[1024];
FILE *f;
f = fdopen(fd,"w");
if(f == NULL){
result["Error"] = "opening tempfile";
return false;
}else{
while(cin) {
cin.getline(temp, 1024);
fputs(temp, f);
fputs("\n", f);
}
}
fclose(f);
return true;
}
int main(int argc, char* argv[])
{
const char* fdata;
// ostringstream ost;
char filename[] = "/tmp/VidjilSimilarityXXXXXX";
json result;
bool error = false;
bool cgi_mode;
bool output_json = false;
//CGI
if (argc <= 1){
cgi_mode=true;
output_json=true;
cout <<"Content-type: text/html"<<endl<<endl;
error = ! check_cgi_parameters(result);
if (! error) {
int fd = mkstemp(filename);
if (fd == -1) {
result["Error"] = "Temporary file";
error = true;
} else {
error = ! create_fasta_file(result, fd);
fdata = filename;
}
}
//command
}else{
cgi_mode=false;
//help !
if (strcmp(argv[1], "-h")==0){
cout << "usage: similarity [-h] [-j] file\n\n";
cout << "file : fasta file\n";
cout << "-h : help\n";
cout << "-j : json output\n";
return 0;
}
if (argc >= 3){
if (strcmp(argv[1], "-j")==0) {
output_json = true;
}
fdata = argv[2];
}else{
fdata = argv[1];
}
}
if (!cgi_mode) cout <<endl;
if (! error) {
Fasta fa(fdata, 1, " ", !output_json);
list<Sequence> reads;
reads = fa.getAll();
list<string> labels;
for (int i=0; i < reads.size(); i++) {
labels.push_back(fa.label(i));
}
SimilarityMatrix matrix = compare_all(reads, labels);
if (output_json) {
json j;
j << JsonOutputSimilarityMatrix(matrix);
cout << j.dump();
}else{
cout << RawOutputSimilarityMatrix(matrix, 90);
}
}
}
......@@ -165,20 +165,16 @@ ostream &operator<<(ostream &out, const RawOutputSimilarityMatrix &outputMatrix)
/*Export a similarity matrix, for the edit distance distribution & DBSCAN algorithm
*/
JsonArray &operator<<(JsonArray &out, const JsonOutputSimilarityMatrix &outputMatrix) {
json &operator<<(json &out, const JsonOutputSimilarityMatrix &outputMatrix) {
SimilarityMatrix &matrix = outputMatrix.matrix;
out = json::array();
for (int i = 0; i < matrix.size(); i++) {
out[i] = json::array();
for (int j = 0; j < matrix.size(); j++) {
if (i < j) {
//Creation of an edges objects array, which contains a source objet, a target object, and the length of the distance between them
JsonList lineEdge;
lineEdge.add("source", i);
lineEdge.add("target", j);
//100 - similarity -> distance
lineEdge.add("len", (100 - matrix(i,j)));
out.add(lineEdge);
}
out[i][j] = fabs(matrix(i,j));
}
}
return out;
......@@ -190,16 +186,12 @@ json &operator<<(json &out, const JsonOutputWindowsMatrix &outputMatrix) {
SimilarityMatrix &matrix = outputMatrix.matrix;
out = {
{"index", json::object()}, //index clone id > matrix row/column
{"matrix", json::array()} //2 dimensional array
};
out = json::array();
for (int i = 0; i < matrix.size(); i++) {
//out["index"][] =
out["matrix"][i] = json::array();
out[i] = json::array();
for (int j = 0; j < matrix.size(); j++) {
out["matrix"][i][j] = fabs(matrix(i,j));
out[i][j] = fabs(matrix(i,j));
}
}
return out;
......
......@@ -121,7 +121,7 @@ class JsonOutputWindowsMatrix: public OutputSimilarityMatrix {
ostream &operator<<(ostream &out, const RawOutputSimilarityMatrix &matrix);
ostream &operator<<(ostream &out, const HTMLOutputSimilarityMatrix &matrix);
JsonArray &operator<<(JsonArray &out, const JsonOutputSimilarityMatrix &matrix);
json &operator<<(json &out, const JsonOutputSimilarityMatrix &matrix);
json &operator<<(json &out, const JsonOutputWindowsMatrix &matrix);
#endif
......@@ -20,6 +20,7 @@ require(["jquery",
"underscore",
"rgbcolor",
"file",
"tsne",
"../compare",
"../menu",
"../dbscan",
......@@ -39,9 +40,9 @@ require(["jquery",
"../crossDomain",
"../pdf",
"../database",
"../stats",
"../shortcut",
"../export"
"../export",
"../similarity"
], function(){
if (typeof main == "undefined"){
require(["../main"]);
......
......@@ -143,9 +143,10 @@ Axis.prototype = {
* @param {boolean} percent - display label as percent ( value 1 => 100%)
* @param {boolean} use_log - use a logarithmic scale instead of a linear
* */
custom: function(fct, default_min, default_max, output, use_log){
custom: function(fct, default_min, default_max, output, use_log, display_label){
output = typeof output !== 'undefined' ? output : 'float';
use_log = typeof use_log !== 'undefined' ? use_log : false;
display_label = typeof display_label !== 'undefined' ? display_label : true;
var self = this;
this.fct = fct;
......@@ -159,15 +160,17 @@ Axis.prototype = {
var min = default_min;
var max = default_max;
if (typeof min === 'function') min = min();
if (typeof min === 'function') max = max();
if (typeof max === 'function') max = max();
for (var i in this.m.clones){
var tmp;
try{
tmp = this.fct(i);
}catch(e){}
}catch(e){
tmp = undefined;
}
if ( typeof tmp != "undefined"){
if ( typeof tmp != "undefined" && !isNaN(tmp)){
if ( tmp > max || typeof max == "undefined") max = tmp;
if ( tmp < min || typeof min == "undefined") min = tmp;
}
......@@ -240,7 +243,7 @@ Axis.prototype = {
}
}
this.computeCustomLabels(min, max, output, use_log)
this.computeCustomLabels(min, max, output, use_log, display_label)
},
/**
......@@ -251,13 +254,15 @@ Axis.prototype = {
* @param {boolean} percent - display label as percent ( value 1 => 100%)
* @param {boolean} use_log - use a logarithmic scale instead of a linear
* */
computeCustomLabels: function(min, max, output, use_log){
computeCustomLabels: function(min, max, output, use_log, display_label){
this.labels = [];
if ((output == "string") || (output == "string-sorted")) {
var key = Object.keys(this.values)
for (var i in key){
this.labels.push(this.label("line", this.values[key[i]], key[i]));
var text = key[i];
if (!display_label) text = "";
this.labels.push(this.label("line", this.values[key[i]], text));
}
}else{
if (use_log){
......@@ -265,6 +270,7 @@ Axis.prototype = {
for (var i = 0; i < 10; i++) {
var pos = this.sizeScale(h); // pos is possibly already reversed
var text = this.m.formatSize(h, false)
if (!display_label) text = "";
if (pos >= 0 && pos <= 1)
this.labels.push(this.label("line", pos, text));
h = h / 10;
......@@ -284,7 +290,7 @@ Axis.prototype = {
}
if (this.reverse) pos = 1 - pos;
if (!display_label) text = "";
this.labels.push(this.label("line", pos, text));
}
}
......
/* tsne.JS
* AUTHOR : Andrej Karpathy
* https://github.com/karpathy/tsnejs
* license : MIT
* */
// create main global object
var tsnejs = tsnejs || { REVISION: 'ALPHA' };
(function(global) {
"use strict";
// utility function
var assert = function(condition, message) {
if (!condition) { throw message || "Assertion failed"; }
}
// syntax sugar
var getopt = function(opt, field, defaultval) {
if(opt.hasOwnProperty(field)) {
return opt[field];
} else {
return defaultval;
}
}
// return 0 mean unit standard deviation random number
var return_v = false;
var v_val = 0.0;
var gaussRandom = function() {
if(return_v) {
return_v = false;
return v_val;
}
var u = 2*Math.random()-1;
var v = 2*Math.random()-1;
var r = u*u + v*v;
if(r == 0 || r > 1) return gaussRandom();
var c = Math.sqrt(-2*Math.log(r)/r);
v_val = v*c; // cache this for next function call for efficiency
return_v = true;
return u*c;
}
// return random normal number
var randn = function(mu, std){ return mu+gaussRandom()*std; }
// utilitity that creates contiguous vector of zeros of size n
var zeros = function(n) {
if(typeof(n)==='undefined' || isNaN(n)) { return []; }
if(typeof ArrayBuffer === 'undefined') {
// lacking browser support
var arr = new Array(n);
for(var i=0;i<n;i++) { arr[i]= 0; }
return arr;
} else {
return new Float64Array(n); // typed arrays are faster
}
}
// utility that returns 2d array filled with random numbers
// or with value s, if provided
var randn2d = function(n,d,s) {
var uses = typeof s !== 'undefined';
var x = [];
for(var i=0;i<n;i++) {
var xhere = [];
for(var j=0;j<d;j++) {
if(uses) {
xhere.push(s);
} else {
xhere.push(randn(0.0, 1e-4));
}
}
x.push(xhere);
}
return x;
}
// compute L2 distance between two vectors
var L2 = function(x1, x2) {
var D = x1.length;
var d = 0;
for(var i=0;i<D;i++) {
var x1i = x1[i];
var x2i = x2[i];
d += (x1i-x2i)*(x1i-x2i);
}
return d;
}
// compute pairwise distance in all vectors in X
var xtod = function(X) {
var N = X.length;
var dist = zeros(N * N); // allocate contiguous array
for(var i=0;i<N;i++) {
for(var j=i+1;j<N;j++) {
var d = L2(X[i], X[j]);
dist[i*N+j] = d;
dist[j*N+i] = d;
}
}
return dist;
}
// compute (p_{i|j} + p_{j|i})/(2n)
var d2p = function(D, perplexity, tol) {
var Nf = Math.sqrt(D.length); // this better be an integer
var N = Math.floor(Nf);
assert(N === Nf, "D should have square number of elements.");
var Htarget = Math.log(perplexity); // target entropy of distribution
var P = zeros(N * N); // temporary probability matrix
var prow = zeros(N); // a temporary storage compartment
for(var i=0;i<N;i++) {
var betamin = -Infinity;
var betamax = Infinity;
var beta = 1; // initial value of precision
var done = false;
var maxtries = 50;
// perform binary search to find a suitable precision beta
// so that the entropy of the distribution is appropriate
var num = 0;
while(!done) {
//debugger;
// compute entropy and kernel row with beta precision
var psum = 0.0;
for(var j=0;j<N;j++) {
var pj = Math.exp(- D[i*N+j] * beta);
if(i===j) { pj = 0; } // we dont care about diagonals
prow[j] = pj;
psum += pj;
}
// normalize p and compute entropy
var Hhere = 0.0;
for(var j=0;j<N;j++) {
var pj = prow[j] / psum;
prow[j] = pj;
if(pj > 1e-7) Hhere -= pj * Math.log(pj);
}
// adjust beta based on result
if(Hhere > Htarget) {
// entropy was too high (distribution too diffuse)
// so we need to increase the precision for more peaky distribution
betamin = beta; // move up the bounds
if(betamax === Infinity) { beta = beta * 2; }
else { beta = (beta + betamax) / 2; }
} else {
// converse case. make distrubtion less peaky
betamax = beta;
if(betamin === -Infinity) { beta = beta / 2; }
else { beta = (beta + betamin) / 2; }
}
// stopping conditions: too many tries or got a good precision
num++;
if(Math.abs(Hhere - Htarget) < tol) { done = true; }
if(num >= maxtries) { done = true; }
}
// console.log('data point ' + i + ' gets precision ' + beta + ' after ' + num + ' binary search steps.');
// copy over the final prow to P at row i
for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }
} // end loop over examples i
// symmetrize P and normalize it to sum to 1 over all ij
var Pout = zeros(N * N);
var N2 = N*2;
for(var i=0;i<N;i++) {
for(var j=0;j<N;j++) {
Pout[i*N+j] = Math.max((P[i*N+j] + P[j*N+i])/N2, 1e-100);
}
}
return Pout;
}
// helper function
function sign(x) { return x > 0 ? 1 : x < 0 ? -1 : 0; }
var tSNE = function(opt) {
var opt = opt || {};
this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
this.epsilon = getopt(opt, "epsilon", 10); // learning rate
this.iter = 0;