Commit 99e34ce5 authored by Marc Duez's avatar Marc Duez
Browse files
parents 71545cd2 fe03b6d1
......@@ -3,13 +3,50 @@
#include <stdio.h>
#include <string.h>
#include <cstdlib>
#include <stdlib.h>
#include "../core/dynprog.h"
#include "../core/fasta.h"
#include "../core/lazy_msa.h"
#include "../core/json.h"
using namespace std;
bool check_cgi_parameters(JsonList &result) {
char * requestMethod = getenv("REQUEST_METHOD");
if(!requestMethod) {
result.add("Error", "requestMethod");
return false;
} else if(strcmp(requestMethod, "POST") != 0) {
result.add("Error", "requestMethod =/= post");
return false;
}
char * contentType = getenv("CONTENT_TYPE");
if(!contentType) {
result.add("Error", "content_type");
return false;
}
result.add("requestMethod",requestMethod);
result.add("contentType", contentType);
return true;
}
bool create_fasta_file(JsonList &result, int fd) {
char temp[1024];
FILE *f;
f = fdopen(fd,"w");
if(f == NULL){
result.add("Error", "opening tempfile");
return false;
}else{
while(cin) {
cin.getline(temp, 1024);
fputs(temp, f);
fputs("\n", f);
}
}
fclose(f);
return true;
}
int main(int argc, char* argv[])
{
......@@ -18,52 +55,26 @@ int main(int argc, char* argv[])
ostringstream ost;
ostream * p;
p=&ost;
char filename[L_tmpnam];
tmpnam(filename);
char filename[] = "VidjilAlignXXXXXX";
JsonList result;
bool error = false;
bool cgi_mode;
if (argc <= 1){
cgi_mode=true;
cout <<"Content-type: text/html"<<endl<<endl;
cout<< "{"<<endl;
char * requestMethod = getenv("REQUEST_METHOD");
cout<<"\"requestMethod\" : \""<<requestMethod<<"\""<<endl;
if(!requestMethod) {
cout<<",\"Error\": \"requestMethod\"}"<<endl;
return 0;
}
if(strcmp(requestMethod, "POST") != 0) {
cout<<",\"Error\": \"requestMethod =/= post\"}"<<endl;
return 0;
}
char * contentType = getenv("CONTENT_TYPE");
if(!contentType) {
cout<<",\"Error\": \"content_type\"}"<<endl;
return 0;
}
cout<<",\"contentType\": \""<<contentType<<"\""<<endl;
char temp[1024];
FILE *f;
f = fopen(filename,"w");
if(f == NULL){
cout<<",\"Error\": \"save\""<<filename<<"}"<<endl;
return 0;
}else{
while(cin) {
cin.getline(temp, 1024);
fputs(temp, f);
fputs("\n", f);
}
error = ! check_cgi_parameters(result);
if (! error) {
int fd = mkstemp(filename);
if (fd == -1) {
result.add("Error", "Temporary file");
error = true;
} else {
error = ! create_fasta_file(result, fd);
fdata = filename;
}
}
fclose(f);
fdata = filename;
}else{
cgi_mode=false;
fdata = argv[1];
......@@ -83,28 +94,33 @@ int main(int argc, char* argv[])
lm.add(seq1);
}
string *result;
result =new string[lm.sizeUsed+2];
lm.align(result);
string *align_str;
align_str =new string[lm.sizeUsed+2];
lm.align(align_str);
if (cgi_mode){
cout<<",\"seq\" : [\""<<result[0]<<"\""<<endl;
for (int i=1; i<lm.sizeUsed+2; i++){
cout<<",\""<<result[i]<<"\""<<endl;
}
cout<<"]}";
if (cgi_mode) {
if (! error) {
JsonArray alignment;
for (int i=0; i<lm.sizeUsed+2; i++){
alignment.add(align_str[i]);
}
result.add("seq", alignment);
}
remove(filename);
cout <<"Content-type: text/html"<<endl<<endl;
cout << result.toString();
}else{
int length=60;
int n=result[0].size();
int n=align_str[0].size();
int j=0;
while (n > 0){
for (int i=0; i<lm.sizeUsed+2; i++){
cout<<" > "<<result[i].substr(j*length,length)<<" < "<<fa.label(i)<<endl;
cout<<" > "<<align_str[i].substr(j*length,length)<<" < "<<fa.label(i)<<endl;
}
cout<<endl;
n=n-length;
......
......@@ -25,36 +25,49 @@
#include <stdexcept>
#include "fasta.h"
Fasta::Fasta(){
void Fasta::init(int extract_field, string extract_separator)
{
this -> extract_field = extract_field ;
this -> extract_separator = extract_separator ;
total_size = 0;
}
Fasta::Fasta(int extract_field, string extract_separator)
{
init(extract_field, extract_separator);
}
Fasta::Fasta(const string &input,
int extract_field, string extract_separator,
ostream &out)
{
init(extract_field, extract_separator);
if (!input.size()) // Do not open empty files (D germline if not segmentD)
return ;
// oout = out;
this -> extract_field = extract_field ;
this -> extract_separator = extract_separator ;
out << " <== " << input ;
add(input);
}
ifstream is(input.c_str());
void Fasta::add(istream &in) {
in >> *this;
cout << "\t" << setw(6) << total_size << " bp in " << setw(3) << size() << " sequences" << endl ;
}
void Fasta::add(const string &filename) {
ifstream is(filename.c_str());
if (is.fail())
{
out << " !! Error in opening file: " << input << endl ;
cerr << " !! Error in opening file: " << filename << endl ;
exit(1);
}
out << " <== " << input ;
while (is.good()) {
is >> *this;
}
cout << " <== " << filename ;
add(is);
is.close();
out << "\t" << setw(6) << total_size << " bp in " << setw(3) << size() << " sequences" << endl ;
}
int Fasta::size() const{ return (int)reads.size(); }
......@@ -234,7 +247,6 @@ istream& operator>>(istream& in, Fasta& fasta){
string line;
Sequence read;
OnlineFasta of(in, fasta.extract_field, fasta.extract_separator);
fasta.total_size = 0 ;
while (of.hasNext()) {
of.next();
......
......@@ -29,6 +29,8 @@ typedef enum {
class Fasta
{
void init(int extract_field, string extract_separator);
int total_size;
int extract_field;
string extract_separator;
......@@ -37,7 +39,7 @@ class Fasta
// ostream *oout ;
public:
Fasta();
Fasta(int extract_field=0, string extract_separator="|");
/**
* Read all the sequences in the input filename and record them in the object.
*
......@@ -56,6 +58,9 @@ public:
const string& label_full(int index) const;
const Sequence &read(int index) const;
const string& sequence(int index) const;
void add(istream &in);
void add(const string &filename);
friend istream& operator>>(istream&, Fasta&);
};
......
#include "germline.h"
Germline::Germline(string _code, char _shortcut,
string f_rep_5, string f_rep_4, string f_rep_3,
int _delta_min, int _delta_max)
void Germline::init(string _code, char _shortcut,
int _delta_min, int _delta_max)
{
code = _code ;
shortcut = _shortcut ;
index = 0 ;
affect_5 = "V" ;
affect_4 = "";
affect_4 = "" ;
affect_3 = "J" ;
delta_min = _delta_min ;
delta_max = _delta_max ;
stats.setLabel(code);
}
Germline::Germline(string _code, char _shortcut,
string f_rep_5, string f_rep_4, string f_rep_3,
int _delta_min, int _delta_max)
{
init(_code, _shortcut, _delta_min, _delta_max);
f_reps_5.push_back(f_rep_5);
f_reps_4.push_back(f_rep_4);
f_reps_3.push_back(f_rep_3);
rep_5 = Fasta(f_rep_5, 2, "|", cout);
rep_4 = Fasta(f_rep_4, 2, "|", cout);
rep_3 = Fasta(f_rep_3, 2, "|", cout);
}
delta_min = _delta_min ;
delta_max = _delta_max ;
stats.setLabel(code);
Germline::Germline(string _code, char _shortcut,
list <string> _f_reps_5, list <string> _f_reps_4, list <string> _f_reps_3,
int _delta_min, int _delta_max)
{
init(_code, _shortcut, _delta_min, _delta_max);
f_reps_5 = _f_reps_5 ;
f_reps_4 = _f_reps_4 ;
f_reps_3 = _f_reps_3 ;
rep_5 = Fasta(2, "|") ;
rep_4 = Fasta(2, "|") ;
rep_3 = Fasta(2, "|") ;
for (list<string>::const_iterator it = f_reps_5.begin(); it != f_reps_5.end(); ++it)
rep_5.add(*it);
for (list<string>::const_iterator it = f_reps_4.begin(); it != f_reps_4.end(); ++it)
rep_4.add(*it);
for (list<string>::const_iterator it = f_reps_3.begin(); it != f_reps_3.end(); ++it)
rep_3.add(*it);
}
......@@ -28,24 +63,11 @@ Germline::Germline(string _code, char _shortcut,
Fasta _rep_5, Fasta _rep_4, Fasta _rep_3,
int _delta_min, int _delta_max)
{
code = _code ;
shortcut = _shortcut ;
index = 0 ;
// affect_5 = KmerAffect("", "V", 0) ;
// affect_3 = KmerAffect("", "J", 0) ;
affect_5 = "V" ;
affect_4 = "" ;
affect_3 = "J" ;
init(_code, _shortcut, _delta_min, _delta_max);
rep_5 = _rep_5 ;
rep_4 = _rep_4 ;
rep_3 = _rep_3 ;
delta_min = _delta_min ;
delta_max = _delta_max ;
stats.setLabel(code);
}
void Germline::new_index(string seed)
......@@ -95,30 +117,6 @@ MultiGermline::MultiGermline()
{
}
MultiGermline::MultiGermline(string f_germlines_json)
{
// Should parse 'data/germlines.data'
string f_rep_5 = "germline/TRGV.fa";
string f_rep_4 = "";
string f_rep_3 = "germline/TRGJ.fa";
string seed = "#####-#####";
int delta_min = -10 ;
int delta_max = 20 ;
Fasta rep_5(f_rep_5, 2, "|", cout);
Fasta rep_4(f_rep_4, 2, "|", cout);
Fasta rep_3(f_rep_3, 2, "|", cout);
Germline *germline;
germline = new Germline("TRG", 'G',
rep_5, rep_4, rep_3,
delta_min, delta_max);
germlines.push_back(germline);
}
MultiGermline::~MultiGermline() {
for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
{
......@@ -133,6 +131,7 @@ void MultiGermline::insert(Germline *germline)
void MultiGermline::build_default_set(string path)
{
// Should parse 'data/germlines.data'
Germline *germline;
germline = new Germline("TRG", 'G', path + "/TRGV.fa", "", path + "/TRGJ.fa", -10, 20);
......
......@@ -12,6 +12,9 @@ using namespace std;
class Germline {
private:
void init(string _code, char _shortcut,
int _delta_min, int _delta_max);
void update_index();
public:
......@@ -24,6 +27,10 @@ class Germline {
* (left bound: end of V, right bound : start of J)
*/
Germline(string _code, char _shortcut,
list <string> f_rep_5, list <string> f_rep_4, list <string> f_rep_3,
int _delta_min, int _delta_max);
Germline(string _code, char _shortcut,
string f_rep_5, string f_rep_4, string f_rep_3,
int _delta_min, int _delta_max);
......@@ -40,6 +47,10 @@ class Germline {
void new_index(string seed);
void use_index(IKmerStore<KmerAffect> *index);
list <string> f_reps_5 ;
list <string> f_reps_4 ;
list <string> f_reps_3 ;
// KmerAffect affect_5 ;
// KmerAffect affect_3 ;
string affect_5 ;
......@@ -71,7 +82,6 @@ class MultiGermline {
IKmerStore<KmerAffect> *index;
MultiGermline();
MultiGermline(string f_germlines_json);
~MultiGermline();
void insert(Germline *germline);
......
......@@ -135,6 +135,14 @@ class ArrayKmerStore : public IKmerStore<T>
T* store;
int index(const seqtype& word) const;
/**
* Allocates memory for store
* @throws bad_alloc: when there is not enough memory or when the
* the total number of kmers cannot be stored in an
* unsigned int
*/
void init();
public:
/**
* @param bool: if true, will also index revcomp
......@@ -312,16 +320,12 @@ T& MapKmerStore<T>::operator[](seqtype& word){
// ArrayKmerStore
template <class T>
ArrayKmerStore<T>::ArrayKmerStore(int k, bool revcomp){
ArrayKmerStore<T>::ArrayKmerStore(int k, bool revcomp) {
this->seed = seed_contiguous(k);
this->k = k;
this->s = k;
this->revcomp_indexed = revcomp;
if ((size_t)(k << 1) >= sizeof(int) * 8)
throw std::bad_alloc();
store = new T[(unsigned int)1 << (k << 1)];
if (! store)
throw std::bad_alloc();
init();
}
......@@ -332,9 +336,14 @@ ArrayKmerStore<T>::ArrayKmerStore(string seed, bool revcomp){
this->k = k;
this->s = seed.size();
this->revcomp_indexed = revcomp;
if ((size_t)(k << 1) >= sizeof(int) * 8)
init();
}
template <class T>
void ArrayKmerStore<T>::init() {
if ((size_t)(this->k << 1) >= sizeof(int) * 8)
throw std::bad_alloc();
store = new T[(unsigned int)1 << (k << 1)];
store = new T[(unsigned int)1 << (this->k << 1)];
if (! store)
throw std::bad_alloc();
}
......
......@@ -16,24 +16,6 @@ Sequence RepresentativeComputer::getRepresentative() const{
return representative;
}
string RepresentativeComputer::getRequiredSequence() const {
return required;
}
list<Sequence>& RepresentativeComputer::getSequenceList() const{
return sequences;
}
size_t RepresentativeComputer::getMinCover() const {
return min_cover;
}
float RepresentativeComputer::getPercentCoverage() const {
return percent_cover;
}
bool RepresentativeComputer::getRevcomp() const {
return revcomp;
}
bool RepresentativeComputer::hasRepresentative() const{
return is_computed;
}
......@@ -71,14 +53,6 @@ string KmerRepresentativeComputer::getSeed() const{
return seed;
}
void KmerRepresentativeComputer::setSeed(string seed) {
this->seed = seed;
}
int KmerRepresentativeComputer::getStabilityLimit() const {
return stability_limit;
}
void KmerRepresentativeComputer::setStabilityLimit(int limit) {
stability_limit = limit;
}
......
......@@ -34,21 +34,6 @@ public:
*/
Sequence getRepresentative() const;
/**
* @return the sequence that must be contained in the representative sequence
* ie. getRepresentative().find(getRequiredSequence()) != string::npos
*/
string getRequiredSequence() const;
/**
* @return the input sequences we are working on
*/
list<Sequence>& getSequenceList() const;
size_t getMinCover() const;
float getPercentCoverage() const;
bool getRevcomp() const;
/**
* @return true iff compute() has been called and the criteria have been met.
*/
......@@ -111,13 +96,6 @@ public:
// Getters, setters
string getSeed() const;
/**
* Sets the length of the k-mer used for computing the representative
*/
void setSeed(string seed);
int getStabilityLimit() const;
/**
* @param limit: maximal number of iterations to be performed before reaching
* stability. If after limit number of iterations, the length
......
......@@ -10,22 +10,10 @@
WindowsStorage::WindowsStorage(map<string, string> &labels):windows_labels(labels) {}
map<junction, list<Sequence> > &WindowsStorage::getMap() {
return seqs_by_window;
}
list<pair <junction, int> > &WindowsStorage::getSortedList() {
return sort_all_windows;
}
size_t WindowsStorage::getNbReads(junction window) {
return seqs_by_window[window].size();
}
vector<int> WindowsStorage::getStatus(junction window) {
return status_by_window[window];
}
string WindowsStorage::getLabel(junction window) {
if (windows_labels.find(window) == windows_labels.end())
......@@ -93,10 +81,6 @@ void WindowsStorage::setIdToAll() {
}
}
int WindowsStorage::getId(junction window) {
return id_by_window[window];
}
void WindowsStorage::add(junction window, Sequence sequence, int status, Germline *germline) {
seqs_by_window[window].push_back(sequence);
if (status_by_window.find(window) == status_by_window.end() ) {
......
......@@ -38,20 +38,6 @@ class WindowsStorage {
*/
WindowsStorage(map<string, string> &labels);
/**
* Return the map storing the elements
*/
map<junction, list<Sequence> > &getMap();
/**
* @return the number of reads supporting a given window
*/
size_t getNbReads(junction window);
/**
* @return the segmented status of reads supporting a given window
*/
vector<int> getStatus(junction window);
Germline *getGermline(junction window);
JsonList statusToJson(junction window);
......
!LAUNCH: ../../vidjil -G ../../germline/IGH -d ../../data/Stanford_S22.fasta
!LOG: stanford.log
!LAUNCH: ../../vidjil -V ../../germline/IGHV.fa -D ../../germline/IGHD.fa -J ../../germline/IGHJ.fa -d -s \\\\#\\\\#\\\\#\\\\#\\\\#\\\\#-\\\\#\\\\#\\\\#\\\\#\\\\#\\\\# ../../data/Stanford_S22.fasta