vidjil.cpp 40.8 KB
Newer Older
Mikaël Salson's avatar
Mikaël Salson committed
1 2
/*
  This file is part of "Vidjil" <http://bioinfo.lifl.fr/vidjil>
Mathieu Giraud's avatar
Mathieu Giraud committed
3 4
  Copyright (C) 2011, 2012, 2013, 2014 by Bonsai bioinformatics at LIFL (UMR CNRS 8022, Université Lille) and Inria Lille
  Contributors: Mathieu Giraud <mathieu.giraud@lifl.fr>, Mikaël Salson <mikael.salson@lifl.fr>, Marc Duez <marc.duez@lifl.fr>
Mikaël Salson's avatar
Mikaël Salson committed
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19

  "Vidjil" is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  "Vidjil" is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with "Vidjil". If not, see <http://www.gnu.org/licenses/>
*/

Mathieu Giraud's avatar
Mathieu Giraud committed
20
//$$ #include
Mikaël Salson's avatar
Mikaël Salson committed
21 22 23 24 25 26 27 28 29 30 31 32 33 34

#include<algorithm>
#include<utility>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <cstring>
#include <time.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "core/tools.h"
WebTogz's avatar
WebTogz committed
35
#include "core/json.h"
36
#include "core/germline.h"
Mikaël Salson's avatar
Mikaël Salson committed
37 38 39
#include "core/kmerstore.h"
#include "core/fasta.h"
#include "core/segment.h"
Mathieu Giraud's avatar
Mathieu Giraud committed
40
#include "core/windows.h"
Mikaël Salson's avatar
Mikaël Salson committed
41 42 43 44 45 46 47 48
#include "core/cluster-junctions.h"
#include "core/dynprog.h"
#include "core/read_score.h"
#include "core/read_chooser.h"
#include "core/compare-all.h"
#include "core/teestream.h"
#include "core/mkdir.h"
#include "core/labels.h"
Mikaël Salson's avatar
Mikaël Salson committed
49
#include "core/list_utils.h"
Mathieu Giraud's avatar
Mathieu Giraud committed
50
#include "core/windowExtractor.h"
Mikaël Salson's avatar
Mikaël Salson committed
51 52 53 54 55 56 57 58

#include "vidjil.h"

// RELEASE_TAG may be defined in the "release.h" file.
// If RELEASE_TAG is undefined, the version will be the git hash.
// #define RELEASE_TAG  "2013.04"
#include "release.h"

59 60 61
// GIT_VERSION should be defined in "git-version.h", created by "create-git-version-h.sh", to be used outside of releases
#include "git-version.h"

Marc Duez's avatar
Marc Duez committed
62
#define VIDJIL_JSON_VERSION "2014.09"
Mikaël Salson's avatar
Mikaël Salson committed
63

Mathieu Giraud's avatar
Mathieu Giraud committed
64 65
//$$ #define (mainly default options)

66
#define DEFAULT_MULTIGERMLINE "germline"
Mathieu Giraud's avatar
Mathieu Giraud committed
67 68
#define DEFAULT_GERMLINE_SYSTEM "IGH" 
#define DEFAULT_V_REP  "./germline/IGHV.fa"
Mikaël Salson's avatar
Mikaël Salson committed
69
#define DEFAULT_D_REP  "./germline/IGHD.fa" 
Mathieu Giraud's avatar
Mathieu Giraud committed
70
#define DEFAULT_J_REP  "./germline/IGHJ.fa"
Mikaël Salson's avatar
Mikaël Salson committed
71

Mathieu Giraud's avatar
Mathieu Giraud committed
72
#define DEFAULT_READS  "./data/Stanford_S22.fasta"
Mathieu Giraud's avatar
Mathieu Giraud committed
73
#define MIN_READS_WINDOW 10
Mikaël Salson's avatar
Mikaël Salson committed
74
#define MIN_READS_CLONE 10
75
#define DEFAULT_MAX_REPRESENTATIVES 100
Mikaël Salson's avatar
Mikaël Salson committed
76
#define MAX_CLONES 20
77
#define RATIO_READS_CLONE 0.0
78
#define NO_LIMIT "all"
Mikaël Salson's avatar
Mikaël Salson committed
79

Mathieu Giraud's avatar
Mathieu Giraud committed
80
#define COMMAND_WINDOWS "windows"
81
#define COMMAND_CLONES "clones"
Mikaël Salson's avatar
Mikaël Salson committed
82
#define COMMAND_SEGMENT "segment"
83
#define COMMAND_GERMLINES "germlines"
Mikaël Salson's avatar
Mikaël Salson committed
84
 
85
enum { CMD_WINDOWS, CMD_CLONES, CMD_SEGMENT, CMD_GERMLINES } ;
Mikaël Salson's avatar
Mikaël Salson committed
86 87

#define OUT_DIR "./out/" 
88
#define CLONES_FILENAME ".vdj.fa"
Mikaël Salson's avatar
Mikaël Salson committed
89
#define CLONE_FILENAME "clone.fa-"
90 91 92 93 94 95
#define WINDOWS_FILENAME ".windows.fa"
#define SEGMENTED_FILENAME ".segmented.vdj.fa"
#define UNSEGMENTED_FILENAME ".unsegmented.fa"
#define EDGES_FILENAME ".edges"
#define COMP_FILENAME "comp.vidjil"
#define JSON_SUFFIX ".vidjil"
Mikaël Salson's avatar
Mikaël Salson committed
96 97 98

// "tests/data/leukemia.fa" 

99
#define DEFAULT_K      0
Mikaël Salson's avatar
Mikaël Salson committed
100
#define DEFAULT_W      40
Mikaël Salson's avatar
Mikaël Salson committed
101
#define DEFAULT_W_D    60
102
#define DEFAULT_SEED   ""
Mikaël Salson's avatar
Mikaël Salson committed
103 104

#define DEFAULT_DELTA_MIN  -10
Mathieu Giraud's avatar
Mathieu Giraud committed
105
#define DEFAULT_DELTA_MAX   20
Mikaël Salson's avatar
Mikaël Salson committed
106 107

#define DEFAULT_DELTA_MIN_D  0
108
#define DEFAULT_DELTA_MAX_D  80
Mikaël Salson's avatar
Mikaël Salson committed
109

Mikaël Salson's avatar
Mikaël Salson committed
110
#define DEFAULT_MAX_AUDITIONED 2000
Mathieu Giraud's avatar
Mathieu Giraud committed
111
#define DEFAULT_RATIO_REPRESENTATIVE 0.5
112
#define MIN_COVER_REPRESENTATIVE_RATIO_MIN_READS_CLONE 1.0
Mathieu Giraud's avatar
Mathieu Giraud committed
113

Mikaël Salson's avatar
Mikaël Salson committed
114 115 116 117 118 119
#define DEFAULT_EPSILON  0
#define DEFAULT_MINPTS   10

#define DEFAULT_CLUSTER_COST  Cluster
#define DEFAULT_SEGMENT_COST   VDJ

120
// warn
121
#define WARN_MAX_CLONES 100
122
#define WARN_PERCENT_SEGMENTED 40
123

Mikaël Salson's avatar
Mikaël Salson committed
124 125 126
// display
#define WIDTH_NB_READS 7
#define WIDTH_NB_CLONES 3
Mikaël Salson's avatar
Mikaël Salson committed
127
#define WIDTH_WINDOW_POS 14+WIDTH_NB_CLONES
Mikaël Salson's avatar
Mikaël Salson committed
128 129 130

using namespace std ;

Mathieu Giraud's avatar
Mathieu Giraud committed
131
//$$ options: usage
Mikaël Salson's avatar
Mikaël Salson committed
132

Mathieu Giraud's avatar
Mathieu Giraud committed
133
extern char *optarg;
Mikaël Salson's avatar
Mikaël Salson committed
134 135 136 137 138 139 140 141

extern int optind, optopt, opterr;

void usage(char *progname)
{
  cerr << "Usage: " << progname << " [options] <reads.fa>" << endl << endl;

  cerr << "Command selection" << endl
142 143
       << "  -c <command> \t" << COMMAND_WINDOWS << "  \t window extracting" << endl 
       << "  \t\t" << COMMAND_CLONES  << "  \t clone gathering (default)" << endl 
144
       << "  \t\t" << COMMAND_SEGMENT   << "  \t V(D)J segmentation (not recommended)" << endl
145
       << "  \t\t" << COMMAND_GERMLINES << "  \t discover all germlines" << endl
Mikaël Salson's avatar
Mikaël Salson committed
146 147 148 149 150 151 152
       << endl       

       << "Germline databases" << endl
       << "  -V <file>     V germline multi-fasta file" << endl
       << "  -D <file>     D germline multi-fasta file (automatically implies -d)" << endl
       << "  -J <file>     J germline multi-fasta file" << endl
       << "  -G <prefix>   prefix for V (D) and J repertoires (shortcut for -V <prefix>V.fa -D <prefix>D.fa -J <prefix>J.fa)" << endl
153
       << "  -g <path>     multiple germlines (experimental)" << endl
Mikaël Salson's avatar
Mikaël Salson committed
154 155
       << endl

Mathieu Giraud's avatar
Mathieu Giraud committed
156
       << "Window prediction" << endl
Mikaël Salson's avatar
Mikaël Salson committed
157
#ifndef NO_SPACED_SEEDS
158
       << "  (use either -s or -k option, but not both)" << endl
159 160
       << "  -s <string>   spaced seed used for the V/J affectation" << endl
       << "                (default: #####-#####, ######-######, #######-#######, depends on germline)" << endl
Mikaël Salson's avatar
Mikaël Salson committed
161
#endif
162
       << "  -k <int>      k-mer size used for the V/J affectation (default: 10, 12, 13, depends on germline)" << endl
163 164 165
#ifndef NO_SPACED_SEEDS
       << "                (using -k option is equivalent to set with -s a contiguous seed with only '#' characters)" << endl
#endif
Mathieu Giraud's avatar
Mathieu Giraud committed
166
       << "  -w <int>      w-mer size used for the length of the extracted window (default: " << DEFAULT_W << ")(default with -d: " << DEFAULT_W_D << ")" << endl
Mikaël Salson's avatar
Mikaël Salson committed
167 168
       << endl

Mathieu Giraud's avatar
Mathieu Giraud committed
169 170
       << "Window annotations" << endl
       << "  -l <file>     labels for some windows -- these windows will be kept even if some limits are not reached" << endl
Mikaël Salson's avatar
Mikaël Salson committed
171 172
       << endl

173
       << "Additional clustering (not output in .vidjil file and therefore not used in the browser)" << endl
174 175 176
       << "  -e <file>     manual clustering -- a file used to force some specific edges" << endl
       << "  -n <int>      maximum distance between neighbors for automatic clustering (default " << DEFAULT_EPSILON << "). No automatic clusterisation if =0." << endl
       << "  -N <int>      minimum required neighbors for automatic clustering (default " << DEFAULT_MINPTS << ")" << endl
Mikaël Salson's avatar
Mikaël Salson committed
177 178 179 180 181
       << "  -S            generate and save comparative matrix for clustering" << endl
       << "  -L            load comparative matrix for clustering" << endl
       << "  -C <string>   use custom Cost for automatic clustering : format \"match, subst, indels, homo, del_end\" (default "<<Cluster<<" )"<< endl
       << endl

182
       << "Limits to report a clone (or a window)" << endl
183
       << "  -r <nb>       minimal number of reads supporting a clone (default: " << MIN_READS_CLONE << ")" << endl
Mikaël Salson's avatar
Mikaël Salson committed
184
       << "  -% <ratio>    minimal percentage of reads supporting a clone (default: " << RATIO_READS_CLONE << ")" << endl
185 186
       << endl

187
       << "Limits to further analyze some clones" << endl
188 189 190
       << "  -y <nb>       maximal number of clones computed with a representative ('" << NO_LIMIT << "': no limit) (default: " << DEFAULT_MAX_REPRESENTATIVES << ")" << endl
       << "  -z <nb>       maximal number of clones to be segmented ('" << NO_LIMIT << "': no limit, do not use) (default: " << MAX_CLONES << ")" << endl
       << "  -A            reports and segments all clones (-r 0 -% 0 -y " << NO_LIMIT << " -z " << NO_LIMIT << "), to be used only on very small datasets" << endl
Mikaël Salson's avatar
Mikaël Salson committed
191 192
       << endl

Mathieu Giraud's avatar
Mathieu Giraud committed
193
       << "Fine segmentation options (second pass, see warning in doc/algo.org)" << endl
Mikaël Salson's avatar
Mikaël Salson committed
194 195 196 197 198 199
       << "  -d            segment into V(D)J components instead of VJ " << endl
       << "  -m <int>      minimal admissible delta between segmentation points (default: " << DEFAULT_DELTA_MIN << ") (default when -d is used: " << DEFAULT_DELTA_MIN_D << ")" << endl
       << "  -M <int>      maximal admissible delta between segmentation points (default: " << DEFAULT_DELTA_MAX << ") (default when -d is used: " << DEFAULT_DELTA_MAX_D << ")" << endl
       << "  -f <string>   use custom Cost for fine segmenter : format \"match, subst, indels, homo, del_end\" (default "<<VDJ<<" )"<< endl
       << endl

200
       << "Debug" << endl
201 202
       << "  -U            output segmented (in " << SEGMENTED_FILENAME << " file) sequences" << endl
       << "  -u            output unsegmented (in " << UNSEGMENTED_FILENAME << " file) sequences" << endl
203
       << "                and display detailed k-mer affectation both on segmented and on unsegmented sequences" << endl
Mikaël Salson's avatar
Mikaël Salson committed
204 205
       << "Output" << endl
       << "  -o <dir>      output directory (default: " << OUT_DIR << ")" <<  endl
206
       << "  -b <string>   output basename (by default basename of the input file)" << endl
Mikaël Salson's avatar
Mikaël Salson committed
207
    
208
       << "  -a            output all sequences by cluster (" << CLONE_FILENAME << "*), to be used only on small datasets" << endl
209
       << "  -x            do not compute representative sequences" << endl
Mikaël Salson's avatar
Mikaël Salson committed
210 211 212
       << "  -v            verbose mode" << endl
       << endl        

213 214 215
       << "The full help is available in the doc/algo.org file."
       << endl

Mikaël Salson's avatar
Mikaël Salson committed
216
       << endl 
Mathieu Giraud's avatar
Mathieu Giraud committed
217
       << "Examples (see doc/algo.org)" << endl
218
       << "  " << progname << " -c clones   -G germline/IGH  -r 5       -d data/Stanford_S22.fasta" << endl
219
       << "  " << progname << " -c segment  -G germline/IGH             -d data/Stanford_S22.fasta   # (only for testing)" << endl
220
       << "  " << progname << " -c germlines                               data/Stanford_S22.fasta" << endl
Mikaël Salson's avatar
Mikaël Salson committed
221 222 223 224 225 226
    ;
  exit(1);
}

int main (int argc, char **argv)
{
227
  cout << "# Vidjil -- V(D)J recombinations analysis <http://www.vidjil.org/>" << endl
Mathieu Giraud's avatar
Mathieu Giraud committed
228 229
       << "# Copyright (C) 2011, 2012, 2013, 2014 by the Vidjil team" << endl
       << "# Bonsai bioinformatics at LIFL (UMR CNRS 8022, Université Lille) and Inria Lille" << endl 
Mathieu Giraud's avatar
Mathieu Giraud committed
230 231 232 233 234
       << endl
       << "# Vidjil is free software, and you are welcome to redistribute it" << endl
       << "# under certain conditions -- see http://git.vidjil.org/blob/master/doc/LICENSE" << endl
       << "# No lymphocyte was harmed in the making of this software," << endl
       << "# however this software is for research use only and comes with no warranty." << endl
Mikaël Salson's avatar
Mikaël Salson committed
235 236
       << endl ;

Mathieu Giraud's avatar
Mathieu Giraud committed
237 238
  //$$ options: defaults

Mikaël Salson's avatar
Mikaël Salson committed
239
  string germline_system = DEFAULT_GERMLINE_SYSTEM ;
Mikaël Salson's avatar
Mikaël Salson committed
240 241 242 243 244
  string f_rep_V = DEFAULT_V_REP ;
  string f_rep_D = DEFAULT_D_REP ;
  string f_rep_J = DEFAULT_J_REP ;
  string f_reads = DEFAULT_READS ;
  string seed = DEFAULT_SEED ;
245
  string f_basename = "";
Mikaël Salson's avatar
Mikaël Salson committed
246 247 248 249 250 251

  string out_dir = OUT_DIR;
  
  string comp_filename = COMP_FILENAME;

  int k = DEFAULT_K ;
Mikaël Salson's avatar
Mikaël Salson committed
252 253 254
  int w = 0 ;
  int default_w = DEFAULT_W ;

Mikaël Salson's avatar
Mikaël Salson committed
255 256 257 258 259 260 261 262 263 264 265
  int epsilon = DEFAULT_EPSILON ;
  int minPts = DEFAULT_MINPTS ;
  Cost cluster_cost = DEFAULT_CLUSTER_COST ;
  Cost segment_cost = DEFAULT_SEGMENT_COST ;
  
  
  int save_comp = 0;
  int load_comp = 0;
  int segment_D = 0;
  
  int verbose = 0 ;
266
  int command = CMD_CLONES;
Mikaël Salson's avatar
Mikaël Salson committed
267

268
  int max_representatives = DEFAULT_MAX_REPRESENTATIVES ;
Mikaël Salson's avatar
Mikaël Salson committed
269
  int max_clones = MAX_CLONES ;
Mikaël Salson's avatar
Mikaël Salson committed
270 271 272 273
  int min_reads_clone = MIN_READS_CLONE ;
  float ratio_reads_clone = RATIO_READS_CLONE;
  // int average_deletion = 4;     // Average number of deletion in V or J

Mathieu Giraud's avatar
Mathieu Giraud committed
274
  float ratio_representative = DEFAULT_RATIO_REPRESENTATIVE;
Mikaël Salson's avatar
Mikaël Salson committed
275
  unsigned int max_auditionned = DEFAULT_MAX_AUDITIONED;
Mathieu Giraud's avatar
Mathieu Giraud committed
276

Mikaël Salson's avatar
Mikaël Salson committed
277 278 279 280 281
  // Admissible delta between left and right segmentation points
  int delta_min = DEFAULT_DELTA_MIN ; // Kmer+Fine
  int delta_max = DEFAULT_DELTA_MAX ; // Fine

  bool output_sequences_by_cluster = false;
282
  bool output_segmented = false;
Mathieu Giraud's avatar
Mathieu Giraud committed
283
  bool output_unsegmented = false;
284
  bool multi_germline = false;
285
  string multi_germline_file = DEFAULT_MULTIGERMLINE;
Mikaël Salson's avatar
Mikaël Salson committed
286 287 288

  string forced_edges = "" ;

Mathieu Giraud's avatar
Mathieu Giraud committed
289
  string windows_labels_file = "" ;
Mikaël Salson's avatar
Mikaël Salson committed
290 291 292

  char c ;

293 294
  int options_s_k = 0 ;

WebTogz's avatar
WebTogz committed
295 296 297
  //JsonArray which contains the Levenshtein distances
  JsonArray jsonLevenshtein;

Mathieu Giraud's avatar
Mathieu Giraud committed
298 299
  //$$ options: getopt

300
  while ((c = getopt(argc, argv, "Ahag:G:V:D:J:k:r:vw:e:C:t:l:dc:m:M:N:s:b:Sn:o:L%:y:z:uU")) != EOF)
Mikaël Salson's avatar
Mikaël Salson committed
301 302 303 304 305 306 307 308 309 310 311 312

    switch (c)
      {
      case 'h':
        usage(argv[0]);
      case 'a':
	output_sequences_by_cluster = true;
	break;
      case 'd':
	segment_D = 1 ;
	delta_min = DEFAULT_DELTA_MIN_D ;
	delta_max = DEFAULT_DELTA_MAX_D ;
Mikaël Salson's avatar
Mikaël Salson committed
313
	default_w = DEFAULT_W_D ;
Mikaël Salson's avatar
Mikaël Salson committed
314 315 316 317 318
	break;
      case 'e':
	forced_edges = optarg;
	break;
      case 'l':
Mathieu Giraud's avatar
Mathieu Giraud committed
319
	windows_labels_file = optarg; 
Mikaël Salson's avatar
Mikaël Salson committed
320
	break;
321

Mikaël Salson's avatar
Mikaël Salson committed
322
      case 'c':
323 324
        if (!strcmp(COMMAND_CLONES,optarg))
          command = CMD_CLONES;
Mikaël Salson's avatar
Mikaël Salson committed
325 326
        else if (!strcmp(COMMAND_SEGMENT,optarg))
          command = CMD_SEGMENT;
Mathieu Giraud's avatar
Mathieu Giraud committed
327 328
        else if (!strcmp(COMMAND_WINDOWS,optarg))
          command = CMD_WINDOWS;
329 330
        else if (!strcmp(COMMAND_GERMLINES,optarg))
          command = CMD_GERMLINES;
Mikaël Salson's avatar
Mikaël Salson committed
331 332 333 334 335 336 337 338 339 340 341 342 343
        else {
          cerr << "Unknwown command " << optarg << endl;
	  usage(argv[0]);
        }
        break;
      case 'v':
	verbose += 1 ;
	break;

      // Germline

      case 'V':
	f_rep_V = optarg;
344
	germline_system = "custom" ;
Mikaël Salson's avatar
Mikaël Salson committed
345 346 347 348 349 350 351 352 353
	break;

      case 'D':
	f_rep_D = optarg;
        segment_D = 1;
	break;
        
      case 'J':
	f_rep_J = optarg;
354
	germline_system = "custom" ;
Mikaël Salson's avatar
Mikaël Salson committed
355 356
	break;

357
      case 'g':
358 359
	multi_germline = true;
	multi_germline_file = string(optarg);
360 361
	break;
	
Mikaël Salson's avatar
Mikaël Salson committed
362
      case 'G':
Mikaël Salson's avatar
Mikaël Salson committed
363 364 365 366
	germline_system = string(optarg);
	f_rep_V = (germline_system + "V.fa").c_str() ;
	f_rep_D = (germline_system + "D.fa").c_str() ;
	f_rep_J = (germline_system + "J.fa").c_str() ;
Marc Duez's avatar
Marc Duez committed
367 368
    if (germline_system.find_last_of("/\\") != string::npos)
        germline_system.erase(0, germline_system.find_last_of("/\\")+1);
Mikaël Salson's avatar
Mikaël Salson committed
369 370 371 372 373 374 375 376
	// TODO: if VDJ, set segment_D // NO, bad idea, depends on naming convention
	break;

      // Algorithm

      case 'k':
	k = atoi(optarg);
	seed = seed_contiguous(k);
377
	options_s_k++ ;
Mikaël Salson's avatar
Mikaël Salson committed
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
        break;

      case 'w':
	w = atoi(optarg);
        break;

      case 'm':
	delta_min = atoi(optarg);
        break;

      case 'M':
	delta_max = atoi(optarg);
        break;

      case 'o':
        out_dir = optarg ;
        break;

396 397
      case 'b':
        f_basename = optarg;
Mikaël Salson's avatar
Mikaël Salson committed
398 399
        break;

400 401
      // Limits

Mikaël Salson's avatar
Mikaël Salson committed
402 403 404 405
      case '%':
	ratio_reads_clone = atof(optarg);
	break;

406
      case 'r':
Mikaël Salson's avatar
Mikaël Salson committed
407 408 409
	min_reads_clone = atoi(optarg);
        break;

410
      case 'y':
411 412 413 414 415
	if (!strcmp(NO_LIMIT, optarg))
	  {
	    max_representatives = -1;
	    break;
	  }
416 417 418
	max_representatives = atoi(optarg);
        break;

Mikaël Salson's avatar
Mikaël Salson committed
419
      case 'z':
420 421 422 423 424
	if (!strcmp(NO_LIMIT, optarg))
	  {
	    max_clones = -1;
	    break;
	  }
Mikaël Salson's avatar
Mikaël Salson committed
425 426
	max_clones = atoi(optarg);
        break;
427 428 429

      case 'A': // --all
	ratio_reads_clone = 0 ;
430 431 432
	min_reads_clone = 1 ;
	max_representatives = -1 ;
	max_clones = -1 ;
433
	break ;
434
    
435 436
      // Seeds

Mikaël Salson's avatar
Mikaël Salson committed
437 438 439 440
      case 's':
#ifndef NO_SPACED_SEEDS
	seed = string(optarg);
	k = seed_weight(seed);
441
	options_s_k++ ;
Mikaël Salson's avatar
Mikaël Salson committed
442
#else
Mathieu Giraud's avatar
Mathieu Giraud committed
443
        cerr << "To enable the option -s, please compile without NO_SPACED_SEEDS" << endl;
Mikaël Salson's avatar
Mikaël Salson committed
444 445 446
#endif
        break;
	
447
      // Clustering
Mikaël Salson's avatar
Mikaël Salson committed
448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
	
      case 'n':
	epsilon = atoi(optarg);
        break;

      case 'N':
	minPts = atoi(optarg);
        break;
	
      case 'S':
	save_comp=1;
        break;

      case 'L':
	load_comp=1;
        break;
	
      case 'C':
	cluster_cost=strToCost(optarg, Cluster);
        break;
	
      case 't':
	segment_cost=strToCost(optarg, VDJ);
        break;
Mathieu Giraud's avatar
Mathieu Giraud committed
472 473 474 475

      case 'u':
        output_unsegmented = true;
        break;
476 477 478
      case 'U':
        output_segmented = true;
        break;
Mikaël Salson's avatar
Mikaël Salson committed
479 480
      }

Mikaël Salson's avatar
Mikaël Salson committed
481 482 483 484 485
  // If there was no -w option, then w is either DEFAULT_W or DEFAULT_W_D
  if (w == 0)
    w = default_w ;

  
486 487 488 489 490 491
  if (options_s_k > 1)
    {
      cout << "use at most one -s or -k option." << endl ;
      exit(1);
    }

Mikaël Salson's avatar
Mikaël Salson committed
492 493
  string out_seqdir = out_dir + "/seq/" ;

Mikaël Salson's avatar
Mikaël Salson committed
494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
  if (verbose)
    cout << "# verbose " << verbose << endl ;

  if (optind == argc)
    {
      cout << "# using default sequence file: " << f_reads << endl ;
    }
  else if (optind+1 == argc)
    {
      f_reads = argv[optind] ; 
    }
  else
    {
      cout << "wrong number of arguments." << endl ;
      exit(1);
    }
Mathieu Giraud's avatar
Mathieu Giraud committed
510 511

  //$$ options: post-processing+display
Mathieu Giraud's avatar
Mathieu Giraud committed
512

513
  size_t min_cover_representative = (size_t) (MIN_COVER_REPRESENTATIVE_RATIO_MIN_READS_CLONE * min_reads_clone) ;
514

515 516 517
  if (!segment_D) // TODO: add other constructor to Fasta, and do not load rep_D in this case
    f_rep_D = "";

518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541
  // Default seeds

#ifndef NO_SPACED_SEEDS
  if (k == DEFAULT_K)
    {
      if (germline_system.find("TRA") != string::npos)
	seed = "#######-######" ;

      else if ((germline_system.find("TRB") != string::npos)
	       || (germline_system.find("IGH") != string::npos))
	seed = "######-######" ; 
      else // TRD, TRG, IGK, IGL
	seed = "#####-#####" ; 

      k = seed_weight(seed);
    }
#else
  {
    cout << "Vidjil was compiled with NO_SPACED_SEEDS: please provide a -k option." << endl;
    exit(1) ;
  }
#endif
	  

Mathieu Giraud's avatar
Mathieu Giraud committed
542 543 544 545 546 547 548 549 550
#ifndef NO_SPACED_SEEDS
  // Check seed buffer  
  if (seed.size() >= MAX_SEED_SIZE)
    {
      cout << "Seed size is too large (MAX_SEED_SIZE). Aborting." << endl ;
      exit(1);
    }
#endif

Mikaël Salson's avatar
Mikaël Salson committed
551 552 553 554 555 556 557 558
  // Check that out_dir is an existing directory or creates it
  const char *out_cstr = out_dir.c_str();

  if (mkpath(out_cstr, 0755) == -1) {
    perror("Directory creation");
    exit(2);
  }

Mikaël Salson's avatar
Mikaël Salson committed
559 560 561 562 563 564
  const char *outseq_cstr = out_seqdir.c_str();
  if (mkpath(outseq_cstr, 0755) == -1) {
    perror("Directory creation");
    exit(2);
  }

565 566 567 568 569
  // Compute basename if not given as an option
  if (f_basename == "") {
    f_basename = extract_basename(f_reads);
  }

Mikaël Salson's avatar
Mikaël Salson committed
570 571 572
  out_dir += "/" ;

  /// Load labels ;
Mathieu Giraud's avatar
Mathieu Giraud committed
573
  map <string, string> windows_labels = load_map(windows_labels_file);
Mikaël Salson's avatar
Mikaël Salson committed
574 575

  switch(command) {
Mathieu Giraud's avatar
Mathieu Giraud committed
576
  case CMD_WINDOWS: cout << "Extracting windows" << endl; 
Mikaël Salson's avatar
Mikaël Salson committed
577
    break;
578
  case CMD_CLONES: cout << "Analysing clones" << endl; 
Mikaël Salson's avatar
Mikaël Salson committed
579 580 581
    break;
  case CMD_SEGMENT: cout << "Segmenting V(D)J" << endl;
    break;
582 583
  case CMD_GERMLINES: cout << "Discovering germlines" << endl;
    break;
Mikaël Salson's avatar
Mikaël Salson committed
584 585
  }

Mathieu Giraud's avatar
Mathieu Giraud committed
586
  cout << "Command line: ";
Mikaël Salson's avatar
Mikaël Salson committed
587
  for (int i=0; i < argc; i++) {
Mathieu Giraud's avatar
Mathieu Giraud committed
588
    cout << argv[i] << " ";
Mikaël Salson's avatar
Mikaël Salson committed
589
  }
Mathieu Giraud's avatar
Mathieu Giraud committed
590
  cout << endl;
Mikaël Salson's avatar
Mikaël Salson committed
591 592 593 594 595 596 597 598 599 600 601 602

  //////////////////////////////////
  // Display time and date
  time_t rawtime;
  struct tm *timeinfo;
  char time_buffer[80];

  time (&rawtime );
  timeinfo = localtime (&rawtime);

  strftime (time_buffer, 80,"%F %T", timeinfo);

Mathieu Giraud's avatar
Mathieu Giraud committed
603
  cout << "# " << time_buffer << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
604 605 606 607 608 609


  //////////////////////////////////
  // Display version information or git log

#ifdef RELEASE_TAG
Mathieu Giraud's avatar
Mathieu Giraud committed
610
  cout << "# version: vidjil " << RELEASE_TAG << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
611
#else
612
  cout << "# development version" << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
613 614
#endif

615 616 617
#ifdef GIT_VERSION
  cout << "# git: " << GIT_VERSION << endl ;
#endif
618

619

620 621
  //////////////////////////////////
  // Warning for non-optimal use
622

623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
  if (max_clones == -1 || max_clones > WARN_MAX_CLONES)
    {
      cout << endl
	   << "* WARNING: vidjil was run with '-A' option or with a large '-z' option" << endl ;
    }
  
  if (command == CMD_SEGMENT)
    {
      cout << endl
	   << "* WARNING: vidjil was run with '-c segment' option" << endl ;
    }
  
  if (max_clones == -1 || max_clones > WARN_MAX_CLONES || command == CMD_SEGMENT)
    {
      cout << "* Vidjil purpose is to extract very quickly windows overlapping the CDR3" << endl
	   << "* and to gather reads into clones (-c clones), and not to provide an accurate V(D)J segmentation." << endl
	   << "* The following segmentations are slow to compute and are provided only for convenience." << endl
	   << "* They should be checked with other softwares such as IgBlast, iHHMune-align or IMGT/V-QUEST." << endl 
	   << "* More information is provided in the 'doc/algo.org' file." << endl 
	   << endl ;
    }
644

645 646 647
  /////////////////////////////////////////
  //            LOAD GERMLINES           //
  /////////////////////////////////////////
648

649
  MultiGermline *multigermline = new MultiGermline();
650

651 652 653 654 655 656 657 658 659
  if (command == CMD_GERMLINES || command == CMD_CLONES || command == CMD_WINDOWS) 
    {
      cout << "Build Kmer indexes" << endl ;
    
      if (multi_germline)
	{
	  multigermline->build_default_set(multi_germline_file);
	}
      else if (command == CMD_GERMLINES)
660
	{
661 662
	  multigermline->load_standard_set(multi_germline_file);
	  multigermline->build_with_one_index(seed);
663
	}
664 665 666 667 668 669 670 671 672 673 674 675
      else
	{
	  // Custom germline
	  Fasta rep_V(f_rep_V, 2, "|", cout);
	  Fasta rep_D(f_rep_D, 2, "|", cout);
	  Fasta rep_J(f_rep_J, 2, "|", cout);
	  
	  Germline *germline;
	  germline = new Germline(germline_system, 'X',
				  rep_V, rep_D, rep_J, 
				  delta_min, delta_max);
	  germline->new_index(seed);
676

677 678 679
	  multigermline->insert(germline);
	}
    }
680

681 682 683 684 685
  //////////////////////////////://////////
  //         DISCOVER GERMLINES          //
  /////////////////////////////////////////
  if (command == CMD_GERMLINES)
    {
686
      map <char, int> stats_kmer, stats_max;
687
      IKmerStore<KmerAffect> *index = multigermline->index ;
688 689

      // Initialize statistics, with two additional categories
690 691
      index->labels.push_back(make_pair(KmerAffect::getAmbiguous(), AFFECT_AMBIGUOUS_SYMBOL));
      index->labels.push_back(make_pair(KmerAffect::getUnknown(), AFFECT_UNKNOWN_SYMBOL));
692 693
      
      for (list< pair <KmerAffect, string> >::const_iterator it = index->labels.begin(); it != index->labels.end(); ++it)
694
	{
695 696 697
	  char key = affect_char(it->first.affect) ;
	  stats_kmer[key] = 0 ;
	  stats_max[key] = 0 ;
698
	}
699

700

701 702 703 704 705 706 707 708 709 710 711
      // Open read file (copied frow below)

      OnlineFasta *reads;

      try {
	reads = new OnlineFasta(f_reads, 1, " ");
      } catch (const std::ios_base::failure e) {
	cout << "Error while reading reads file " << f_reads << ": " << e.what()
	     << endl;
	exit(1);
      }
712
      
713
      // init forbidden for .max()
714 715 716
      set<KmerAffect> forbidden;
      forbidden.insert(KmerAffect::getAmbiguous());
      forbidden.insert(KmerAffect::getUnknown());
717
      
718 719 720
      // Loop through all reads

      int nb_reads = 0 ;
721 722 723
      int total_length = 0 ;
      int s = index->getS();

724 725 726 727 728
      while (reads->hasNext())
	{
	  reads->next();
	  nb_reads++;
	  string seq = reads->getSequence().sequence;
729
	  total_length += seq.length() - s + 1;
730

731
	  KmerAffectAnalyser<KmerAffect> *kaa = new KmerAffectAnalyser<KmerAffect>(*index, seq);
732 733 734

	  for (int i = 0; i < kaa->count(); i++) 
	    { 
735 736
	      KmerAffect ksa = kaa->getAffectation(i);
	      stats_kmer[affect_char(ksa.affect)]++ ;
737
	    }
738

739
          delete kaa;
740

741
	  CountKmerAffectAnalyser<KmerAffect> ckaa(*index, seq);
742 743
	  ckaa.setAllowedOverlap(k-1);

744
	  stats_max[affect_char(ckaa.max(forbidden).affect)]++ ;
745

746 747
	}

748 749
      delete reads;

750 751
      // Display statistics

752 753 754 755
      cout << "  <== "
	   << nb_reads << " reads, "
	   << total_length << " kmers"
	   << endl ;
756
      cout << "\t" << " max" << "\t\t" << "        kmers" << "\n" ;
757 758

      for (list< pair <KmerAffect, string> >::const_iterator it = index->labels.begin(); it != index->labels.end(); ++it)
759
	{
760 761 762 763
	  char key = affect_char(it->first.affect) ;
	  
	  cout << setw(12) << stats_max[key] << " " ;
	  cout << setw(6) << fixed << setprecision(2) <<  (float) stats_max[key] / nb_reads * 100 << "%" ;
764

765
	  cout << "     " ;
766

767 768
	  cout << setw(12) << stats_kmer[key] << " " ;
	  cout << setw(6) << fixed << setprecision(2) <<  (float) stats_kmer[key] / total_length * 100 << "%" ;
769

770
	  cout << "     " << key << " " << it->second << endl ;
771
	}
772 773
      
      delete index;
774 775 776 777
      exit(0);
    }


Mikaël Salson's avatar
Mikaël Salson committed
778
  //////////////////////////////////
Mathieu Giraud's avatar
Mathieu Giraud committed
779
  //$$ Read sequence files
Mikaël Salson's avatar
Mikaël Salson committed
780

781
 
Mathieu Giraud's avatar
Mathieu Giraud committed
782

Mikaël Salson's avatar
Mikaël Salson committed
783 784 785 786 787
  OnlineFasta *reads;

  try {
    reads = new OnlineFasta(f_reads, 1, " ");
  } catch (const std::ios_base::failure e) {
Mathieu Giraud's avatar
Mathieu Giraud committed
788
    cout << "Error while reading reads file " << f_reads << ": " << e.what()
Mikaël Salson's avatar
Mikaël Salson committed
789 790 791 792 793 794 795 796 797
        << endl;
    exit(1);
  }

  out_dir += "/";

  ////////////////////////////////////////
  //           CLONE ANALYSIS           //
  ////////////////////////////////////////
798
  if (command == CMD_CLONES || command == CMD_WINDOWS) {
Mikaël Salson's avatar
Mikaël Salson committed
799 800

    //////////////////////////////////
Mathieu Giraud's avatar
Mathieu Giraud committed
801 802 803 804 805 806
    cout << "# seed = " << seed << "," ;
    cout << " weight = " << seed_weight(seed) << "," ;
    cout << " span = " << seed.size() << endl ;
    cout << "# k = " << k << "," ;
    cout << " w = " << w << "," ;
    cout << " delta = [" << delta_min << "," << delta_max << "]" << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
807 808 809


    //////////////////////////////////
Mathieu Giraud's avatar
Mathieu Giraud committed
810
    //$$ Kmer Segmentation
Mikaël Salson's avatar
Mikaël Salson committed
811

Mathieu Giraud's avatar
Mathieu Giraud committed
812 813
    cout << endl;
    cout << "Loop through reads, looking for windows" << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
814

815
    ofstream *out_segmented = NULL;
Mathieu Giraud's avatar
Mathieu Giraud committed
816
    ofstream *out_unsegmented = NULL;
Mikaël Salson's avatar
Mikaël Salson committed
817
 
Mathieu Giraud's avatar
Mathieu Giraud committed
818
    WindowExtractor we;
819 820
 
    if (output_segmented) {
821
      string f_segmented = out_dir + f_basename + SEGMENTED_FILENAME ;
822 823 824 825 826
      cout << "  ==> " << f_segmented << endl ;
      out_segmented = new ofstream(f_segmented.c_str());
      we.setSegmentedOutput(out_segmented);
    }

Mathieu Giraud's avatar
Mathieu Giraud committed
827
    if (output_unsegmented) {
828
      string f_unsegmented = out_dir + f_basename +  UNSEGMENTED_FILENAME ;
Mathieu Giraud's avatar
Mathieu Giraud committed
829 830 831 832
      cout << "  ==> " << f_unsegmented << endl ;
      out_unsegmented = new ofstream(f_unsegmented.c_str());
      we.setUnsegmentedOutput(out_unsegmented);
    }
833

834 835

    WindowsStorage *windowsStorage = we.extract(reads, multigermline, w, windows_labels);
WebTogz's avatar
WebTogz committed
836
    windowsStorage->setIdToAll();
Mathieu Giraud's avatar
Mathieu Giraud committed
837
    size_t nb_total_reads = we.getNbReads();
Mikaël Salson's avatar
Mikaël Salson committed
838 839


Mathieu Giraud's avatar
Mathieu Giraud committed
840
    //$$ Display statistics on segmentation causes
Mikaël Salson's avatar
Mikaël Salson committed
841

842 843
        
    ostringstream stream_segmentation_info;
Mikaël Salson's avatar
Mikaël Salson committed
844

Mathieu Giraud's avatar
Mathieu Giraud committed
845 846
    int nb_segmented_including_too_short = we.getNbSegmented(TOTAL_SEG_AND_WINDOW) 
      + we.getNbSegmented(TOTAL_SEG_BUT_TOO_SHORT_FOR_THE_WINDOW);
Mikaël Salson's avatar
Mikaël Salson committed
847

848
    stream_segmentation_info << "  ==> segmented " << nb_segmented_including_too_short << " reads"
Mikaël Salson's avatar
Mikaël Salson committed
849
	<< " (" << setprecision(3) << 100 * (float) nb_segmented_including_too_short / nb_total_reads << "%)" 
Mathieu Giraud's avatar
Mathieu Giraud committed
850 851
	<< endl ;

852
    // nb_segmented is the main denominator for the following
Mathieu Giraud's avatar
Mathieu Giraud committed
853
    int nb_segmented = we.getNbSegmented(TOTAL_SEG_AND_WINDOW);
854
    float ratio_segmented = 100 * (float) nb_segmented / nb_total_reads ;
Mikaël Salson's avatar
Mikaël Salson committed
855

856
    stream_segmentation_info << "  ==> found " << windowsStorage->size() << " " << w << "-windows"
Mikaël Salson's avatar
Mikaël Salson committed
857
	<< " in " << nb_segmented << " segments"
858
	<< " (" << setprecision(3) << ratio_segmented << "%)"
Mikaël Salson's avatar
Mikaël Salson committed
859 860
	<< " inside " << nb_total_reads << " sequences" << endl ;
  
861
    // warn if there are too few segmented sequences
862
    if (ratio_segmented < WARN_PERCENT_SEGMENTED)
863
      {
864 865
        stream_segmentation_info << "  ! There are not so many CDR3 windows found in this set of reads." << endl ;
        stream_segmentation_info << "  ! If this is unexpected, check the germline (-G) and try to change seed parameters (-k)." << endl ;
866 867
      }

868
    stream_segmentation_info << "                                  #      av. length" << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
869

870
    multigermline->out_stats(stream_segmentation_info);
871 872
    stream_segmentation_info << endl;
    we.out_stats(stream_segmentation_info);
Mikaël Salson's avatar
Mikaël Salson committed
873
    
874
    cout << stream_segmentation_info.str();
Mathieu Giraud's avatar
Mathieu Giraud committed
875
      map <junction, JsonList> json_data_segment ;
Mikaël Salson's avatar
Mikaël Salson committed
876
    
Mikaël Salson's avatar
Mikaël Salson committed
877 878

	//////////////////////////////////
Mathieu Giraud's avatar
Mathieu Giraud committed
879
	//$$ Sort windows
Mikaël Salson's avatar
Mikaël Salson committed
880
	
Mathieu Giraud's avatar
Mathieu Giraud committed
881 882
        cout << "Sort windows by number of occurrences" << endl;
        windowsStorage->sort();
Mikaël Salson's avatar
Mikaël Salson committed
883 884

	//////////////////////////////////
Mathieu Giraud's avatar
Mathieu Giraud committed
885
	//$$ Output windows
Mikaël Salson's avatar
Mikaël Salson committed
886 887
	//////////////////////////////////

888
	string f_all_windows = out_dir + f_basename + WINDOWS_FILENAME;
Mathieu Giraud's avatar
Mathieu Giraud committed
889
	cout << "  ==> " << f_all_windows << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
890

Mathieu Giraud's avatar
Mathieu Giraud committed
891
	ofstream out_all_windows(f_all_windows.c_str());
Mathieu Giraud's avatar
Mathieu Giraud committed
892
        windowsStorage->printSortedWindows(out_all_windows);
Mikaël Salson's avatar
Mikaël Salson committed
893 894 895


    //////////////////////////////////
896 897 898 899 900 901 902
    //$$ min_reads_clone (ou label)

    int min_reads_clone_ratio = (int) (ratio_reads_clone * nb_segmented / 100.0);
    cout << "Considering only labeled windows and windows with >= " << min_reads_clone << " reads"
	 << " and with a ratio >= " << ratio_reads_clone << " (" << min_reads_clone_ratio << ")" << endl ;

    int min_reads_clone_final = max(min_reads_clone, min_reads_clone_ratio);
Mathieu Giraud's avatar
Mathieu Giraud committed
903

904
    pair<int, int> info_remove = windowsStorage->keepInterestingWindows((size_t) min_reads_clone_final);
Mikaël Salson's avatar
Mikaël Salson committed
905
	 
Mathieu Giraud's avatar
Mathieu Giraud committed
906 907
    cout << "  ==> keep " <<  windowsStorage->size() << " windows in " << info_remove.second << " reads" ;
    cout << " (" << setprecision(3) << 100 * (float) info_remove.second / nb_total_reads << "%)  " << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
908

909 910 911 912
    if (windowsStorage->size() == 0)
      {
	cout << "  ! No windows with current parameters." << endl;
      }
913

914 915 916 917 918 919
    //////////////////////////////////
    //$$ Clustering
    windowsStorage->sort();
    list<pair <junction, int> > sort_clones = windowsStorage->getSortedList();
    cout << "  ==> " << sort_clones.size() << " clones" << endl ;
    
920
    list <list <junction> > clones_windows;
921
    comp_matrix comp=comp_matrix(sort_clones);
922
      
923
    if (command == CMD_CLONES) {
Mikaël Salson's avatar
Mikaël Salson committed
924 925 926

    if (epsilon || forced_edges.size())
      {
Mathieu Giraud's avatar
Mathieu Giraud committed
927
	cout << "Cluster similar windows" << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
928 929 930

	if (load_comp==1) 
	  {
931
	    comp.load((out_dir+f_basename + "." + comp_filename).c_str());
Mikaël Salson's avatar
Mikaël Salson committed
932 933 934
	  }
	else
	  {
Mathieu Giraud's avatar
Mathieu Giraud committed
935
	    comp.compare( cout, cluster_cost);
Mikaël Salson's avatar
Mikaël Salson committed
936 937 938 939
	  }
	
	if (save_comp==1)
	  {
940
	    comp.save(( out_dir+f_basename + "." + comp_filename).c_str());
Mikaël Salson's avatar
Mikaël Salson committed
941 942
	  }
       
Mathieu Giraud's avatar
Mathieu Giraud committed
943
	clones_windows  = comp.cluster(forced_edges, w, cout, epsilon, minPts) ;
944
	comp.stat_cluster(clones_windows, cout );
Mikaël Salson's avatar
Mikaël Salson committed
945
	comp.del();
946
	cout << "  ==> " << clones_windows.size() << " clusters" << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
947 948
      } 
    else
949 950
      { 
	cout << "No clustering" << endl ; 
Mikaël Salson's avatar
Mikaël Salson committed
951 952
      }

953 954 955 956 957 958 959 960 961

    // TODO: output clones_windows (.data, other places ?)

    // TODO: Are these constraints checked somewhere ? keepInterestingWindows ?
    // if (labeled 
    //     || ((clone_nb_reads >= min_reads_clone) 
    //		  && (clone_nb_reads * 100.0 / nb_segmented >= ratio_reads_clone)))

    if (sort_clones.size() == 0)
962 963
      {
	cout << "  ! No clones with current parameters." << endl;
964
	cout << "  ! See the 'Limits to report a clone' options (-r, -%, -z, -A)." << endl;
965
      }
966
    else
Mikaël Salson's avatar
Mikaël Salson committed
967 968
      {

969 970
    cout << endl;

Mikaël Salson's avatar
Mikaël Salson committed
971
    //////////////////////////////////
Mathieu Giraud's avatar
Mathieu Giraud committed
972
    //$$ Output clones
973

974
    if (max_clones > 0)
975
      cout << "Detailed analysis of at most " << max_clones<< " clones" ;
976
    else
977 978
      cout << "Detailed analysis of all clones" ;
    cout << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
979 980

    map <string, int> clones_codes ;
Mathieu Giraud's avatar
Mathieu Giraud committed