From d42f1c266ffdfab586af2d30ebf7c4983a4fd505 Mon Sep 17 00:00:00 2001
From: Claire Bardel <Claire.Bardel@univ-lyon1.fr>
Date: Tue, 15 May 2007 22:46:14 +0000
Subject: [PATCH] =?UTF-8?q?ajout=20de=20la=20fonctionalit=C3=A9:=20=C3=A9l?=
 =?UTF-8?q?iminer=20les=20haplotypes=20pr=C3=A9sents=20en=20moins=20de=20x?=
 =?UTF-8?q?xx=20examplaires?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

git-svn-id: svn+ssh://imag/users/huron/danjean/svnroot/claire/altree/trunk@290 cf695345-040a-0410-a956-b889e835fe2e
---
 altree-add-S   |   2 +-
 altree-convert | 108 ++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 91 insertions(+), 19 deletions(-)

diff --git a/altree-add-S b/altree-add-S
index 42d6168..7083a97 100755
--- a/altree-add-S
+++ b/altree-add-S
@@ -445,7 +445,7 @@ sub main
 	
 	my($low);
 	# Si $low !=0, if only one case or one control, then the state of S is ?
-	if (!$opt_l) {
+	if (not defined $opt_l) {
 	    die "The minimum number of haplotype should be set by -l\n"; 
 	} else {
 	    $low=$opt_l;
diff --git a/altree-convert b/altree-convert
index 6e54c88..2276a08 100755
--- a/altree-convert
+++ b/altree-convert
@@ -8,7 +8,7 @@ use Getopt::Long; # qw(:config permute);
 use Pod::Usage;
 #use Getopt::Std;
 
-our($opt_h, $opt_r, $opt_i, $opt_j, $opt_o, $opt_t, $opt_p, $opt_c, $opt_q);
+our($opt_h, $opt_r, $opt_i, $opt_j, $opt_o, $opt_t, $opt_p, $opt_c, $opt_q, $opt_s);
 
 our $VERSION;
 $VERSION = sprintf "0.%03d", q$Revision$ =~ /(\d+)/g;
@@ -354,9 +354,19 @@ sub MakeCorrespondanceFile
     my($hash_haplo)=shift;
     my($file_corres)=shift;
     my $data_quali = shift;
+    my $seuil = shift;
     my($haplo);
     open (CORRESP, '>', $file_corres) || die "Unable to open correspond.txt: $!\n";
-    foreach $haplo (keys %{$hash_haplo}) {
+    my $hash_haplo_sorted; # ref on tab containing the haplotypes sorted on the number of individuals carrying them
+	
+    if ($data_quali eq "quali") {
+	$hash_haplo_sorted = SortNbhaploQuali($hash_haplo, $seuil);
+    } else {
+	$hash_haplo_sorted = SortNbhaploQuanti($hash_haplo, $seuil);
+    }
+    
+#    foreach $haplo (keys %{$hash_haplo}) {
+    foreach $haplo (@{$hash_haplo_sorted}) { #new version with sorted sequences
 	if ($data_quali eq "quali") {
 	    printf CORRESP "H%.3i\tm%.3i\tc%.3i\n", $hash_haplo->{$haplo}->[2],$hash_haplo->{$haplo}->[1], $hash_haplo->{$haplo}->[0];
 	} else {
@@ -372,6 +382,44 @@ sub MakeCorrespondanceFile
 }
 
 
+# Select and sort the haplotypes that will appear in the paup file and in the correspondance file. Does not work with phylip yet
+
+sub SortNbhaploQuali
+{
+    my $hash_haplo = shift;
+    my $seuil = shift;
+ 
+    my @hash_haplo_sorted = sort {
+	$hash_haplo->{$a}->[0]+$hash_haplo->{$a}->[1] <=>
+	    $hash_haplo->{$b}->[0]+$hash_haplo->{$b}->[1]
+	} (grep { 
+	    $hash_haplo->{$_}->[0]+$hash_haplo->{$_}->[1]>$seuil
+	} (keys %{$hash_haplo}));
+    
+    return \@hash_haplo_sorted;
+}
+
+
+
+sub SortNbhaploQuanti
+{
+    my $hash_haplo = shift;
+    my $seuil =shift;
+    
+    my @hash_haplo_sorted = sort  {
+	$#$hash_haplo->{$a}->{"valeurs"} <=> 
+	    $#$hash_haplo->{$b}->{"valeurs"} 
+    }  (grep { 
+	$hash_haplo->{$_}->[0]+$hash_haplo->{$_}->[1]>$seuil
+	}   (keys %{$hash_haplo}));
+    
+    return \@hash_haplo_sorted;
+    
+}
+
+
+
+
 
 # Read the opt_t and define the character to add to the ancestral sequence ($anc) and to the other haplotypes ($der). Also define the "format symbol for paup.
 sub ReadDataType
@@ -406,22 +454,31 @@ sub BuildPAUP
     my($data_type)=shift;
     my($file_corres)= shift;
     my $data_quali=shift;
-   # my($der, $anc);
+    my $seuil = shift;
+#my($der, $anc);
     my($write_data_type);
     
-    MakeCorrespondanceFile($hash_haplo, $file_corres, $data_quali);
+    MakeCorrespondanceFile($hash_haplo, $file_corres, $data_quali, $seuil);
     #($anc, $der, $write_data_type)=ReadDataType($data_type);
     ($write_data_type)=ReadDataType($data_type);
+    my $hash_haplo_sorted;
+    if ($data_quali eq "quali") {
+	$hash_haplo_sorted = SortNbhaploQuali($hash_haplo, $seuil);
+    } else {
+	$hash_haplo_sorted = SortNbhaploQuanti($hash_haplo, $seuil);
+    }
+
     open(OUTPAUP, '>', $file) || die "Unable to open '$file': $!\n";
     print OUTPAUP "#Nexus\n";
     print OUTPAUP "Begin data;\n";
-    print OUTPAUP "dimension ntax=",$nb_haplo+1," nchar=", $nb_loci,";\n";
+    print OUTPAUP "dimension ntax=",$#$hash_haplo_sorted+1," nchar=", $nb_loci,";\n";
     print OUTPAUP "format symbols=\"$write_data_type\" missing=?;\n";
     print OUTPAUP "matrix\n";
     my($haplo);
-    foreach $haplo (keys %{$hash_haplo}) {
+#    foreach $haplo (keys %{$hash_haplo}) {
 #	printf OUTPAUP "H%.3i_m%.3i_c%.3i\t%s%s\n", $hash_haplo->{$haplo}->[2],$hash_haplo->{$haplo}->[1], $hash_haplo->{$haplo}->[0], $haplo, $der; # a modifier si je fais un deuxième fichier de sortie...
-	if ($data_quali eq "quali") {
+    foreach $haplo (@{$hash_haplo_sorted}) {
+    if ($data_quali eq "quali") {
 	    printf OUTPAUP "H%.3i\t%s\n", $hash_haplo->{$haplo}->[2], $haplo;#, $der; 
 	} else {
 	    printf OUTPAUP "H%.3i\t%s\n",  $hash_haplo->{$haplo}->{"name"}, $haplo;
@@ -485,15 +542,16 @@ sub main
 {
     my($progname, $rec_program);
     my %options= (
-    	"first-input-file" => \$opt_i,
-	"second-input-file" => \$opt_j,
-	"output-file" => \$opt_o,
-	"case-control-output" => \$opt_c,
-	"data-type" => \$opt_t,
-	"phylo-prog" => \$opt_p,
-	"reconstruct-prog" => \$opt_r,
-	"data-quali" => \$opt_q,
-	);
+		  "first-input-file" => \$opt_i,
+		  "second-input-file" => \$opt_j,
+		  "output-file" => \$opt_o,
+		  "case-control-output" => \$opt_c,
+		  "data-type" => \$opt_t,
+		  "phylo-prog" => \$opt_p,
+		  "reconstruct-prog" => \$opt_r,
+		  "data-quali" => \$opt_q,
+		  "nbind-threshold" => \$opt_s,	  
+		  );
     	
     #getopts('hr:i:j:o:t:p:');
     GetOptions (\%options,
@@ -509,6 +567,7 @@ sub main
 		"phylo-prog|p=s",
 		"reconstruct-prog|r=s",
 		"data-quali|q=s",
+		"nbind-threshold|s=i",
 		) or pod2usage(2);
     if (defined($options{"version"})) {
 	print $0, " version ", $VERSION, "\n";
@@ -565,6 +624,9 @@ sub main
     if (! $opt_q){
 	die "Data quality: opt_q, (qualitative or quantitative) not specified!\n";
     }
+    if (not defined $opt_s){
+	die "Nb ind threshold: opt_s  not specified!\n";
+    }
     
     my($hash_ind, $hash_statut, $hash_haplo);
     my($nb_haplo, $nb_loci);
@@ -601,7 +663,7 @@ sub main
    # AfficheHashHaplo($hash_haplo); # pour verifier
    
     if ($phylo_program =~ /^[Pp][Aa][Uu][Pp]$/) {
-	BuildPAUP($opt_o, $hash_haplo, $nb_haplo, $nb_loci, $opt_t, $opt_c, $data_quali)
+	BuildPAUP($opt_o, $hash_haplo, $nb_haplo, $nb_loci, $opt_t, $opt_c, $data_quali, $opt_s)
     } elsif ($phylo_program =~ /^[Pp][Hh][Yy][Ll][Ii][Pp]$/) {
 	BuildPHYLIP($opt_o, $hash_haplo, $nb_haplo, $nb_loci, $opt_t, $opt_c);
     } else {
@@ -631,9 +693,11 @@ altree-convert [options]
     --second-input-file|j input file 2 (not mandatory)
     --output-file|o       output file
     --case-control-output|c  output containing the nb cases/controls
-    --data-type|t         DNA|SNP
+    --data-type|t         DNA|NUM
     --phylo-prog|p        PAUP|PHYLIP
     --reconstruct-prog|r  PHASE|FAMHAP
+    --data-quali|q        Type of data: qualitative or quantitative
+    --nbind-threshold|s   Minimum number of individuals ecquired to keep an haplotype
 
 =head1 OPTIONS
 
@@ -683,6 +747,14 @@ Phylogeny reconstruction program
 
 Haplotype reconstruction program
 
+=item B<data-quali|q> C<qualitative|quantitative>
+
+Type of data analyzed
+
+=item B<nbind-threshold|s> 
+
+Minimal number of individuals carrying an haplotype recquired to keep it for further analysis. If you want to keep all haplotypes, you must affect 0 to this variable
+
 =back
 
 =head1 DESCRIPTION
-- 
GitLab