diff --git a/altree-add-S b/altree-add-S index 42d6168b83c560f0a10b894d13496d21ac4f0269..7083a97083fc5825a011875d0c630d9f463239c7 100755 --- a/altree-add-S +++ b/altree-add-S @@ -445,7 +445,7 @@ sub main my($low); # Si $low !=0, if only one case or one control, then the state of S is ? - if (!$opt_l) { + if (not defined $opt_l) { die "The minimum number of haplotype should be set by -l\n"; } else { $low=$opt_l; diff --git a/altree-convert b/altree-convert index 6e54c880c4d93a3222cb80b72121c03926b6fd2e..2276a086c69fa08144b06d39de561f57d135ba85 100755 --- a/altree-convert +++ b/altree-convert @@ -8,7 +8,7 @@ use Getopt::Long; # qw(:config permute); use Pod::Usage; #use Getopt::Std; -our($opt_h, $opt_r, $opt_i, $opt_j, $opt_o, $opt_t, $opt_p, $opt_c, $opt_q); +our($opt_h, $opt_r, $opt_i, $opt_j, $opt_o, $opt_t, $opt_p, $opt_c, $opt_q, $opt_s); our $VERSION; $VERSION = sprintf "0.%03d", q$Revision$ =~ /(\d+)/g; @@ -354,9 +354,19 @@ sub MakeCorrespondanceFile my($hash_haplo)=shift; my($file_corres)=shift; my $data_quali = shift; + my $seuil = shift; my($haplo); open (CORRESP, '>', $file_corres) || die "Unable to open correspond.txt: $!\n"; - foreach $haplo (keys %{$hash_haplo}) { + my $hash_haplo_sorted; # ref on tab containing the haplotypes sorted on the number of individuals carrying them + + if ($data_quali eq "quali") { + $hash_haplo_sorted = SortNbhaploQuali($hash_haplo, $seuil); + } else { + $hash_haplo_sorted = SortNbhaploQuanti($hash_haplo, $seuil); + } + +# foreach $haplo (keys %{$hash_haplo}) { + foreach $haplo (@{$hash_haplo_sorted}) { #new version with sorted sequences if ($data_quali eq "quali") { printf CORRESP "H%.3i\tm%.3i\tc%.3i\n", $hash_haplo->{$haplo}->[2],$hash_haplo->{$haplo}->[1], $hash_haplo->{$haplo}->[0]; } else { @@ -372,6 +382,44 @@ sub MakeCorrespondanceFile } +# Select and sort the haplotypes that will appear in the paup file and in the correspondance file. Does not work with phylip yet + +sub SortNbhaploQuali +{ + my $hash_haplo = shift; + my $seuil = shift; + + my @hash_haplo_sorted = sort { + $hash_haplo->{$a}->[0]+$hash_haplo->{$a}->[1] <=> + $hash_haplo->{$b}->[0]+$hash_haplo->{$b}->[1] + } (grep { + $hash_haplo->{$_}->[0]+$hash_haplo->{$_}->[1]>$seuil + } (keys %{$hash_haplo})); + + return \@hash_haplo_sorted; +} + + + +sub SortNbhaploQuanti +{ + my $hash_haplo = shift; + my $seuil =shift; + + my @hash_haplo_sorted = sort { + $#$hash_haplo->{$a}->{"valeurs"} <=> + $#$hash_haplo->{$b}->{"valeurs"} + } (grep { + $hash_haplo->{$_}->[0]+$hash_haplo->{$_}->[1]>$seuil + } (keys %{$hash_haplo})); + + return \@hash_haplo_sorted; + +} + + + + # Read the opt_t and define the character to add to the ancestral sequence ($anc) and to the other haplotypes ($der). Also define the "format symbol for paup. sub ReadDataType @@ -406,22 +454,31 @@ sub BuildPAUP my($data_type)=shift; my($file_corres)= shift; my $data_quali=shift; - # my($der, $anc); + my $seuil = shift; +#my($der, $anc); my($write_data_type); - MakeCorrespondanceFile($hash_haplo, $file_corres, $data_quali); + MakeCorrespondanceFile($hash_haplo, $file_corres, $data_quali, $seuil); #($anc, $der, $write_data_type)=ReadDataType($data_type); ($write_data_type)=ReadDataType($data_type); + my $hash_haplo_sorted; + if ($data_quali eq "quali") { + $hash_haplo_sorted = SortNbhaploQuali($hash_haplo, $seuil); + } else { + $hash_haplo_sorted = SortNbhaploQuanti($hash_haplo, $seuil); + } + open(OUTPAUP, '>', $file) || die "Unable to open '$file': $!\n"; print OUTPAUP "#Nexus\n"; print OUTPAUP "Begin data;\n"; - print OUTPAUP "dimension ntax=",$nb_haplo+1," nchar=", $nb_loci,";\n"; + print OUTPAUP "dimension ntax=",$#$hash_haplo_sorted+1," nchar=", $nb_loci,";\n"; print OUTPAUP "format symbols=\"$write_data_type\" missing=?;\n"; print OUTPAUP "matrix\n"; my($haplo); - foreach $haplo (keys %{$hash_haplo}) { +# foreach $haplo (keys %{$hash_haplo}) { # printf OUTPAUP "H%.3i_m%.3i_c%.3i\t%s%s\n", $hash_haplo->{$haplo}->[2],$hash_haplo->{$haplo}->[1], $hash_haplo->{$haplo}->[0], $haplo, $der; # a modifier si je fais un deuxième fichier de sortie... - if ($data_quali eq "quali") { + foreach $haplo (@{$hash_haplo_sorted}) { + if ($data_quali eq "quali") { printf OUTPAUP "H%.3i\t%s\n", $hash_haplo->{$haplo}->[2], $haplo;#, $der; } else { printf OUTPAUP "H%.3i\t%s\n", $hash_haplo->{$haplo}->{"name"}, $haplo; @@ -485,15 +542,16 @@ sub main { my($progname, $rec_program); my %options= ( - "first-input-file" => \$opt_i, - "second-input-file" => \$opt_j, - "output-file" => \$opt_o, - "case-control-output" => \$opt_c, - "data-type" => \$opt_t, - "phylo-prog" => \$opt_p, - "reconstruct-prog" => \$opt_r, - "data-quali" => \$opt_q, - ); + "first-input-file" => \$opt_i, + "second-input-file" => \$opt_j, + "output-file" => \$opt_o, + "case-control-output" => \$opt_c, + "data-type" => \$opt_t, + "phylo-prog" => \$opt_p, + "reconstruct-prog" => \$opt_r, + "data-quali" => \$opt_q, + "nbind-threshold" => \$opt_s, + ); #getopts('hr:i:j:o:t:p:'); GetOptions (\%options, @@ -509,6 +567,7 @@ sub main "phylo-prog|p=s", "reconstruct-prog|r=s", "data-quali|q=s", + "nbind-threshold|s=i", ) or pod2usage(2); if (defined($options{"version"})) { print $0, " version ", $VERSION, "\n"; @@ -565,6 +624,9 @@ sub main if (! $opt_q){ die "Data quality: opt_q, (qualitative or quantitative) not specified!\n"; } + if (not defined $opt_s){ + die "Nb ind threshold: opt_s not specified!\n"; + } my($hash_ind, $hash_statut, $hash_haplo); my($nb_haplo, $nb_loci); @@ -601,7 +663,7 @@ sub main # AfficheHashHaplo($hash_haplo); # pour verifier if ($phylo_program =~ /^[Pp][Aa][Uu][Pp]$/) { - BuildPAUP($opt_o, $hash_haplo, $nb_haplo, $nb_loci, $opt_t, $opt_c, $data_quali) + BuildPAUP($opt_o, $hash_haplo, $nb_haplo, $nb_loci, $opt_t, $opt_c, $data_quali, $opt_s) } elsif ($phylo_program =~ /^[Pp][Hh][Yy][Ll][Ii][Pp]$/) { BuildPHYLIP($opt_o, $hash_haplo, $nb_haplo, $nb_loci, $opt_t, $opt_c); } else { @@ -631,9 +693,11 @@ altree-convert [options] --second-input-file|j input file 2 (not mandatory) --output-file|o output file --case-control-output|c output containing the nb cases/controls - --data-type|t DNA|SNP + --data-type|t DNA|NUM --phylo-prog|p PAUP|PHYLIP --reconstruct-prog|r PHASE|FAMHAP + --data-quali|q Type of data: qualitative or quantitative + --nbind-threshold|s Minimum number of individuals ecquired to keep an haplotype =head1 OPTIONS @@ -683,6 +747,14 @@ Phylogeny reconstruction program Haplotype reconstruction program +=item B<data-quali|q> C<qualitative|quantitative> + +Type of data analyzed + +=item B<nbind-threshold|s> + +Minimal number of individuals carrying an haplotype recquired to keep it for further analysis. If you want to keep all haplotypes, you must affect 0 to this variable + =back =head1 DESCRIPTION