Mentions légales du service

Skip to content
Snippets Groups Projects
Commit d42f1c26 authored by Claire Bardel's avatar Claire Bardel
Browse files

ajout de la fonctionalité: éliminer les haplotypes présents en moins de xxx examplaires

git-svn-id: svn+ssh://imag/users/huron/danjean/svnroot/claire/altree/trunk@290 cf695345-040a-0410-a956-b889e835fe2e
parent 9b0509f7
No related branches found
No related tags found
No related merge requests found
......@@ -445,7 +445,7 @@ sub main
my($low);
# Si $low !=0, if only one case or one control, then the state of S is ?
if (!$opt_l) {
if (not defined $opt_l) {
die "The minimum number of haplotype should be set by -l\n";
} else {
$low=$opt_l;
......
......@@ -8,7 +8,7 @@ use Getopt::Long; # qw(:config permute);
use Pod::Usage;
#use Getopt::Std;
our($opt_h, $opt_r, $opt_i, $opt_j, $opt_o, $opt_t, $opt_p, $opt_c, $opt_q);
our($opt_h, $opt_r, $opt_i, $opt_j, $opt_o, $opt_t, $opt_p, $opt_c, $opt_q, $opt_s);
our $VERSION;
$VERSION = sprintf "0.%03d", q$Revision$ =~ /(\d+)/g;
......@@ -354,9 +354,19 @@ sub MakeCorrespondanceFile
my($hash_haplo)=shift;
my($file_corres)=shift;
my $data_quali = shift;
my $seuil = shift;
my($haplo);
open (CORRESP, '>', $file_corres) || die "Unable to open correspond.txt: $!\n";
foreach $haplo (keys %{$hash_haplo}) {
my $hash_haplo_sorted; # ref on tab containing the haplotypes sorted on the number of individuals carrying them
if ($data_quali eq "quali") {
$hash_haplo_sorted = SortNbhaploQuali($hash_haplo, $seuil);
} else {
$hash_haplo_sorted = SortNbhaploQuanti($hash_haplo, $seuil);
}
# foreach $haplo (keys %{$hash_haplo}) {
foreach $haplo (@{$hash_haplo_sorted}) { #new version with sorted sequences
if ($data_quali eq "quali") {
printf CORRESP "H%.3i\tm%.3i\tc%.3i\n", $hash_haplo->{$haplo}->[2],$hash_haplo->{$haplo}->[1], $hash_haplo->{$haplo}->[0];
} else {
......@@ -372,6 +382,44 @@ sub MakeCorrespondanceFile
}
# Select and sort the haplotypes that will appear in the paup file and in the correspondance file. Does not work with phylip yet
sub SortNbhaploQuali
{
my $hash_haplo = shift;
my $seuil = shift;
my @hash_haplo_sorted = sort {
$hash_haplo->{$a}->[0]+$hash_haplo->{$a}->[1] <=>
$hash_haplo->{$b}->[0]+$hash_haplo->{$b}->[1]
} (grep {
$hash_haplo->{$_}->[0]+$hash_haplo->{$_}->[1]>$seuil
} (keys %{$hash_haplo}));
return \@hash_haplo_sorted;
}
sub SortNbhaploQuanti
{
my $hash_haplo = shift;
my $seuil =shift;
my @hash_haplo_sorted = sort {
$#$hash_haplo->{$a}->{"valeurs"} <=>
$#$hash_haplo->{$b}->{"valeurs"}
} (grep {
$hash_haplo->{$_}->[0]+$hash_haplo->{$_}->[1]>$seuil
} (keys %{$hash_haplo}));
return \@hash_haplo_sorted;
}
# Read the opt_t and define the character to add to the ancestral sequence ($anc) and to the other haplotypes ($der). Also define the "format symbol for paup.
sub ReadDataType
......@@ -406,22 +454,31 @@ sub BuildPAUP
my($data_type)=shift;
my($file_corres)= shift;
my $data_quali=shift;
# my($der, $anc);
my $seuil = shift;
#my($der, $anc);
my($write_data_type);
MakeCorrespondanceFile($hash_haplo, $file_corres, $data_quali);
MakeCorrespondanceFile($hash_haplo, $file_corres, $data_quali, $seuil);
#($anc, $der, $write_data_type)=ReadDataType($data_type);
($write_data_type)=ReadDataType($data_type);
my $hash_haplo_sorted;
if ($data_quali eq "quali") {
$hash_haplo_sorted = SortNbhaploQuali($hash_haplo, $seuil);
} else {
$hash_haplo_sorted = SortNbhaploQuanti($hash_haplo, $seuil);
}
open(OUTPAUP, '>', $file) || die "Unable to open '$file': $!\n";
print OUTPAUP "#Nexus\n";
print OUTPAUP "Begin data;\n";
print OUTPAUP "dimension ntax=",$nb_haplo+1," nchar=", $nb_loci,";\n";
print OUTPAUP "dimension ntax=",$#$hash_haplo_sorted+1," nchar=", $nb_loci,";\n";
print OUTPAUP "format symbols=\"$write_data_type\" missing=?;\n";
print OUTPAUP "matrix\n";
my($haplo);
foreach $haplo (keys %{$hash_haplo}) {
# foreach $haplo (keys %{$hash_haplo}) {
# printf OUTPAUP "H%.3i_m%.3i_c%.3i\t%s%s\n", $hash_haplo->{$haplo}->[2],$hash_haplo->{$haplo}->[1], $hash_haplo->{$haplo}->[0], $haplo, $der; # a modifier si je fais un deuxime fichier de sortie...
if ($data_quali eq "quali") {
foreach $haplo (@{$hash_haplo_sorted}) {
if ($data_quali eq "quali") {
printf OUTPAUP "H%.3i\t%s\n", $hash_haplo->{$haplo}->[2], $haplo;#, $der;
} else {
printf OUTPAUP "H%.3i\t%s\n", $hash_haplo->{$haplo}->{"name"}, $haplo;
......@@ -485,15 +542,16 @@ sub main
{
my($progname, $rec_program);
my %options= (
"first-input-file" => \$opt_i,
"second-input-file" => \$opt_j,
"output-file" => \$opt_o,
"case-control-output" => \$opt_c,
"data-type" => \$opt_t,
"phylo-prog" => \$opt_p,
"reconstruct-prog" => \$opt_r,
"data-quali" => \$opt_q,
);
"first-input-file" => \$opt_i,
"second-input-file" => \$opt_j,
"output-file" => \$opt_o,
"case-control-output" => \$opt_c,
"data-type" => \$opt_t,
"phylo-prog" => \$opt_p,
"reconstruct-prog" => \$opt_r,
"data-quali" => \$opt_q,
"nbind-threshold" => \$opt_s,
);
#getopts('hr:i:j:o:t:p:');
GetOptions (\%options,
......@@ -509,6 +567,7 @@ sub main
"phylo-prog|p=s",
"reconstruct-prog|r=s",
"data-quali|q=s",
"nbind-threshold|s=i",
) or pod2usage(2);
if (defined($options{"version"})) {
print $0, " version ", $VERSION, "\n";
......@@ -565,6 +624,9 @@ sub main
if (! $opt_q){
die "Data quality: opt_q, (qualitative or quantitative) not specified!\n";
}
if (not defined $opt_s){
die "Nb ind threshold: opt_s not specified!\n";
}
my($hash_ind, $hash_statut, $hash_haplo);
my($nb_haplo, $nb_loci);
......@@ -601,7 +663,7 @@ sub main
# AfficheHashHaplo($hash_haplo); # pour verifier
if ($phylo_program =~ /^[Pp][Aa][Uu][Pp]$/) {
BuildPAUP($opt_o, $hash_haplo, $nb_haplo, $nb_loci, $opt_t, $opt_c, $data_quali)
BuildPAUP($opt_o, $hash_haplo, $nb_haplo, $nb_loci, $opt_t, $opt_c, $data_quali, $opt_s)
} elsif ($phylo_program =~ /^[Pp][Hh][Yy][Ll][Ii][Pp]$/) {
BuildPHYLIP($opt_o, $hash_haplo, $nb_haplo, $nb_loci, $opt_t, $opt_c);
} else {
......@@ -631,9 +693,11 @@ altree-convert [options]
--second-input-file|j input file 2 (not mandatory)
--output-file|o output file
--case-control-output|c output containing the nb cases/controls
--data-type|t DNA|SNP
--data-type|t DNA|NUM
--phylo-prog|p PAUP|PHYLIP
--reconstruct-prog|r PHASE|FAMHAP
--data-quali|q Type of data: qualitative or quantitative
--nbind-threshold|s Minimum number of individuals ecquired to keep an haplotype
=head1 OPTIONS
......@@ -683,6 +747,14 @@ Phylogeny reconstruction program
Haplotype reconstruction program
=item B<data-quali|q> C<qualitative|quantitative>
Type of data analyzed
=item B<nbind-threshold|s>
Minimal number of individuals carrying an haplotype recquired to keep it for further analysis. If you want to keep all haplotypes, you must affect 0 to this variable
=back
=head1 DESCRIPTION
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment