diff --git a/CUtils/const-c.inc b/CUtils/const-c.inc index 88ec41711c72a7ee4766c9830bb1ba4e2fadda70..6651f1a7d6d8a498548de776fc3ca28a23f65c7a 100644 --- a/CUtils/const-c.inc +++ b/CUtils/const-c.inc @@ -39,11 +39,11 @@ use ExtUtils::Constant qw (constant_types C_constant XS_constant); my $types = {map {($_, 1)} qw()}; my @names = (qw()); -print constant_types(); # macro defs +print constant_types(), "\n"; # macro defs foreach (C_constant ("ALTree::CUtils", 'constant', 'IV', $types, undef, 3, @names) ) { print $_, "\n"; # C constant subs } -print "#### XS Section:\n"; +print "\n#### XS Section:\n"; print XS_constant ("ALTree::CUtils", $types); __END__ */ diff --git a/CUtils/const-xs.inc b/CUtils/const-xs.inc index 1945c56373c2c602b85a9e98aa87618cf5500b4d..656ca94652cea5e2556ae1e34d1deb922998c31b 100644 --- a/CUtils/const-xs.inc +++ b/CUtils/const-xs.inc @@ -20,12 +20,14 @@ constant(sv) Second, if present, is found value */ switch (type) { case PERL_constant_NOTFOUND: - sv = sv_2mortal(newSVpvf("%s is not a valid ALTree::CUtils macro", s)); + sv = + sv_2mortal(newSVpvf("%s is not a valid ALTree::CUtils macro", s)); PUSHs(sv); break; case PERL_constant_NOTDEF: sv = sv_2mortal(newSVpvf( - "Your vendor has not defined ALTree::CUtils macro %s, used", s)); + "Your vendor has not defined ALTree::CUtils macro %s, used", + s)); PUSHs(sv); break; /* Uncomment this if you need to return IVs diff --git a/Documentation/fig/altree-q.fig b/Documentation/fig/altree-q.fig new file mode 100644 index 0000000000000000000000000000000000000000..4c3fcab3352451eab6b5e45886c362c10a62686c --- /dev/null +++ b/Documentation/fig/altree-q.fig @@ -0,0 +1,168 @@ +#FIG 3.2 Produced by xfig version 3.2.5 +Portrait +Center +Metric +Letter +100.00 +Single +-2 +1200 2 +6 11023 2696 11700 2745 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 11023 2696 11339 2696 +2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 + 11339 2696 11700 2696 +-6 +6 11025 3150 11700 3195 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 11025 3150 
11340 3150 +2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 + 11340 3150 11700 3150 +-6 +6 11025 4500 11700 4545 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 11025 4500 11340 4500 +2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 + 11340 4500 11700 4500 +-6 +6 11025 4950 11700 4995 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 11025 4950 11340 4950 +2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 + 11340 4950 11700 4950 +-6 +6 11025 3600 11700 3645 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 11025 3600 11340 3600 +2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 + 11340 3600 11700 3600 +-6 +6 11067 4200 11742 4245 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 11067 4200 11382 4200 +2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 + 11382 4200 11742 4200 +-6 +6 11021 2250 11696 2295 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 11021 2250 11336 2250 +2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 + 11336 2250 11696 2250 +-6 +2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 4 + 1350 4500 675 4500 675 2925 1350 2925 +2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 2 + 225 3825 675 3825 +2 1 1 1 0 7 52 0 -1 4.000 0 0 -1 0 0 4 + 2025 4725 1350 4725 1350 4185 2700 4185 +2 1 1 1 0 7 51 0 -1 4.000 0 0 -1 0 0 2 + 2025 4725 2700 4725 +2 1 1 1 0 7 51 0 -1 4.000 0 0 -1 0 0 4 + 2700 3600 2025 3600 2025 3150 2700 3150 +2 1 1 1 0 7 52 0 -1 4.000 0 0 -1 0 0 4 + 2025 3375 1350 3375 1350 2475 2025 2475 +2 1 1 1 0 7 51 0 -1 4.000 0 0 -1 0 0 4 + 2700 2700 2025 2700 2025 2250 2700 2250 +2 1 1 1 0 7 51 0 -1 4.000 0 0 -1 0 0 4 + 2700 4950 2025 4950 2025 4500 2700 4500 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 3390 3825 4230 3825 +2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 4 + 5625 4515 4950 4515 4950 2940 5625 2940 +2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 2 + 4500 3840 4950 3840 +2 1 1 1 0 7 51 0 -1 4.000 0 0 -1 0 0 2 + 6300 4740 6975 4740 +2 1 1 1 0 7 51 0 -1 4.000 0 0 -1 0 0 4 + 6975 3615 6300 3615 6300 3165 6975 3165 +2 1 1 1 0 7 51 0 -1 4.000 0 0 -1 0 0 4 + 6975 2715 6300 2715 6300 2265 6975 2265 +2 1 1 1 0 7 51 0 -1 4.000 0 0 -1 0 0 4 + 6975 4965 
6300 4965 6300 4515 6975 4515 +2 1 0 1 0 7 52 0 -1 0.000 0 0 -1 0 0 4 + 6300 3390 5625 3390 5625 2490 6300 2490 +2 1 0 1 0 7 52 0 -1 0.000 0 0 -1 0 0 4 + 6300 4740 5625 4740 5625 4200 6345 4200 +2 1 1 1 0 7 51 0 -1 4.000 0 0 -1 0 0 2 + 7020 4200 6345 4200 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 8115 3825 8955 3825 +2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 4 + 10350 4515 9675 4515 9675 2940 10350 2940 +2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 2 + 9225 3840 9675 3840 +2 1 0 1 0 7 52 0 -1 0.000 0 0 -1 0 0 4 + 11025 3390 10350 3390 10350 2490 11025 2490 +2 1 0 1 0 7 52 0 -1 0.000 0 0 -1 0 0 4 + 11025 4740 10350 4740 10350 4200 11070 4200 +2 1 0 1 0 7 51 0 -1 0.000 0 0 -1 0 0 1 + 11070 4200 +2 1 0 1 0 7 51 0 -1 0.000 0 0 -1 0 0 1 + 11025 4740 +2 1 0 1 0 7 51 0 -1 0.000 0 0 -1 0 0 2 + 11017 2698 11017 2246 +2 1 0 1 0 7 51 0 -1 0.000 0 0 -1 0 0 2 + 11025 3604 11025 3154 +2 1 0 1 0 7 51 0 -1 0.000 0 0 -1 0 0 2 + 11025 4947 11025 4497 +2 2 0 2 -1 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 6120 2040 7245 2040 7245 5190 6120 5190 6120 2040 +2 2 0 2 -1 7 20 0 -1 0.000 0 0 -1 0 0 5 + 900 2025 3285 2025 3285 5265 900 5265 900 2025 +2 1 2 1 -1 7 50 -1 -1 3.000 0 0 -1 0 0 2 + 6120 2940 7245 2940 +2 1 2 1 -1 7 50 -1 -1 3.000 0 0 -1 0 0 2 + 6120 3840 7245 3840 +2 1 2 1 -1 7 50 -1 -1 3.000 0 0 -1 0 0 2 + 6120 4380 7245 4380 +2 2 0 0 -1 7 50 0 -1 0.000 0 0 -1 0 0 5 + 9270 2040 12195 2040 12195 5190 9270 5190 9270 2040 +2 1 2 1 -1 7 50 -1 -1 3.000 0 0 -1 0 0 2 + 11345 2476 11962 2472 +2 2 0 2 -1 7 50 -1 -1 6.000 0 0 -1 0 0 5 + 11340 2025 11970 2025 11970 5190 11340 5190 11340 2025 +2 1 2 1 -1 7 50 -1 -1 3.000 0 0 -1 0 0 2 + 11353 2939 11970 2940 +2 1 2 1 -1 7 50 -1 -1 3.000 0 0 -1 0 0 2 + 11353 3374 11970 3370 +2 1 2 1 -1 7 50 -1 -1 3.000 0 0 -1 0 0 2 + 11331 3832 11970 3840 +2 1 2 1 -1 7 50 -1 -1 3.000 0 0 -1 0 0 2 + 11353 4384 11970 4380 +2 1 2 1 -1 7 50 -1 -1 3.000 0 0 -1 0 0 2 + 11350 4726 11967 4722 +2 2 0 0 7 7 50 0 -1 0.000 0 0 -1 0 0 5 + 270 2025 3195 2025 3195 5175 270 
5175 270 2025 +2 1 2 1 -1 7 50 -1 -1 3.000 0 0 -1 0 0 2 + 900 3780 2925 3780 +2 2 0 0 7 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4545 2040 7470 2040 7470 5190 4545 5190 4545 2040 +4 0 0 50 -1 16 20 0.0000 6 255 960 12105 2340 C1.1.1\001 +4 0 0 50 -1 16 20 0.0000 6 255 960 12105 2790 C1.1.2\001 +4 0 0 50 -1 16 20 0.0000 6 255 960 12105 3240 C1.2.1\001 +4 0 0 50 -1 16 20 0.0000 6 255 960 12105 3690 C1.2.2\001 +4 0 0 50 -1 16 20 0.0000 6 255 690 7380 2490 C1.1\001 +4 0 0 50 -1 16 20 0.0000 6 255 690 7380 3390 C1.2\001 +4 0 0 50 -1 16 20 0.0000 6 255 690 7380 4245 C2.1\001 +4 0 0 50 -1 16 20 0.0000 6 255 690 7380 4830 C2.2\001 +4 0 -1 30 0 16 20 0.0000 6 255 420 1035 2340 C1\001 +4 0 -1 50 -1 0 18 0.0000 4 195 345 2790 2385 H1\001 +4 0 -1 50 -1 0 18 0.0000 4 195 345 2790 2790 H2\001 +4 0 -1 50 -1 0 18 0.0000 4 195 345 2790 3285 H3\001 +4 0 -1 50 -1 0 18 0.0000 4 195 345 2790 3735 H4\001 +4 0 -1 50 -1 0 18 0.0000 4 195 345 2790 4230 H5\001 +4 0 -1 50 -1 0 18 0.0000 4 195 345 2790 4545 H6\001 +4 0 -1 50 -1 0 18 0.0000 4 195 345 2790 4860 H7\001 +4 0 -1 50 -1 0 18 0.0000 4 195 345 2790 5175 H8\001 +4 1 -1 50 -1 16 20 0.0000 6 255 1050 1935 5805 Level 1\001 +4 1 -1 50 -1 16 20 0.0000 6 255 1050 6255 5805 Level 2\001 +4 1 -1 50 -1 16 20 0.0000 6 255 1050 11025 5760 Level 3\001 +4 0 0 50 -1 16 20 0.0000 6 255 960 12105 4680 C2.2.1\001 +4 0 0 50 -1 16 20 0.0000 6 255 960 12105 5085 C2.2.2\001 +4 0 0 50 -1 16 20 0.0000 6 255 690 12105 4185 C2.1\001 +4 0 -1 30 0 16 20 0.0000 6 255 420 990 4140 C2\001 +4 2 -1 50 -1 0 18 0.0000 4 195 555 360 3735 Root\001 +4 2 -1 50 -1 0 18 0.0000 4 195 555 4680 3735 Root\001 +4 2 -1 50 -1 0 18 0.0000 4 195 555 9405 3735 Root\001 diff --git a/Documentation/manual.tex b/Documentation/manual.tex index 0a3f1b60c2060bf3cc5b0bbd1797e9b021d8deba..a6446d006153be599efdd3747cc6a11444771f30 100644 --- a/Documentation/manual.tex +++ b/Documentation/manual.tex @@ -91,15 +91,25 @@ phylogenetic Trees } \author{Claire Bardel, Vincent Danjean, Pierre Darlu and Emmanuelle 
Génin} +\date{Version 1.1.0} \maketitle \tableofcontents -\chapter{Overview of the software} + + +\chapter{Introduction} + +\section{What's new?} +\paragraph{Version 1.1.0: modification of \newchitree and \etHT to deal with quantitative data} +The software now deals with quantitative data. For the association test, series of one-way ANOVA are performed instead of the homogeneity tests. For the localisation test, only the definition of the S character is different for quantitative traits. Currently, \rechaplo has not been modified and does not deal with quantitative data. + + +\section{Overview of the software} This software is designed to perform phylogeny-based analysis: first, it allows the detection of an association between a candidate gene and -a disease, and second, it enables to make hypothesis about the +a disease or a quantitative trait, and second, it enables us to make hypothesis about the susceptibility loci. %\section{Copyright} @@ -114,50 +124,49 @@ It contains three programs: \newchitree, \rechaplo and \label{fig:altree} \end{figure} -This program is copyright (c) by Claire Bardel and Vincent Danjean +This program is copyrighted (c) by Claire Bardel and Vincent Danjean and is distributed under the GNU General Public License. You are free to re-distribute it under the same license. This software comes with no warranty whatsoever. If you encounter any -problem, please, send a bug report to Claire Bardel at the foolowing -e-mail: bardel@vjf.inserm.fr +problem, please, send a bug report to Claire Bardel at the following +e-mail: Claire.Bardel@univ-lyon1.fr %The complete text of the GNU General Public %License can be found in the annexe~\ref{GPL} on page~\pageref{GPL}. -\section{\newchitree} -\subsection{Association test} -The test consists in performing series of nested homogeneity tests -(\chisquare) comparing the number of cases and controls in the -different clades defined on the tree. 
The nested algorithm is detailed +\subsection{\newchitree} +\subsubsection{Association test} +The test consists in performing series of nested tests at different level of a phylogenetic tree. These tests compare either the number of cases and controls (for case/control data) in the different groups (or clades) defined on the tree or the variance of the trait within each clade to the variance between these different clades (for quantitative data). The nested algorithm is detailed on Figure~\ref{fig:nesting_algo} (figure from \citet{Bardel05}, -slightly modified). A global p-value is calculated +slightly modified). Then, a global p-value is calculated for the tree by using a permutation procedure such as the one described by \citet{Ge03} and \citet{Becker04}. \begin{figure}[h] \begin{center} - \includegraphics[width=0.6\linewidth]{Analysis_Temp2.fig} + \includegraphics[width=0.8\linewidth]{altree-q.fig} \caption{Description of the nested clade analysis (without the permutation procedure)} \label{fig:nesting_algo} \end{center} \vspace{-0.4cm} - {\small (A) shows the homogeneity test performed at level k (between clades - $C_1$ and $C_2$). If it is not significant (B), a test will be + {\small (A) shows the homogeneity test or the ANOVA performed at level k (between clades + $C_1$ and $C_2$). Then (B), a test will be performed at the following level (k+1), between all the sub-clades descending from clades $C_1$ and $C_2$, i.e between clades $C_{1.1}$, - $C_{1.2}$, $C_{2.1}$ and $C_{2.2}$ (3 degree of freedom). If it is - significant the analysis ends because an association is detected. - When the permutation procedure is used, all the tests are considered - as non significant and the p-values are evaluated \textit{a posteriori}. + $C_{1.2}$, $C_{2.1}$ and $C_{2.2}$. +% If it is +% significant the analysis ends because an association is detected. 
+% When the permutation procedure is used, all the tests are considered +% as non significant and the p-values are evaluated \textit{a posteriori}. } \end{figure} -\subsection{Localisation of the susceptibility loci} +\subsubsection{Localisation of the susceptibility loci} To perform the localisation analysis, for each haplotype $h$, the user must previously define a new character (called character $S$) whose -state depends on the proportion of cases carrying haplotype $h$ and +state depends on the proportion of cases (resp. individual with high quantitative trait values) carrying haplotype $h$ and optimise it on the haplotype phylogeny. The program \newchitree then looks for sites that co-mutates with the character $S$ by calculating a co-mutation index called $V_{i}$ for each site $i$ and for each @@ -169,7 +178,7 @@ susceptibility site. described in~\citet{Bardel05}. Please refer to this article for a more complete description. -\section{\rechaplo} +\subsection{\rechaplo} Before running \newchitree, you will generally have to reconstruct haplotypes %(see section \ref{sec:require} for a description of usable %programs). @@ -188,17 +197,17 @@ programs: \phase~\citep{Stephens01, Stephens03} and reconstruction programs: \paup~\citep{Swofford02}, \phylip~\citep{Felsenstein04} and \paml~\citep{YangPAML}. -\section{\etHT}\label{description_etiquette} +\subsection{\etHT}\label{description_etiquette} \enlargethispage{1cm} To perform the localisation analysis, a new character $S$ must be added to each haplotype $h$. The state of $S$ depends on the -proportion of cases carrying the haplotype $h$. You can use your own +proportion of cases (resp. individual with high quantitative trait values) carrying the haplotype $h$. You can use your own criterion to determine the state of $S$ and add it manually to the input file of the phylogeny reconstruction program that will optimise the character states changes on the tree. 
If you do not want to add the character $S$ manually, you can use \etHT. -The state of the character $S$ is allocated depending on the +For case/control data, the state of the character $S$ is allocated depending on the proportion ($p_h$) of cases carrying the haplotype $h$ compared to the proportion $p_0$ of cases in the whole sample. @@ -207,21 +216,33 @@ proportion $p_0$ of cases in the whole sample. is coded ``C'' or ``0'' (high number of controls); \item if $p_h > p_0+\epsilon\sqrt{\frac{p_h\times(1-p_h)}{n_h}}$, $S$ is coded ``G'' or ``1'' (high number of cases); -\item else, $S$ is coded ``?'' (unknown data). +\item else, $S$ is coded ``?'' (undetermined). \end{itemize} with $n_h$ being the number of individuals carrying the haplotype $h$. -\section{Computation time} +For quantitative data, the state of ``S'' depends on the mean of the quantitative trait in a given branch of the +tree $\mu$, the mean of the quantitative trait in the whole data +set $\mu_0$ and the standard deviation of the quantitative trait in +the whole data set, $\sigma_0$. +\begin{itemize} +\item if $\mu> \mu_0 + \epsilon \times \displaystyle{\frac{\sigma_0}{\sqrt{n}}}\quad$ ``S'' is coded ``H'' (high level); +\item if $\mu< \mu_0 - \epsilon \times \displaystyle{\frac{\sigma_0}{\sqrt{n}}}\quad$ ``S'' is coded ``L'' (low level); + \item else, ``S'' is coded ``?'' (undetermined) +\end{itemize} +$n$ being the sample size, and $\epsilon$, an inflation coefficient to be chosen by the user. + + +\subsection{Computation time (in 2005)} We measured the computation time on a Pentium III, 930 MHz, 512 Mo of -RAM. We used the Crohn data set: 363 individuals genotyped for 7 SNPs -defining 33 different haplotypes. The reconstructed phylogenetic tree -possessed 6 levels. On this data set, the association test runs in -about 24 hours (p-value evaluated by 100~000 permutations, the complexity -of the program being linear with respect to the number of -permutations). 
The localisation test runs in about 10 seconds -(2~000 equiparsimonious trees analysed, the complexity of the program -being linear with respect to the number of analysed trees). +RAM. We used a data set of 363 individuals (cases and controls) +genotyped for 7 SNPs defining 33 different haplotypes. The +reconstructed phylogenetic tree had 6 levels. On this data set, the +association test runs in about 24 hours (p-value evaluated by 100~000 +permutations, the complexity of the program being linear with respect +to the number of permutations). The localisation test runs in about 10 +seconds (2~000 equiparsimonious trees analysed, the complexity of the +program being linear with respect to the number of analysed trees). In fact, for the association test, the computation time increases with the number of permutation and with the number of levels in the tree @@ -519,9 +540,10 @@ name (which must be preceded by -). \option{-{}-second-input-file|-j \ucode{file}}{Input file 2: nb cases/controls per haplotype} \option{-{}-output-file|-o \ucode{file}}{Output file} %-r & Haplotype reconstruction program\\ - \option{-{}-proportion|-p \ucode{number}}{proportion of cases in the sample} \option{-{}-epsilon|-e \ucode{number}}{$\epsilon$ parameter} \option{-{}-data-type|-t \ccode{DNA|SNP}}{data type: SNP or DNA} + \option{-{}-proportion|-p \ucode{number}}{proportion of cases in the sample} + \option{-{}-data-qual|-q \ccode{qualitative|quantitative}}{data type: qualitative or quantitative} \option{-{}-outgroup|-g \ucode{outgroup\_name}} {Name of the outgroup (if necessary)} \option{-{}-low|-l}{forces the state of character $S$ to be ``?'' for haplotypes carried by 1 individual} @@ -538,11 +560,47 @@ name (which must be preceded by -). \section{How to get help?} See the same section for the program \rechaplo (page \pageref{help}). -\section{Input file (-i option)} -The name of the input file must be specified after the -i option. 
The -input file must be a valid \paup (nexus) or \phylip/\paml input -file. If it is a \paup file, make sure that the line following the -description of the last haplotype in the data block includes a semi colon only. +\section{Input files } +\subsection{The sequence file (-i option)} +The name of the input file containing the sequences must be specified +after the -i option. This input file must be a valid \paup (nexus) or +\phylip/\paml input file. If it is a \paup file, make sure that the +line following the description of the last haplotype in the data block +includes a semi colon only. + +\subsection{The trait file (-j option)} +The name of the file containing information about the trait must be specified after the -j option. +\begin{itemize} +\item If your trait is quantitative, the file must contain haplotype labels followed by the quantitative values measured for the individuals carrying these haplotypes. For homozygous individuals, quantitative values must be repeated twice; +\item If your data are qualitative, the file must contain haplotype + labels followed by the number of cases and controls carrying this + haplotype separated by spaces or tabulations. The number of cases + should be preceded by a "m" (or the word "case", possibly followed + by a "$\_$"), the number of controls should be preceded by the letter + "c" (or the word "control", possibly followed by a "$\_$"). 
+\end{itemize} +Examples: +\begin{minipage}{0.45\linewidth} + \begin{center} + \begin{tabular}{ccc} + \multicolumn{3}{c}{Case/control data}\\ + H002 & m12 & c5 \\ + H019 & m2 & c6 \\ + H007 & m54 & c78 \\ + \end{tabular} + \end{center} +\end{minipage} +\begin{minipage}{0.45\linewidth} + \begin{center} + \begin{tabular}{ccccc} + \multicolumn{3}{c}{Quantitative data}\\ + H008 & 9.54 & 11.45\\ + H005 & 7.73 & 11.43 & 10.6 & 13.8\\ + H018 & 8.98 \\ + \end{tabular} + \end{center} +\end{minipage} + \section{Output file (-o option)} @@ -550,20 +608,22 @@ The name of the output file can be specified after the -o option. If the -o option is not present, the standard output is used. The output file is a \paup or \paml input file. The character $S$ is -coded ``G'' or ``1'' for cases and ``C'' or ``0'' for controls. In -the \paup input file generated, a new command is added, which excludes -the character $S$ from the tree reconstruction process, and includes it -in the table of apomorphies. If you want to use \paml, no such command -exists. We advise you to reconstruct the phylogeny on the data set -without the character $S$ by using your favorite phylogeny -reconstruction program. Then, you give that tree and the data-set with -the S-character to \paml to obtain the apomorphie list. +coded ``G'' or ``1'' for cases or high values of the quantitative +trait and ``C'' or ``0'' for controls or low values of the +quantitative trait. In the \paup input file generated, a new command +is added, which excludes the character $S$ from the tree +reconstruction process, and includes it in the table of +apomorphies. If you want to use \paml, no such command exists. We +advise you to reconstruct the phylogeny on the data set without the +character $S$ by using your favorite phylogeny reconstruction +program. Then, you give that tree and the data-set with the +S-character to \paml to obtain the apomorphie list. 
\section{Other options} -\subsection{Proportion of cases in the sample} -The proportion of cases in the sample must be specified after the -p option +\subsection{Proportion of cases in the sample (qualitative data only)} +The proportion of cases in the sample must be specified after the -p option. \subsection{The epsilon value} It corresponds to the parameter $\epsilon$ (see the description of the @@ -571,11 +631,13 @@ program in section \ref{description_etiquette}, page \pageref{description_etiquette}). If $\epsilon$ is high, haplotypes will more often have a character $S$ coded ``?''. To give an idea, in our article~\citep{Bardel05}, $\epsilon$ was set to 1. -\subsection{Data type} +\subsection{Data type: sequence} The -t option must be followed either by \cmd{SNP} or by \cmd{DNA}. \cmd{SNP} should be used if you have numerical data (characters coded from 0 to 9). \cmd{DNA} must be used if you have DNA data (A, T, G, C). +\subsection{Data type: trait} +The software can deal with qualitative data (case/control) or quantitative data. The -q (or -{}-data-qual) option must be followed by either \cmd{qualitative} or \cmd{quantitative}, depending on your data. \subsection{Haplotypes carried by only 1 individual} The -l option is not mandatory: if it is present, $S$ is coded ``?'' @@ -608,6 +670,7 @@ some of them also have a short name (which must be preceded by -). \option{-{}-second-input-file|-j \ucode{file}}{nb cases/controls per haplotype} \option{-{}-output-file|-o \ucode{file}}{output file} \option{-{}-data-type|-t \ccode{DNA}|\ccode{SNP}}{type of data} +\option{-{}-data-qual|-q \hspace*{\fill}\ccode{qualitative|quantitative}}{data type: qualitative or quantitative} \option{-{}-remove-outgroup }{remove the outgroup sequence for the analysis} @@ -678,17 +741,19 @@ Moreover, the states at all nodes of the tree must appear in the output file, so The input file for \newchitree is the output file named ``rst'' by \paml. 
-\subsection{Second input file (option -{}-second-input-file or -j)} -This input file consists in lines containing the label of each +\subsection{The trait input file (option -{}-second-input-file or -j)} +If you analyze case/control data, this input file consists in lines containing the label of each haplotype followed by the number of cases and controls carrying it separated by spaces or tabulations. The number of cases should be preceded by a ``m``(or the word ``case'', possibly followed by a ``\_''), the number of controls should be preceded by the letter ``c'' -(or the word ``control'', possibly followed by a ``\_''). - +(or the word ``control'', possibly followed by a ``\_'').\\ Example of such files are given in the test directory. These files are always labeled \fn{nb\_cas\_control.txt}. +If your trait is quantitative, the file must contain haplotype labels followed by the quantitative values measured for the individuals carrying these haplotypes. For homozygous individuals, quantitative values must be repeated twice. + + \subsection{Output file (option -{}-output-file or -o)} You can choose the name of the output file by using the -{}-output-file or -o option. If this option is not specified, the standard output is used. @@ -696,13 +761,16 @@ You can choose the name of the output file by using the -{}-output-file or -o op After the option -p, you must specify which phylogeny reconstruction program (\paup, \phylip or \paml) was used to generate the first input file. -\subsection{Data type (option -{}-data-type or -t)} +\subsection{Data type: sequence (option -{}-data-type or -t)} The option -t must be followed either by \cmd{SNP} or by \cmd{DNA}. \cmd{SNP} should be used if you have numerical data (from 0 to 9). \cmd{DNA} must be used if you have DNA data (A, T, G, C). Warning: the DNA option currently does not work if you have reconstructed the phylogeny with phylip. 
+\subsection{Data type: trait (option -{}-data-qual or -q)} +The software can deal with qualitative data (case/control) or quantitative data. The -q (or -{}-data-qual) option must be followed by either qualitative or quantitative, depending on your data. + \subsection{Print tree (option -{}-print-tree)} If this option is specified, the tree with the character state changes along the branches will be written in the output file. It may @@ -845,7 +913,7 @@ Be careful: this option is \emph{case sensitive} and the \emph{quotes are mandatory}. \subsubsection{ -{}-co-evo|e simple|double} -This option enables to choose how the $V_{i}$ are calculated. +This option enables the user to choose how the $V_{i}$ are calculated. \paragraph{option ''simple''} This option corresponds to the calculation of $V_{i}$ described in \citet{Bardel05}. Please refer to this publication for more information. diff --git a/Makefile.PL b/Makefile.PL index f894c9d4c1eba252839474d88d9bf765e173a46e..a1fda564a537756de1e5bce394d338d88cbbe60d 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -6,7 +6,7 @@ WriteMakefile( NAME => 'altree', PMLIBDIRS => [ 'ALTree' ], DIR => [ 'CUtils' ], - VERSION => '1.0.1', + VERSION => '1.1.0', PREREQ_PM => {}, # e.g., Module::Name => 1.1 ($] >= 5.005 ? 
## Add these new keywords supported since 5.005 (ABSTRACT_FROM => 'altree', # retrieve abstract from module diff --git a/altree b/altree index 99354f619d9a8b5b1af4f997c05f7ddd39e60c15..7bb8a7e99615fccc522fc5507c8d231b58948dce 100755 --- a/altree +++ b/altree @@ -897,8 +897,10 @@ sub CalculateEit SetEit(($sitesens_per_tree->GetMit()*$s_t+ $sitesens_per_tree->GetSensRev()->GetMit()*$s_t_rev) /$b_t); - # print " m_it= ", $sitesens_per_tree->GetMit(),"\n"; - # print $site_per_tree->GetSiteNb()," ", $sitesens_per_tree->GetEit(), "\n"; + # print " m_it= ", $sitesens_per_tree->GetMit(),"\n"; + # print $site_per_tree->GetSiteNb()," E_i ", $sitesens_per_tree->GetEit(), "\n"; + # print " R_it= ", $sitesens_per_tree->GetRit(), "\n"; + # print " V_it= ", $sitesens_per_tree->GetVit(),"\n"; } } } @@ -1638,7 +1640,7 @@ sub main my $tree=$file_tree->{"tree"}; my ($b_t)=$tree->GetNbBrNonNulle(); - + # print "NB branches: $b_t\n"; my $site=$tree->GetSite($s_site_nb); if (not defined($site)) { erreur("Invalid value ($s_site_nb) for". 
@@ -1652,7 +1654,7 @@ sub main $all_files_foret->AddTree($tree); } $foret->CalculVi(); - #PrintAllVi($foret, $s_site_nb);# Non trié + #PrintAllVit($foret, $s_site_nb);# Non trié #PrintViMax($foret, $s_site_nb); # Affiche la liste en choisissant #pour chaque sit, juste le meilleur sens print "\n"; @@ -1801,8 +1803,6 @@ Type of data: DNA (ATGCU) or SNP (0-1) =item B<--data-qual|q> C<qualitative>|C<quantitative> Analyse qualitative (case/control) or quantitative data -It is only valid for the association test -The localization test cannot deal with quantitative data =item B<--outgroup> outgroup diff --git a/altree-add-S b/altree-add-S index 7083a97083fc5825a011875d0c630d9f463239c7..e8b459415b5198948b7d72500561ca797e91ac6b 100755 --- a/altree-add-S +++ b/altree-add-S @@ -497,13 +497,12 @@ altree-add-S [options] --help help message with options descriptions --man full documentation --first-input-file|i input file 1 - --second-input-file|j input file 2 nb cases/controls per haplotypes + --second-input-file|j input file 2 data concerning the trait (qualitative or quantitative) --output-file|o output file --epsilon|e epsilon value --data-type|t data type: SNP or DNA - --proportion|p proportion of cases in the sample - --ancestor|a name of ancestral haplotype - --data-qual|q qualitative or quantitative data + --proportion|p proportion of cases in the sample (for qualitative data only) + --data-qual|q data type: qualitative or quantitative [--outgroup|g] name of the outgroup --low|l if an haplotype is present equal or less than -l times, the state of S will be set to "?" 
@@ -534,7 +533,7 @@ Input file 1 (paup or phylip file) =item B<--second-input-file|j> -Input file 2, contains the number of times a given haplotypes is carried by case and control individuals +Input file 2, contains the number of times a given haplotype is carried by case and control individuals (qualitative data) or the quantitative values corresponding to a given haplotype (quantitative data) =item B<--output-file|o> diff --git a/test/paup/ancestor_absent/association/1_caco.asso b/test/paup/ancestor_absent/association/1_caco.asso index 6ce818fba241fe975be65059b66cd34fa3d3ef0b..0b108f61c348f41ac389e59f4f6172c0ebbb7f83 100644 --- a/test/paup/ancestor_absent/association/1_caco.asso +++ b/test/paup/ancestor_absent/association/1_caco.asso @@ -1,48 +1,48 @@ - /----* H002 case/control:8/6 - | /----* H008 case/control:9/2 + /----* H002 (LEVEL: 4) case/control:8/6 + | /----* H008 (LEVEL: 5) case/control:9/2 | | Site: 11 Sens: 2-->1 - |----* 14 case/control:13/4 + |----* 14 (LEVEL: 4) case/control:13/4 | | Site: 9 Sens: 1-->2 - | \----* H014 case/control:4/2 + | \----* H014 (LEVEL: 5) case/control:4/2 | Site: 10 Sens: 1-->2 - /----* 15 case/control:57/25 + /----* 15 (LEVEL: 3) case/control:57/25 | | Site: 1 Sens: 2-->1 | | Site: 2 Sens: 2-->1 - | |----* H009 case/control:8/2 + | |----* H009 (LEVEL: 4) case/control:8/2 | | Site: 12 Sens: 2-->1 - | \----* H006 case/control:28/13 + | \----* H006 (LEVEL: 4) case/control:28/13 | Site: 11 Sens: 2-->1 - |----* H007 case/control:7/13 + |----* H007 (LEVEL: 3) case/control:7/13 | Site: 12 Sens: 2-->1 - /----* 16 case/control:70/43 + /----* 16 (LEVEL: 2) case/control:70/43 | | Site: 8 Sens: 2-->1 | | Site: 10 Sens: 2-->1 | | Site: 9 Sens: 2-->1 - | |----* H012 case/control:1/4 + | |----* H012 (LEVEL: 3) case/control:1/4 | | Site: 11 Sens: 2-->1 - | \----* H011 case/control:5/1 - /----* 17 case/control:72/47 + | \----* H011 (LEVEL: 3) case/control:5/1 + /----* 17 (LEVEL: 1) case/control:72/47 | | Site: 6 Sens: 2-->1 | | Site: 7 
Sens: 2-->1 - | \----* H013 case/control:2/4 - |----* H001 case/control:11/9 + | \----* H013 (LEVEL: 2) case/control:2/4 + |----* H001 (LEVEL: 1) case/control:11/9 | Site: 1 Sens: 2-->1 | Site: 2 Sens: 2-->1 ------* 20+(19) case/control:102/74 +-----* 20+(19) (LEVEL: 0) case/control:102/74 | | [0] ddl=3 chi2=1.41 p_value_chi2=0.704 | [1] ddl=5 chi2=3.37 p_value_chi2=0.643 | [2] ddl=8 chi2=16.00 p_value_chi2=0.04 | [3] ddl=11 chi2=17.69 p_value_chi2=0.079 | [4] ddl=12 chi2=18.05 p_value_chi2=0.094 - | /----* H003 case/control:5/6 + | /----* H003 (LEVEL: 2) case/control:5/6 | | Site: 4 Sens: 2-->1 - |----* 18 case/control:12/13 + |----* 18 (LEVEL: 1) case/control:12/13 | | Site: 3 Sens: 2-->1 - | \----* H005 case/control:7/7 + | \----* H005 (LEVEL: 2) case/control:7/7 | Site: 12 Sens: 2-->1 - \----* H010 case/control:7/5 + \----* H010 (LEVEL: 1) case/control:7/5 Site: 5 Sens: 2-->1 Number of permutation: 1 diff --git a/test/paup/ancestor_absent/association/test.res.log b/test/paup/ancestor_absent/association/test.res.log index fec2757eabcf49018f302ed456071d98edc8361e..8f6bdd2a8297614fc96b032b071b23149a85734a 100644 --- a/test/paup/ancestor_absent/association/test.res.log +++ b/test/paup/ancestor_absent/association/test.res.log @@ -1,7 +1,7 @@ P A U P * Portable version 4.0b10 for Unix -Sat Apr 1 16:01:41 2006 +Wed Apr 23 21:53:16 2008 -----------------------------NOTICE----------------------------- This is a beta-test version. Please report any crashes, diff --git a/test/paup/ancestor_absent/association/test.tree b/test/paup/ancestor_absent/association/test.tree index 2da5b28c96b4930216a1e55f5e1d1169f01da4f5..02ff2a1e05f8f81c18c0941062713cb3ca0f8ba8 100644 --- a/test/paup/ancestor_absent/association/test.tree +++ b/test/paup/ancestor_absent/association/test.tree @@ -1,8 +1,8 @@ #NEXUS -Begin trees; [Treefile saved Sat Apr 1 16:01:41 2006] +Begin trees; [Treefile saved Wed Apr 23 21:53:16 2008] [! 
->Data file = /home/vdanjean/travail/perso/Claire/svn/altree/test/paup/ancestor_absent/association/caco.paup +>Data file = /home/cbardel/recherche/logiciel/altree/test/paup/ancestor_absent/association/caco.paup >Heuristic search settings: > Optimality criterion = parsimony > Character-status summary: @@ -26,7 +26,7 @@ Begin trees; [Treefile saved Sat Apr 1 16:01:41 2006] > Total number of rearrangements tried = 69978 > Score of best tree(s) found = 20 > Number of trees retained = 89 -> Time used = 1 sec (CPU time = 0.04 sec) +> Time used = 1 sec (CPU time = 0.09 sec) ] tree PAUP_1 = [&R] (((((H002,(H008,H014),H009,H006),H007,H012,H011),H013),H001,(H003,H005)),H010); tree PAUP_2 = [&R] (((((((H002,(H007,H012,H011),H009,H006),H008),H014),H013),H001),(H003,H005)),H010);