From de3bdcea102b4e5eaf6fc63fafc323706c7d259b Mon Sep 17 00:00:00 2001 From: Vincent Danjean <Vincent.Danjean@ens-lyon.org> Date: Wed, 31 Oct 2012 15:46:46 +0000 Subject: [PATCH] ClassicalChi2 as a C function with validation checks git-svn-id: svn+ssh://imag/users/huron/danjean/svnroot/claire/altree/trunk@456 cf695345-040a-0410-a956-b889e835fe2e --- CUtils/CUtils.xs | 42 ++++++++++++++++++++++++++++++ CUtils/c_sources/stats.c | 56 ++++++++++++++++++++++++++++++++++++++++ CUtils/c_sources/stats.h | 24 +++++++++++++++++ progs/altree | 17 ++++++++++++ 4 files changed, 139 insertions(+) create mode 100644 CUtils/c_sources/stats.c create mode 100644 CUtils/c_sources/stats.h diff --git a/CUtils/CUtils.xs b/CUtils/CUtils.xs index 7eb90b2..a4308ab 100644 --- a/CUtils/CUtils.xs +++ b/CUtils/CUtils.xs @@ -7,6 +7,7 @@ #include "fisher.h" #include "chisq.h" #include "double_permutation.h" +#include "stats.h" #include "const-c.inc" @@ -107,3 +108,44 @@ DoublePermutation(nb_sample, nb_chi2, data) free_ensemble(ens); OUTPUT: RETVAL + +void +ClassicalChi2(tabnodes) + AV * tabnodes + INIT: + struct cc *nodes; + struct classical_chi2_res res; + int nb_nodes=0; + int i; + PPCODE: + nb_nodes=av_len(tabnodes)+1; + //fprintf(stderr, "\nSTART(%i, %i)\n", nb_leaves, nb_nodes); + if (nb_nodes < 1) + { + XSRETURN_UNDEF; + } + nodes=(struct cc*)malloc(nb_nodes*sizeof(struct cc)); + + for (i=0; i<nb_nodes; i++) { + SV* ref=*av_fetch(tabnodes, i, 0); + if (!SvROK(ref)) { return XSRETURN_UNDEF; } + if (SvTYPE(SvRV(ref)) != SVt_PVHV) { return XSRETURN_UNDEF; } + HV* hash=(HV*)SvRV(ref); + SV** svp; + svp=hv_fetch(hash, "case", 4, 0); + if (!svp) { return XSRETURN_UNDEF; } + nodes[i].cases=SvNV(*svp); + svp=hv_fetch(hash, "control", 7, 0); + if (!svp) { return XSRETURN_UNDEF; } + nodes[i].controls=SvNV(*svp); + } + + res=classical_chi2(nb_nodes, nodes); + + XPUSHs(sv_2mortal(newSVnv(res.chi2))); + XPUSHs(sv_2mortal(newSViv(res.chi2invalid))); + XPUSHs(sv_2mortal(newSViv(res.error))); + XPUSHs(sv_2mortal(newSViv(res.sum_control))); + XPUSHs(sv_2mortal(newSViv(res.sum_case))); + + free(nodes); diff --git a/CUtils/c_sources/stats.c b/CUtils/c_sources/stats.c new file mode 100644 index 0000000..4a4bbb7 --- /dev/null +++ b/CUtils/c_sources/stats.c @@ -0,0 +1,56 @@ + +#include "stats.h" +#include <stdio.h> +#include <stdlib.h> + +struct classical_chi2_res classical_chi2(int nb_nodes, struct cc *nodes) { + struct classical_chi2_res res={ + .chi2=0.0, + .chi2invalid=0, + .error=0, + .sum_control=0, + .sum_case=0, + }; + + int i; + for (i=0; i<nb_nodes; i++) { + res.sum_control+=nodes[i].controls; + res.sum_case+=nodes[i].cases; + } + int sum_total=res.sum_control+res.sum_case; + int ddl=nb_nodes-1; + + if (ddl==0) { // 1 seul clade + res.error=4; + return res; + } + if (res.sum_case==0) { + res.error=1; + return res; + } + if (res.sum_control==0) { + res.error=2; + return res; + } + + for(i=0; i<nb_nodes; i++) { + int m=nodes[i].cases; + int c=nodes[i].controls; + if (m==0 && c==0) { + fprintf(stderr, "no case and no control in a node!\n"); + exit(1); + } + datatype_t t_m=((double)((m+c)*res.sum_case))/sum_total; + res.chi2 += (m - t_m)*(m - t_m)/t_m; + + datatype_t t_c=((double)((m+c)*res.sum_control))/sum_total; + res.chi2 += (c - t_c)*(c - t_c)/t_c; + + if( t_m <= SAMPLESIZE + || t_c <= SAMPLESIZE ) { + res.chi2invalid++; + } + } + return res; +} + diff --git a/CUtils/c_sources/stats.h b/CUtils/c_sources/stats.h new file mode 100644 index 0000000..c8ac8d5 --- /dev/null +++ b/CUtils/c_sources/stats.h @@ -0,0 +1,24 @@ +#ifndef _STATS_H +#define _STATS_H + +#include "datatype.h" + +#define SAMPLESIZE 5 + +struct cc { + datatype_t cases; + datatype_t controls; +}; + +struct classical_chi2_res { + datatype_t chi2; + int chi2invalid; + int error; + int sum_control; + int sum_case; +}; + +struct classical_chi2_res classical_chi2(int nb_nodes, struct cc *nodes); + +#endif + diff --git a/progs/altree b/progs/altree index 44b34e9..1afc2ca 100755 --- a/progs/altree +++ b/progs/altree @@ -235,6 +235,23 @@ sub ClassicalChi2 } } } + my ($cchi2, $cchi2invalid, $cerror, $csum_control, $csum_case)= + ALTree::CUtils::ClassicalChi2($tabnodes_a_traiter); + + if ($cchi2 != $chi2) { print STDERR "chi2: $cchi2/$chi2\n"; } + if ($cchi2invalid != $chi2invalid) { + print STDERR "chi2invalid: $cchi2invalid/$chi2invalid\n"; + } + if ($cerror != $error) { + print STDERR "error: $cerror/$error\n"; + } + if ($csum_control != $sum_control) { + print STDERR "sum_control: $csum_control/$sum_control\n"; + } + if ($csum_case != $sum_case) { + print STDERR "sum_case: $csum_case/$sum_case\n"; + } + return ($chi2, $chi2invalid, $error, $sum_control, $sum_case); } -- GitLab