From aa6ec3caf0f5cef8e8bd2f755bb6178a0d2e5624 Mon Sep 17 00:00:00 2001 From: Vincent Danjean <Vincent.Danjean@ens-lyon.org> Date: Wed, 31 Oct 2012 15:46:55 +0000 Subject: [PATCH] Rewrite CalculChi2 in C git-svn-id: svn+ssh://imag/users/huron/danjean/svnroot/claire/altree/trunk@460 cf695345-040a-0410-a956-b889e835fe2e --- ALTree/to_rewrite.pm | 103 ++------------------------------------- CUtils/CUtils.xs | 62 +++++++++++++++++++++++ CUtils/c_sources/stats.c | 98 +++++++++++++++++++++++++++++++++++++ CUtils/c_sources/stats.h | 10 ++++ 4 files changed, 173 insertions(+), 100 deletions(-) diff --git a/ALTree/to_rewrite.pm b/ALTree/to_rewrite.pm index 8e57c86..c2de94f 100644 --- a/ALTree/to_rewrite.pm +++ b/ALTree/to_rewrite.pm @@ -148,106 +148,9 @@ sub CalculChi2 my($ddl)=shift; my($test_results)=shift; my($sign_util)=shift; - my($chi2, $chi2invalid, $error, $sum_control, $sum_case); - my($significatif)=ALTree::Chi2::NON_SIGNIFICATIF; - my($p_value); - - ($chi2, $chi2invalid, $error, $sum_control, $sum_case)= - ALTree::CUtils::ClassicalChi2($tabnodes_a_traiter); - if ($error != 0) { - # TODO: A vérifier : est-ce OK de mettre $significatif à 0 - # la valeur est utilisée au retour de cette fonction -# $significatif=0; - if ($error == 1) { - $test_results->{"texte"}= - "No cases, ($sum_control controls)"; - if ($sign_util==SignUtil::YES) { - $significatif=ALTree::Chi2::NON_SIGNIFICATIF; - } - } elsif ($error == 2) { - $test_results->{"texte"}="No controls: only $sum_case cases"; - if ($sum_case>=Seuil::ONLY_CASE) { - if ($sign_util==SignUtil::YES) { - $significatif=1; - $test_results->{"sign"}=ALTree::Chi2::SIGNIFICATIF; - } - } else { - if ($sign_util==SignUtil::YES) { - $significatif=0; - $test_results->{"sign"}=ALTree::Chi2::NON_SIGNIFICATIF; - } - } - #$test_results->{"sign"}=ALTree::Chi2::NON_SIGNIFICATIF; - } elsif ($error == 4) { - $test_results->{"texte"}="Only one clade"; - if ($sign_util==SignUtil::YES) { - $significatif=0; - $test_results->{"sign"}=ALTree::Chi2::NON_SIGNIFICATIF; - } - # Manque plein de trucs par rapport à la fonction dans chi2tree... - } else { - die "invalid error $error\n"; - } - } else { - if ($chi2invalid !=0) { - $test_results->{"warning"}="Small sample size correction used"; - # J'ai pas compté dans combien de branches... - if ($ddl == 1) { - $p_value=ALTree::CUtils::bilateral($tabnodes_a_traiter->[0]->{"case"}, - $tabnodes_a_traiter->[0]->{"control"}, - $tabnodes_a_traiter->[1]->{"case"}, - $tabnodes_a_traiter->[1]->{"control"}); - if ($sign_util==SignUtil::YES) { - $significatif=ALTree::Chi2::chi2_fisher_significatif($p_value); - } - } else { - my(@clades, $node); - foreach $node (@{$tabnodes_a_traiter}) { - push @clades, ($node->{"case"} + $node->{"control"}); - # remplit un tableau contenant les effectifs - # totaux des différents clades à utiliser dans le - # réechantillonnage - } - ($p_value)= ALTree::Chi2::reech_chi2($sum_case, $sum_control, - $ddl+1, $chi2, \@clades); - $test_results->{"warning"}.=" ($p_value)"; - if ($sign_util==SignUtil::YES) { - $significatif= ALTree::Chi2::reech_significatif ($p_value); - if ($significatif != - ALTree::Chi2::chi2_significatif($ddl, $chi2)) { - $test_results->{"warning"}.=" Result has changed !"; - } - } - } - } else { - if ($sign_util==SignUtil::YES) { - $significatif=ALTree::Chi2::chi2_significatif($ddl, $chi2); - } - #my $p=`pochisq $chi2 $ddl`+0; # Verif que les 2 appellent - #bien la même chose! - $p_value=1-gsl_cdf_chisq_P($chi2,$ddl); - #if ($p != $p_value) { - #print STDERR "pochisq: $p != $p_value !\n"; - #} - } - if ($sign_util==SignUtil::YES) { - if ($significatif) { - $test_results->{"sign"}=ALTree::Chi2::SIGNIFICATIF; - #$test_results->{"texte"}.="significatif"; - } else { - $test_results->{"sign"}=ALTree::Chi2::NON_SIGNIFICATIF; - # $test_results->{"texte"}.="non significatif"; - } - } - $test_results->{"chi2"}=$chi2; - $test_results->{"p_val"}=$p_value; - #$test_results->{"texte"}.=" [p_value_chi2=$p_value]"; - } -# if ($sign_util==SignUtil::YES) { - return ($p_value, $significatif); -# } else { -# return ($p_value); -# } + + return ALTree::CUtils::CalculChi2($tabnodes_a_traiter, $ddl, + $test_results, $sign_util); } sub CalculAnovaOneWay diff --git a/CUtils/CUtils.xs b/CUtils/CUtils.xs index dcc3c67..9823be0 100644 --- a/CUtils/CUtils.xs +++ b/CUtils/CUtils.xs @@ -169,6 +169,68 @@ ClassicalChi2(tabnodes) free(nodes); +void +CalculChi2(tabnodes, ddl, test_results, sign_util) + AV * tabnodes + int ddl + HV * test_results + int sign_util + INIT: + struct cc *nodes; + int nb_nodes; + struct calcul_chi2_res res; + int i; + PPCODE: + nb_nodes=ddl+1; + if (av_len(tabnodes) != ddl) + { + XSRETURN_UNDEF; + } + nodes=(struct cc*)malloc(nb_nodes*sizeof(struct cc)); + + for (i=0; i<nb_nodes; i++) { + SV* ref=*av_fetch(tabnodes, i, 0); + if (!SvROK(ref)) { return XSRETURN_UNDEF; } + if (SvTYPE(SvRV(ref)) != SVt_PVHV) { return XSRETURN_UNDEF; } + HV* hash=(HV*)SvRV(ref); + SV** svp; + svp=hv_fetch(hash, "case", 4, 0); + if (!svp) { return XSRETURN_UNDEF; } + nodes[i].cases=SvNV(*svp); + svp=hv_fetch(hash, "control", 7, 0); + if (!svp) { return XSRETURN_UNDEF; } + nodes[i].controls=SvNV(*svp); + } + + res=calcul_chi2(nb_nodes, nodes, sign_util, 1); + + if (res.texte) { + hv_store(test_results, "texte", 5, + newSVpv(res.texte, 0),0); + free(res.texte); + } + if (sign_util) { + hv_store(test_results, "sign", 4, + newSViv(res.significatif),0); + } + if (res.warning) { + hv_store(test_results, "warning", 7, + newSVpv(res.warning, 0),0); + free(res.warning); + } + if (res.error==0) { + hv_store(test_results, "chi2", 4, + newSVnv(res.chi2),0); + hv_store(test_results, "p_val", 5, + newSVnv(res.p_val),0); + } + + free(nodes); + + EXTEND(SP, 2); + PUSHs(sv_2mortal(newSVnv(res.p_val))); + PUSHs(sv_2mortal(newSViv(res.significatif))); + void DefinitionPChi2(p, pprop) SV* p diff --git a/CUtils/c_sources/stats.c b/CUtils/c_sources/stats.c index f35fb3f..4b3c4dd 100644 --- a/CUtils/c_sources/stats.c +++ b/CUtils/c_sources/stats.c @@ -8,6 +8,7 @@ #include <stdlib.h> #include <gsl/gsl_cdf.h> #include <strings.h> +#include <string.h> struct classical_chi2_res classical_chi2(int nb_nodes, struct cc *nodes) { struct classical_chi2_res res={ @@ -60,6 +61,103 @@ struct classical_chi2_res classical_chi2(int nb_nodes, struct cc *nodes) { return res; } +#define msprintf(str,...) \ + ({ \ + char* ch=NULL; \ + size_t len=1+snprintf(ch, 0, str, ##__VA_ARGS__); \ + ch=(char*)malloc(len); \ + snprintf(ch, len, str, ##__VA_ARGS__); \ + ch; \ + }) + +#define msprintfcat(dest, str,...) \ + ({ \ + char* ch=NULL; \ + size_t dlen=((dest)?strlen(dest):0); \ + size_t slen=snprintf(ch, 0, str, ##__VA_ARGS__); \ + ch=(char*)realloc(dest,dlen+slen+1); \ + snprintf(ch+dlen, slen+1, str, ##__VA_ARGS__); \ + ch; \ + }) + +struct calcul_chi2_res calcul_chi2(int nb_nodes, struct cc *nodes, + int sign_util, int texte) +{ + struct classical_chi2_res r= + classical_chi2(nb_nodes, nodes); + + struct calcul_chi2_res res={ + .error=r.error, + .significatif=0, + .texte=NULL, + .warning=NULL, + }; + + assert(!(sign_util && !texte)); + if (res.error != 0) { + if (!texte) { return res; } + + // TODO: A vérifier : est-ce OK de mettre $significatif à 0 + // la valeur est utilisée au retour de cette fonction +// res.significatif=0; + switch (res.error) { + case 1: + res.texte=msprintf("No cases, (%i controls)", r.sum_control); + break; + case 2: + res.texte=msprintf("No controls: only %i cases", r.sum_case); + if (r.sum_case>=Seuil_ONLY_CASE) { + res.significatif=sign_util; + } + break; + case 4: + res.texte=msprintf("Only one clade"); + break; + // Manque plein de trucs par rapport à la fonction dans chi2tree... + default: + fprintf(stderr, "invalid error %i\n", res.error); + } + return res; + } + datatype_t p_value; + int ddl=nb_nodes-1; + if (r.chi2invalid == 0) { + if (sign_util) { + res.significatif=chi2_significatif(ddl, r.chi2); + } + p_value=1-gsl_cdf_chisq_P(r.chi2, ddl); + } else { + if (texte) { + res.warning=msprintf("Small sample size correction used"); + } + // J'ai pas compté dans combien de branches... + if (ddl == 1) { + p_value=bilateral(nodes[0].cases, + nodes[0].controls, + nodes[1].cases, + nodes[1].controls); + if (sign_util) { + res.significatif=chi2_fisher_significatif(p_value); + } + } else { + p_value = reech_chi2(r.sum_case, r.sum_control, + nb_nodes, r.chi2, nodes); + res.warning=msprintfcat(res.warning," (%.6g)",p_value); + if (sign_util) { + res.significatif = reech_significatif(p_value); + if (texte + && res.significatif != chi2_significatif(ddl, r.chi2)) + { + res.warning=msprintfcat(res.warning," Result has changed !"); + } + } + } + } + res.chi2=r.chi2; + res.p_val=p_value; + return res; +} + static datatype_t *chi2_seuil=NULL; static int nb_seuils=0; diff --git a/CUtils/c_sources/stats.h b/CUtils/c_sources/stats.h index 6cce445..18e140e 100644 --- a/CUtils/c_sources/stats.h +++ b/CUtils/c_sources/stats.h @@ -20,7 +20,17 @@ struct classical_chi2_res { int sum_case; }; +struct calcul_chi2_res { + datatype_t chi2; + datatype_t p_val; + int error; + int significatif; + char *texte; + char *warning; +}; + struct classical_chi2_res classical_chi2(int nb_nodes, struct cc *nodes); +struct calcul_chi2_res calcul_chi2(int nb_nodes, struct cc *nodes, int sign_util, int texte); void definition_p_chi2(datatype_t p, datatype_t pprop); int chi2_significatif(int ddl, datatype_t chi2); int chi2_fisher_significatif(datatype_t pvalue); -- GitLab