From aa6ec3caf0f5cef8e8bd2f755bb6178a0d2e5624 Mon Sep 17 00:00:00 2001
From: Vincent Danjean <Vincent.Danjean@ens-lyon.org>
Date: Wed, 31 Oct 2012 15:46:55 +0000
Subject: [PATCH] Rewrite CalculChi2 in C

git-svn-id: svn+ssh://imag/users/huron/danjean/svnroot/claire/altree/trunk@460 cf695345-040a-0410-a956-b889e835fe2e
---
 ALTree/to_rewrite.pm     | 103 ++-------------------------------------
 CUtils/CUtils.xs         |  62 +++++++++++++++++++++++
 CUtils/c_sources/stats.c |  98 +++++++++++++++++++++++++++++++++++++
 CUtils/c_sources/stats.h |  10 ++++
 4 files changed, 173 insertions(+), 100 deletions(-)

diff --git a/ALTree/to_rewrite.pm b/ALTree/to_rewrite.pm
index 8e57c86..c2de94f 100644
--- a/ALTree/to_rewrite.pm
+++ b/ALTree/to_rewrite.pm
@@ -148,106 +148,9 @@ sub CalculChi2
     my($ddl)=shift; 
     my($test_results)=shift;
     my($sign_util)=shift;
-    my($chi2, $chi2invalid, $error, $sum_control, $sum_case);
-    my($significatif)=ALTree::Chi2::NON_SIGNIFICATIF;
-    my($p_value);
-    
-    ($chi2, $chi2invalid, $error, $sum_control, $sum_case)= 
-	ALTree::CUtils::ClassicalChi2($tabnodes_a_traiter);
-    if ($error != 0) {
-	# TODO: A vérifier : est-ce OK de mettre $significatif à 0
-	# la valeur est utilisée au retour de cette fonction
-#	$significatif=0;
-	if ($error == 1) {
-	    $test_results->{"texte"}=
-		"No cases,  ($sum_control controls)";
-	    if ($sign_util==SignUtil::YES) {
-		$significatif=ALTree::Chi2::NON_SIGNIFICATIF;
-	    }
-	} elsif ($error == 2) {
-	    $test_results->{"texte"}="No controls: only $sum_case cases";
-	    if ($sum_case>=Seuil::ONLY_CASE) {
-		 if ($sign_util==SignUtil::YES) {
-		     $significatif=1;
-		     $test_results->{"sign"}=ALTree::Chi2::SIGNIFICATIF;
-		 }
-	    } else {
-		if ($sign_util==SignUtil::YES) {
-		    $significatif=0;
-		    $test_results->{"sign"}=ALTree::Chi2::NON_SIGNIFICATIF;
-		}	
-	    }
-	    #$test_results->{"sign"}=ALTree::Chi2::NON_SIGNIFICATIF;
-	} elsif ($error == 4) {
-	    $test_results->{"texte"}="Only one clade";
-	    if ($sign_util==SignUtil::YES) {		
-		$significatif=0;
-		$test_results->{"sign"}=ALTree::Chi2::NON_SIGNIFICATIF;
-	    }
-	    # Manque plein de trucs par rapport à la fonction dans chi2tree...
-	} else {
-	    die "invalid error $error\n";
-	}
-    } else {
-	if ($chi2invalid !=0) {
-	    $test_results->{"warning"}="Small sample size correction used";   
-	    # J'ai pas compté dans combien de branches...
-	    if ($ddl == 1) {
-		$p_value=ALTree::CUtils::bilateral($tabnodes_a_traiter->[0]->{"case"},
-					   $tabnodes_a_traiter->[0]->{"control"},
-					   $tabnodes_a_traiter->[1]->{"case"},
-					   $tabnodes_a_traiter->[1]->{"control"});
-		if ($sign_util==SignUtil::YES) {
-		    $significatif=ALTree::Chi2::chi2_fisher_significatif($p_value);
-		}
-	    } else {
-		my(@clades, $node);
-		foreach $node (@{$tabnodes_a_traiter}) {
-		    push @clades, ($node->{"case"} + $node->{"control"});
-		    # remplit un tableau contenant les effectifs
-		    # totaux des différents clades à utiliser dans le
-		    # réechantillonnage
-		}
-		($p_value)= ALTree::Chi2::reech_chi2($sum_case, $sum_control,
-					     $ddl+1, $chi2, \@clades);
-		$test_results->{"warning"}.=" ($p_value)";
-		if ($sign_util==SignUtil::YES) {
-		    $significatif= ALTree::Chi2::reech_significatif ($p_value);
-		    if ($significatif != 
-			ALTree::Chi2::chi2_significatif($ddl, $chi2)) {
-			$test_results->{"warning"}.=" Result has changed !";
-		    }
-		}
-	    }
-	} else {
-	    if ($sign_util==SignUtil::YES) {
-		$significatif=ALTree::Chi2::chi2_significatif($ddl, $chi2);
-	    }
-	    #my $p=`pochisq $chi2 $ddl`+0; # Verif que les 2 appellent 
-	                                   #bien la même chose!
-	    $p_value=1-gsl_cdf_chisq_P($chi2,$ddl);
-	    #if ($p != $p_value) {
-	    #print STDERR "pochisq: $p != $p_value !\n";
-	    #}
-	}
-	if ($sign_util==SignUtil::YES) {
-	    if ($significatif) {
-		$test_results->{"sign"}=ALTree::Chi2::SIGNIFICATIF;
-	    #$test_results->{"texte"}.="significatif";
-	    } else {
-		$test_results->{"sign"}=ALTree::Chi2::NON_SIGNIFICATIF;
-		# $test_results->{"texte"}.="non significatif";
-	    }
-	}
-	$test_results->{"chi2"}=$chi2;
-	$test_results->{"p_val"}=$p_value;
-	#$test_results->{"texte"}.=" [p_value_chi2=$p_value]";
-    }
-#    if ($sign_util==SignUtil::YES) {
-	return ($p_value, $significatif);
-#    } else {
-#	return ($p_value);
-#    }
+
+    return ALTree::CUtils::CalculChi2($tabnodes_a_traiter, $ddl,
+				      $test_results, $sign_util);
 }
 
 sub CalculAnovaOneWay
diff --git a/CUtils/CUtils.xs b/CUtils/CUtils.xs
index dcc3c67..9823be0 100644
--- a/CUtils/CUtils.xs
+++ b/CUtils/CUtils.xs
@@ -169,6 +169,68 @@ ClassicalChi2(tabnodes)
 
 	free(nodes);
 
+void
+CalculChi2(tabnodes, ddl, test_results, sign_util)
+	AV * tabnodes
+        int ddl
+        HV * test_results
+        int sign_util
+    INIT:
+	struct cc *nodes;
+	int nb_nodes;
+	struct calcul_chi2_res res;
+	int i;
+    PPCODE:
+	nb_nodes=ddl+1;
+	if (av_len(tabnodes) != ddl)
+	{
+	  XSRETURN_UNDEF;
+	}
+	nodes=(struct cc*)malloc(nb_nodes*sizeof(struct cc));
+
+	for (i=0; i<nb_nodes; i++) {
+	  SV* ref=*av_fetch(tabnodes, i, 0);
+	  if (!SvROK(ref)) { return XSRETURN_UNDEF; }
+	  if (SvTYPE(SvRV(ref)) != SVt_PVHV) { return XSRETURN_UNDEF; }
+	  HV* hash=(HV*)SvRV(ref);
+	  SV** svp;
+	  svp=hv_fetch(hash, "case", 4, 0);
+	  if (!svp) { return XSRETURN_UNDEF; }
+	  nodes[i].cases=SvNV(*svp);
+	  svp=hv_fetch(hash, "control", 7, 0);
+	  if (!svp) { return XSRETURN_UNDEF; }
+	  nodes[i].controls=SvNV(*svp);
+	}
+
+	res=calcul_chi2(nb_nodes, nodes, sign_util, 1);
+
+	if (res.texte) {
+	  hv_store(test_results, "texte", 5,
+		   newSVpv(res.texte, 0),0);
+	  free(res.texte);
+	}
+	if (sign_util) {
+	  hv_store(test_results, "sign", 4,
+		   newSViv(res.significatif),0);
+	}
+	if (res.warning) {
+	  hv_store(test_results, "warning", 7,
+		   newSVpv(res.warning, 0),0);
+	  free(res.warning);
+	}
+	if (res.error==0) {
+	  hv_store(test_results, "chi2", 4,
+		   newSVnv(res.chi2),0);
+	  hv_store(test_results, "p_val", 5,
+		   newSVnv(res.p_val),0);
+	}
+
+	free(nodes);
+
+	EXTEND(SP, 2);
+	PUSHs(sv_2mortal(newSVnv(res.p_val)));
+	PUSHs(sv_2mortal(newSViv(res.significatif)));
+
 void
 DefinitionPChi2(p, pprop)
 	SV* p
diff --git a/CUtils/c_sources/stats.c b/CUtils/c_sources/stats.c
index f35fb3f..4b3c4dd 100644
--- a/CUtils/c_sources/stats.c
+++ b/CUtils/c_sources/stats.c
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 #include <gsl/gsl_cdf.h>
 #include <strings.h>
+#include <string.h>
 
 struct classical_chi2_res classical_chi2(int nb_nodes, struct cc *nodes) {
 	struct classical_chi2_res res={
@@ -60,6 +61,103 @@ struct classical_chi2_res classical_chi2(int nb_nodes, struct cc *nodes) {
 	return res;
 }
 
+#define msprintf(str,...) \
+	({						  \
+		char* ch=NULL;						\
+		size_t len=1+snprintf(ch, 0, str, ##__VA_ARGS__);	\
+		ch=(char*)malloc(len);					\
+		snprintf(ch, len, str, ##__VA_ARGS__);			\
+		ch;							\
+	})
+
+#define msprintfcat(dest, str,...)					\
+	({								\
+		char* ch=NULL;						\
+		size_t dlen=((dest)?strlen(dest):0);			\
+		size_t slen=snprintf(ch, 0, str, ##__VA_ARGS__);	\
+		ch=(char*)realloc(dest,dlen+slen+1);			\
+		snprintf(ch+dlen, slen+1, str, ##__VA_ARGS__);		\
+		ch;							\
+	})
+
+struct calcul_chi2_res calcul_chi2(int nb_nodes, struct cc *nodes,
+				   int sign_util, int texte)
+{
+	struct classical_chi2_res r=
+		classical_chi2(nb_nodes, nodes);
+
+	struct calcul_chi2_res res={
+		.error=r.error,
+		.significatif=0,
+		.texte=NULL,
+		.warning=NULL,
+	};
+
+	assert(!(sign_util && !texte));
+	if (res.error != 0) {
+		if (!texte) { return res; }
+
+		// TODO: A vérifier : est-ce OK de mettre $significatif à 0
+		// la valeur est utilisée au retour de cette fonction
+//		res.significatif=0;
+		switch (res.error) {
+		case 1:
+			res.texte=msprintf("No cases,  (%i controls)", r.sum_control);
+			break;
+		case 2:
+			res.texte=msprintf("No controls: only %i cases", r.sum_case);			
+			if (r.sum_case>=Seuil_ONLY_CASE) {
+				res.significatif=sign_util;
+			}
+			break;
+		case 4:
+			res.texte=msprintf("Only one clade");
+			break;
+			// Manque plein de trucs par rapport à la fonction dans chi2tree...
+		default:
+			fprintf(stderr, "invalid error %i\n", res.error);
+		}
+		return res;
+	}
+	datatype_t p_value;
+	int ddl=nb_nodes-1;
+	if (r.chi2invalid == 0) {
+		if (sign_util) {
+			res.significatif=chi2_significatif(ddl, r.chi2);
+		}
+		p_value=1-gsl_cdf_chisq_P(r.chi2, ddl);
+	} else {
+		if (texte) {
+			res.warning=msprintf("Small sample size correction used");
+		}
+		// J'ai pas compté dans combien de branches...
+		if (ddl == 1) {
+			p_value=bilateral(nodes[0].cases,
+					  nodes[0].controls,
+					  nodes[1].cases,
+					  nodes[1].controls);
+			if (sign_util) {
+				res.significatif=chi2_fisher_significatif(p_value);
+			}
+		} else {
+			p_value = reech_chi2(r.sum_case, r.sum_control,
+					     nb_nodes, r.chi2, nodes);
+			res.warning=msprintfcat(res.warning," (%.6g)",p_value);
+			if (sign_util) {
+				res.significatif = reech_significatif(p_value);
+				if (texte
+				    && res.significatif != chi2_significatif(ddl, r.chi2))
+				{
+					res.warning=msprintfcat(res.warning," Result has changed !");
+				}
+			}
+		}
+	}
+	res.chi2=r.chi2;
+	res.p_val=p_value;
+	return res;
+}
+
 static datatype_t *chi2_seuil=NULL;
 static int nb_seuils=0;
 
diff --git a/CUtils/c_sources/stats.h b/CUtils/c_sources/stats.h
index 6cce445..18e140e 100644
--- a/CUtils/c_sources/stats.h
+++ b/CUtils/c_sources/stats.h
@@ -20,7 +20,17 @@ struct classical_chi2_res {
 	int sum_case;
 };
 
+struct calcul_chi2_res {
+	datatype_t chi2;
+	datatype_t p_val;
+	int error;
+	int significatif;
+	char *texte;
+	char *warning;
+};
+
 struct classical_chi2_res classical_chi2(int nb_nodes, struct cc *nodes);
+struct calcul_chi2_res calcul_chi2(int nb_nodes, struct cc *nodes, int sign_util, int texte);
 void definition_p_chi2(datatype_t p, datatype_t pprop);
 int chi2_significatif(int ddl, datatype_t chi2);
 int chi2_fisher_significatif(datatype_t pvalue);
-- 
GitLab