From de3bdcea102b4e5eaf6fc63fafc323706c7d259b Mon Sep 17 00:00:00 2001
From: Vincent Danjean <Vincent.Danjean@ens-lyon.org>
Date: Wed, 31 Oct 2012 15:46:46 +0000
Subject: [PATCH] ClassicalChi2 as a C function with validation checks

git-svn-id: svn+ssh://imag/users/huron/danjean/svnroot/claire/altree/trunk@456 cf695345-040a-0410-a956-b889e835fe2e
---
 CUtils/CUtils.xs         | 42 ++++++++++++++++++++++++++++++
 CUtils/c_sources/stats.c | 56 ++++++++++++++++++++++++++++++++++++++++
 CUtils/c_sources/stats.h | 24 +++++++++++++++++
 progs/altree             | 17 ++++++++++++
 4 files changed, 139 insertions(+)
 create mode 100644 CUtils/c_sources/stats.c
 create mode 100644 CUtils/c_sources/stats.h

diff --git a/CUtils/CUtils.xs b/CUtils/CUtils.xs
index 7eb90b2..a4308ab 100644
--- a/CUtils/CUtils.xs
+++ b/CUtils/CUtils.xs
@@ -7,6 +7,7 @@
 #include "fisher.h"
 #include "chisq.h"
 #include "double_permutation.h"
+#include "stats.h"
 
 #include "const-c.inc"
 
@@ -107,3 +108,44 @@ DoublePermutation(nb_sample, nb_chi2, data)
         free_ensemble(ens);
     OUTPUT:
         RETVAL
+
+void
+ClassicalChi2(tabnodes)
+	AV * tabnodes
+    INIT:
+	struct cc *nodes;
+	struct classical_chi2_res res;
+	int nb_nodes=0;
+	int i;
+    PPCODE:
+	nb_nodes=av_len(tabnodes)+1;
+	//fprintf(stderr, "\nSTART(%i, %i)\n", nb_leaves, nb_nodes);
+	if (nb_nodes < 1)
+	{
+	  XSRETURN_UNDEF;
+	}
+	nodes=(struct cc*)malloc(nb_nodes*sizeof(struct cc));
+
+	for (i=0; i<nb_nodes; i++) {
+	  SV* ref=*av_fetch(tabnodes, i, 0);
+	  if (!SvROK(ref)) { return XSRETURN_UNDEF; }
+	  if (SvTYPE(SvRV(ref)) != SVt_PVHV) { return XSRETURN_UNDEF; }
+	  HV* hash=(HV*)SvRV(ref);
+	  SV** svp;
+	  svp=hv_fetch(hash, "case", 4, 0);
+	  if (!svp) { return XSRETURN_UNDEF; }
+	  nodes[i].cases=SvNV(*svp);
+	  svp=hv_fetch(hash, "control", 7, 0);
+	  if (!svp) { return XSRETURN_UNDEF; }
+	  nodes[i].controls=SvNV(*svp);
+	}
+
+	res=classical_chi2(nb_nodes, nodes);
+
+	XPUSHs(sv_2mortal(newSVnv(res.chi2)));
+	XPUSHs(sv_2mortal(newSViv(res.chi2invalid)));
+	XPUSHs(sv_2mortal(newSViv(res.error)));
+	XPUSHs(sv_2mortal(newSViv(res.sum_control)));
+	XPUSHs(sv_2mortal(newSViv(res.sum_case)));
+
+	free(nodes);
diff --git a/CUtils/c_sources/stats.c b/CUtils/c_sources/stats.c
new file mode 100644
index 0000000..4a4bbb7
--- /dev/null
+++ b/CUtils/c_sources/stats.c
@@ -0,0 +1,56 @@
+
+#include "stats.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+struct classical_chi2_res classical_chi2(int nb_nodes, struct cc *nodes) {
+	struct classical_chi2_res res={
+		.chi2=0.0,
+		.chi2invalid=0,
+		.error=0,
+		.sum_control=0,
+		.sum_case=0,
+	};
+
+	int i;
+	for (i=0; i<nb_nodes; i++) {
+		res.sum_control+=nodes[i].controls;
+		res.sum_case+=nodes[i].cases;
+	}
+	int sum_total=res.sum_control+res.sum_case;
+	int ddl=nb_nodes-1;
+
+	if (ddl==0) { // 1 seul clade
+		res.error=4;
+		return res;
+	}
+	if (res.sum_case==0) {
+		res.error=1;
+		return res;
+	}
+	if (res.sum_control==0) {
+		res.error=2;
+		return res;
+	}
+	
+	for(i=0; i<nb_nodes; i++) {
+		int m=nodes[i].cases;
+		int c=nodes[i].controls;
+		if (m==0 && c==0) {
+			fprintf(stderr, "no case and no control in a node!\n");
+			exit(1);
+		}
+		datatype_t t_m=((double)((m+c)*res.sum_case))/sum_total;
+		res.chi2 += (m - t_m)*(m - t_m)/t_m;
+
+		datatype_t t_c=((double)((m+c)*res.sum_control))/sum_total;
+		res.chi2 += (c - t_c)*(c - t_c)/t_c;
+
+		if( t_m <= SAMPLESIZE
+		    || t_c <= SAMPLESIZE ) {
+			res.chi2invalid++;
+		}
+	}
+	return res;
+}
+
diff --git a/CUtils/c_sources/stats.h b/CUtils/c_sources/stats.h
new file mode 100644
index 0000000..c8ac8d5
--- /dev/null
+++ b/CUtils/c_sources/stats.h
@@ -0,0 +1,24 @@
+#ifndef _STATS_H
+#define _STATS_H
+
+#include "datatype.h"
+
+#define SAMPLESIZE 5
+
+struct cc {
+	datatype_t cases;
+	datatype_t controls;
+};
+
+struct classical_chi2_res {
+	datatype_t chi2;
+	int chi2invalid;
+	int error;
+	int sum_control;
+	int sum_case;
+};
+
+struct classical_chi2_res classical_chi2(int nb_nodes, struct cc *nodes);
+
+#endif
+
diff --git a/progs/altree b/progs/altree
index 44b34e9..1afc2ca 100755
--- a/progs/altree
+++ b/progs/altree
@@ -235,6 +235,23 @@ sub ClassicalChi2
             }
         }
     }
+    my ($cchi2, $cchi2invalid, $cerror, $csum_control, $csum_case)=
+	ALTree::CUtils::ClassicalChi2($tabnodes_a_traiter);
+
+    if ($cchi2 != $chi2) { print STDERR "chi2: $cchi2/$chi2\n"; }
+    if ($cchi2invalid != $chi2invalid) {
+	print STDERR "chi2invalid: $cchi2invalid/$chi2invalid\n";
+    }
+    if ($cerror != $error) {
+	print STDERR "error: $cerror/$error\n";
+    }
+    if ($csum_control != $sum_control) {
+	print STDERR "sum_control: $csum_control/$sum_control\n";
+    }
+    if ($csum_case != $sum_case) {
+	print STDERR "sum_case: $csum_case/$sum_case\n";
+    }
+
     return ($chi2, $chi2invalid, $error, $sum_control, $sum_case);
 }
 
-- 
GitLab