From 929cffd7419c335221c005857f5f988c7dc30ef1 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Mon, 13 Mar 2017 19:01:26 +0100
Subject: [PATCH] Recalibrate potrf with magma support enabled

---
 .../sampling/codelets/44/dpotrf.sirocco       | 146 +++++++++++++++++-
 .../sampling/codelets/44/spotrf.sirocco       | 146 +++++++++++++++++-
 2 files changed, 282 insertions(+), 10 deletions(-)

diff --git a/simucore/perfmodels/.starpu/sampling/codelets/44/dpotrf.sirocco b/simucore/perfmodels/.starpu/sampling/codelets/44/dpotrf.sirocco
index b37ae7cf5..c51248b76 100644
--- a/simucore/perfmodels/.starpu/sampling/codelets/44/dpotrf.sirocco
+++ b/simucore/perfmodels/.starpu/sampling/codelets/44/dpotrf.sirocco
@@ -5,7 +5,143 @@
 ####################
 # COMBs
 # number of combinations
+5
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+5
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+25ebb669	16588800       	0.000000e+00   	1.927652e+04   	3.502107e+03   	9.059964e+05   	1.804090e+10   	47
+617e5fe6	7372800        	0.000000e+00   	1.229853e+04   	1.768810e+03   	6.149267e+05   	7.719131e+09   	50
+ad30af9b	51200          	0.000000e+00   	2.101636e+03   	2.756395e+02   	1.303014e+05   	2.785568e+08   	62
+982013a8	1548800        	0.000000e+00   	4.903930e+03   	3.193701e+02   	2.550044e+05   	1.255828e+09   	52
+5104f3b7	29491200       	0.000000e+00   	2.463348e+04   	2.985247e+03   	1.576542e+06   	3.940607e+10   	64
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+5
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+25ebb669	16588800       	0.000000e+00   	1.940135e+04   	4.068762e+03   	6.984485e+05   	1.414682e+10   	36
+617e5fe6	7372800        	0.000000e+00   	1.194505e+04   	1.978674e+03   	3.822416e+05   	4.691180e+09   	32
+ad30af9b	51200          	0.000000e+00   	2.114340e+03   	2.957222e+02   	1.268604e+05   	2.734732e+08   	60
+982013a8	1548800        	0.000000e+00   	5.046035e+03   	5.465455e+02   	1.766112e+05   	9.016412e+08   	35
+5104f3b7	29491200       	0.000000e+00   	2.329330e+04   	2.356823e+03   	1.094785e+06   	2.576222e+10   	47
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
 1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb3)
+# number of entries
+5
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+25ebb669	16588800       	0.000000e+00   	1.848606e+04   	2.220768e+03   	5.545817e+05   	1.039998e+10   	30
+617e5fe6	7372800        	0.000000e+00   	1.174012e+04   	1.793250e+03   	3.991641e+05   	4.795570e+09   	34
+ad30af9b	51200          	0.000000e+00   	2.130842e+03   	3.337398e+02   	1.576823e+05   	3.442383e+08   	74
+982013a8	1548800        	0.000000e+00   	5.078755e+03   	5.140761e+02   	2.437802e+05   	1.250785e+09   	48
+5104f3b7	29491200       	0.000000e+00   	2.451908e+04   	3.131104e+03   	5.884580e+05   	1.466374e+10   	24
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb0)
+# number of entries
+5
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+25ebb669	16588800       	0.000000e+00   	1.807832e+04   	2.125912e+03   	7.412111e+05   	1.358515e+10   	41
+ad30af9b	51200          	0.000000e+00   	2.127387e+03   	2.904710e+02   	1.404075e+05   	3.042697e+08   	66
+617e5fe6	7372800        	0.000000e+00   	1.177248e+04   	1.259016e+03   	4.002642e+05   	4.765995e+09   	34
+982013a8	1548800        	0.000000e+00   	4.993474e+03   	5.010162e+02   	2.546672e+05   	1.284476e+09   	51
+5104f3b7	29491200       	0.000000e+00   	2.412058e+04   	2.891586e+03   	1.206029e+06   	2.950819e+10   	50
+
 ####################
 # COMB_4
 # number of types devices
@@ -34,9 +170,9 @@
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-617e5fe6	7372800        	0.000000e+00   	1.199994e+04   	1.417119e+03   	2.051989e+06   	2.496714e+10   	171
-ad30af9b	51200          	0.000000e+00   	4.029498e+01   	7.344934e+00   	1.938188e+04   	8.069416e+05   	481
-982013a8	1548800        	0.000000e+00   	1.778398e+03   	2.306933e+02   	5.015082e+05   	9.068890e+08   	282
-25ebb669	16588800       	0.000000e+00   	3.651073e+04   	5.293321e+03   	4.308266e+06   	1.606042e+11   	118
-5104f3b7	29491200       	0.000000e+00   	8.248482e+04   	9.428391e+03   	9.403269e+06   	7.857610e+11   	114
+25ebb669	16588800       	0.000000e+00   	3.521786e+04   	3.719259e+03   	2.183508e+06   	7.775611e+10   	62
+982013a8	1548800        	0.000000e+00   	1.965233e+03   	3.623952e+02   	7.271360e+04   	1.477584e+08   	37
+617e5fe6	7372800        	0.000000e+00   	1.125962e+04   	1.634952e+03   	5.855000e+05   	6.731505e+09   	52
+ad30af9b	51200          	0.000000e+00   	4.077398e+01   	4.776338e+00   	2.487213e+03   	1.028052e+05   	61
+5104f3b7	29491200       	0.000000e+00   	7.956441e+04   	8.548094e+03   	5.410380e+06   	4.354424e+11   	68
 
diff --git a/simucore/perfmodels/.starpu/sampling/codelets/44/spotrf.sirocco b/simucore/perfmodels/.starpu/sampling/codelets/44/spotrf.sirocco
index 04d00423c..d519d86de 100644
--- a/simucore/perfmodels/.starpu/sampling/codelets/44/spotrf.sirocco
+++ b/simucore/perfmodels/.starpu/sampling/codelets/44/spotrf.sirocco
@@ -5,7 +5,143 @@
 ####################
 # COMBs
 # number of combinations
+5
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb0)
+# number of entries
+5
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+5104f3b7	14745600       	0.000000e+00   	1.725922e+04   	2.264939e+03   	1.052812e+06   	1.848364e+10   	61
+25ebb669	8294400        	0.000000e+00   	1.618055e+04   	2.196987e+03   	7.766664e+05   	1.279857e+10   	48
+982013a8	774400         	0.000000e+00   	3.928085e+03   	4.967651e+02   	1.414111e+05   	5.643587e+08   	36
+ad30af9b	25600          	0.000000e+00   	2.070017e+03   	2.556262e+02   	1.283410e+05   	2.697195e+08   	62
+617e5fe6	3686400        	0.000000e+00   	9.570246e+03   	1.453408e+03   	5.263635e+05   	5.153611e+09   	55
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+5
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+5104f3b7	14745600       	0.000000e+00   	1.689494e+04   	1.439976e+03   	1.081276e+06   	1.840081e+10   	64
+25ebb669	8294400        	0.000000e+00   	1.549399e+04   	1.742327e+03   	8.211813e+05   	1.288426e+10   	53
+982013a8	774400         	0.000000e+00   	3.925510e+03   	5.372277e+02   	1.334673e+05   	5.337401e+08   	34
+ad30af9b	25600          	0.000000e+00   	2.102179e+03   	3.123586e+02   	1.303351e+05   	2.800370e+08   	62
+617e5fe6	3686400        	0.000000e+00   	9.539420e+03   	1.257811e+03   	4.292739e+05   	4.166218e+09   	45
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
 1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb3)
+# number of entries
+5
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+5104f3b7	14745600       	0.000000e+00   	1.679050e+04   	1.864821e+03   	5.037149e+05   	8.561949e+09   	30
+25ebb669	8294400        	0.000000e+00   	1.650986e+04   	2.615948e+03   	4.457661e+05   	7.544300e+09   	27
+982013a8	774400         	0.000000e+00   	4.072426e+03   	6.492225e+02   	1.384625e+05   	5.782087e+08   	34
+ad30af9b	25600          	0.000000e+00   	2.106632e+03   	2.976700e+02   	1.390377e+05   	2.987493e+08   	66
+617e5fe6	3686400        	0.000000e+00   	9.596093e+03   	1.252122e+03   	2.686906e+05   	2.622279e+09   	28
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb1)
+# number of entries
+5
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+5104f3b7	14745600       	0.000000e+00   	1.640619e+04   	1.490045e+03   	6.398413e+05   	1.058394e+10   	39
+25ebb669	8294400        	0.000000e+00   	1.538862e+04   	2.385658e+03   	6.155449e+05   	9.700043e+09   	40
+982013a8	774400         	0.000000e+00   	4.069845e+03   	5.836273e+02   	1.627938e+05   	6.761704e+08   	40
+ad30af9b	25600          	0.000000e+00   	2.134159e+03   	2.682985e+02   	1.323178e+05   	2.868503e+08   	62
+617e5fe6	3686400        	0.000000e+00   	9.181625e+03   	8.594056e+02   	3.580834e+05   	3.316592e+09   	39
+
 ####################
 # COMB_4
 # number of types devices
@@ -34,9 +170,9 @@
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-5104f3b7	14745600       	0.000000e+00   	4.483079e+04   	5.635100e+03   	1.026625e+07   	4.675159e+11   	229
-25ebb669	8294400        	0.000000e+00   	1.993450e+04   	2.800795e+03   	3.109782e+06   	6.321569e+10   	156
-617e5fe6	3686400        	0.000000e+00   	7.511506e+03   	7.944975e+02   	2.291009e+06   	1.740145e+10   	305
-982013a8	774400         	0.000000e+00   	1.048226e+03   	1.196694e+02   	2.452850e+05   	2.604652e+08   	234
-ad30af9b	25600          	0.000000e+00   	7.425993e+01   	7.891218e+00   	2.784747e+04   	2.091303e+06   	375
+5104f3b7	14745600       	0.000000e+00   	4.292375e+04   	3.070461e+03   	2.704196e+06   	1.166682e+11   	63
+25ebb669	8294400        	0.000000e+00   	1.889580e+04   	1.888554e+03   	1.473872e+06   	2.812818e+10   	78
+982013a8	774400         	0.000000e+00   	1.554382e+03   	1.710542e+02   	5.595775e+04   	8.803307e+07   	36
+ad30af9b	25600          	0.000000e+00   	7.450665e+01   	4.146608e+00   	4.097866e+03   	3.062640e+05   	55
+617e5fe6	3686400        	0.000000e+00   	6.920145e+03   	2.464678e+02   	2.214446e+05   	1.534373e+09   	32
 
-- 
GitLab