diff --git a/doc/user/chapters/using.org b/doc/user/chapters/using.org index 2468c877a8e4cad46ddda81d89b4d21bbd70dd32..8dff2f7468c17053244f1aa0016ad80afb01b9f8 100644 --- a/doc/user/chapters/using.org +++ b/doc/user/chapters/using.org @@ -213,16 +213,28 @@ Simulation mode can be activated by setting the cmake option CHAMELEON_SIMULATION to ON. This mode allows you to simulate - execution of algorithms with StarPU compiled with [[http://simgrid.gforge.inria.fr/][SimGrid]]. To do - so, we provide some perfmodels in the simucore/perfmodels/ + execution of algorithms with StarPU compiled with + [[https://github.com/simgrid/simgrid][SimGrid]]. + To do so, we provide some perfmodels in the simucore/perfmodels/ directory of Chameleon sources. To use these perfmodels, please set your *STARPU_HOME* environment variable to ~path/to/your/chameleon_sources/simucore/perfmodels~. Finally, you need to set your *STARPU_HOSTNAME* environment variable to the name - of the machine to simulate. Note that only *potrf* algorithm, - simple and double precisions, on /mirage/ and /sirocco/ machines, with - or without gpus, and for the following tile sizes (choose a size *N* - multiple of the tile size) are available for now + of the machine to simulate. + + The algorithms available for now are gemm, symm, potrf, potrs, potri, posv, + getrf_nopiv, getrs_nopiv, geqrf, geqrf_hqr, gels, gels_hqr, in single and + double precision, on + [[https://plafrim-users.gitlabpages.inria.fr/doc/][PlaFRIM nodes with GPUs]]. 
+ The tile size to use depends on the platform, /i.e./ *STARPU_HOSTNAME* + (choose a size *N* that is a multiple of the tile size): + - /sirocco-k40m/: 960 + - /sirocco-p100/: 1240 + - /sirocco-v100/: 1600 + - /sirocco-a100/: 1600 + - /sirocco-rtx8000/: 1600 + In addition, the *potrf* algorithm is also available on /mirage/ and + /sirocco/ machines for the following tile sizes: - /mirage/: 320, 960 - /sirocco/: 80, 440, 960, 1440, 1920 @@ -231,9 +243,16 @@ #+begin_example export STARPU_HOME=/tmp/chameleon/simucore/perfmodels/ export STARPU_HOSTNAME=sirocco - ./testing/chameleon_dtesting -o potrf -t 22 -g 2 -n 14400 -b 1440 - Id;Function;threads;gpus;P;Q;mtxfmt;nb;uplo;n;lda;seedA;time;gflops - 0;dpotrf;22;2;1;1;0;1440;121;14400;14400;846930886;7.814116e-01;1.273889e+03 + ./testing/chameleon_dtesting -o potrf -t 22 -g 2 -n 14400 -b 1440 --nowarmup + 0;dpotrf;22;2;1;1;0;1440;121;14400;14400;1804289383;0.000000e+00;7.867404e-01;1.265261e+03 + + export STARPU_HOSTNAME=sirocco-k40m + ./testing/chameleon_stesting -o gemm -t 38 -g 2 -n 64000 -b 1600 --nowarmup + 0;sgemm;38;2;1;1;0;1600;111;111;64000;64000;64000;64000;64000;64000;4.892778e-01;-1.846424e-01;1649760492;596516649;1189641421;0.000000e+00;2.010660e+01;2.607541e+04 + + export STARPU_HOSTNAME=sirocco-p100 + ./testing/chameleon_dtesting -o geqrf -g 2 -t 30 -b 1240 -n 39680 --nowarmup + 0;dgeqrf;30;2;1;1;0;1240;48;39680;39680;39680;4;1804289383;0.000000e+00;3.893336e+01;2.139677e+03 #+end_example **** Use out of core support with StarPU diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.affinity b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.affinity new file mode 100644 index 0000000000000000000000000000000000000000..98e7753edbf563110d3b316c3c8bf434205d1505 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.affinity @@ -0,0 +1,3 @@ +# GPU NUMA0 +0 0 +1 0 diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.bandwidth 
b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.bandwidth new file mode 100644 index 0000000000000000000000000000000000000000..cea795128c29ecda349683d4ed01db2d5fa9b405 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.bandwidth @@ -0,0 +1,9 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 +0.000000e+00 1.974809e+04 1.824606e+04 nan nan nan nan nan +1.664433e+04 0.000000e+00 1.364439e+04 nan nan nan nan nan +1.611138e+04 1.581451e+04 0.000000e+00 nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.config b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.config new file mode 100644 index 0000000000000000000000000000000000000000..7d77803bdb3e4c45b18d99f2996af3030b428375 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.config @@ -0,0 +1,7 @@ +# Current configuration +48 # Number of CPUs +1 # Number of NUMA nodes +2 # Number of CUDA devices +0 # Number of OpenCL devices +0 # Number of MIC devices +0 # Number of MPI devices diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.latency b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.latency new file mode 100644 index 0000000000000000000000000000000000000000..faf8a1bc64f21c630626e34bbc7c790d429c4d86 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.latency @@ -0,0 +1,9 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 +0.000000e+00 1.032681e+01 1.069156e+01 nan nan nan nan nan +1.017813e+01 0.000000e+00 1.127391e+01 nan nan nan nan nan +1.055850e+01 1.313209e+01 0.000000e+00 nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan diff --git 
a/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.platform.v4.xml b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.platform.v4.xml new file mode 100644 index 0000000000000000000000000000000000000000..e229cc38f55a6165bec69dc8735a3e02a7acdfc3 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.platform.v4.xml @@ -0,0 +1,165 @@ +<?xml version='1.0'?> +<!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd'> + <platform version="4"> + <config id="General"> + <prop id="network/TCP-gamma" value="-1"></prop> + <prop id="network/latency-factor" value="1"></prop> + <prop id="network/bandwidth-factor" value="1"></prop> + <prop id="network/crosstraffic" value="0"></prop> + <prop id="network/weight-S" value="0.0"></prop> + </config> + <AS id="AS0" routing="Full"> + <host id="MAIN" speed="1f"/> + <host id="CPU0" speed="2000000000f"/> + <host id="CPU1" speed="2000000000f"/> + <host id="CPU2" speed="2000000000f"/> + <host id="CPU3" speed="2000000000f"/> + <host id="CPU4" speed="2000000000f"/> + <host id="CPU5" speed="2000000000f"/> + <host id="CPU6" speed="2000000000f"/> + <host id="CPU7" speed="2000000000f"/> + <host id="CPU8" speed="2000000000f"/> + <host id="CPU9" speed="2000000000f"/> + <host id="CPU10" speed="2000000000f"/> + <host id="CPU11" speed="2000000000f"/> + <host id="CPU12" speed="2000000000f"/> + <host id="CPU13" speed="2000000000f"/> + <host id="CPU14" speed="2000000000f"/> + <host id="CPU15" speed="2000000000f"/> + <host id="CPU16" speed="2000000000f"/> + <host id="CPU17" speed="2000000000f"/> + <host id="CPU18" speed="2000000000f"/> + <host id="CPU19" speed="2000000000f"/> + <host id="CPU20" speed="2000000000f"/> + <host id="CPU21" speed="2000000000f"/> + <host id="CPU22" speed="2000000000f"/> + <host id="CPU23" speed="2000000000f"/> + <host id="CPU24" speed="2000000000f"/> + <host id="CPU25" speed="2000000000f"/> + <host id="CPU26" speed="2000000000f"/> + <host id="CPU27" speed="2000000000f"/> + <host 
id="CPU28" speed="2000000000f"/> + <host id="CPU29" speed="2000000000f"/> + <host id="CPU30" speed="2000000000f"/> + <host id="CPU31" speed="2000000000f"/> + <host id="CPU32" speed="2000000000f"/> + <host id="CPU33" speed="2000000000f"/> + <host id="CPU34" speed="2000000000f"/> + <host id="CPU35" speed="2000000000f"/> + <host id="CPU36" speed="2000000000f"/> + <host id="CPU37" speed="2000000000f"/> + <host id="CPU38" speed="2000000000f"/> + <host id="CPU39" speed="2000000000f"/> + <host id="CPU40" speed="2000000000f"/> + <host id="CPU41" speed="2000000000f"/> + <host id="CPU42" speed="2000000000f"/> + <host id="CPU43" speed="2000000000f"/> + <host id="CPU44" speed="2000000000f"/> + <host id="CPU45" speed="2000000000f"/> + <host id="CPU46" speed="2000000000f"/> + <host id="CPU47" speed="2000000000f"/> + <host id="CUDA0" speed="2000000000f"> + <prop id="memsize" value="42314694656"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA1" speed="2000000000f"> + <prop id="memsize" value="42314694656"/> + <prop id="memcpy_peer" value="1"/> + </host> + + <host id="RAM" speed="1f"/> + + <link id="Host" bandwidth="19748087777.410324Bps" latency="0.000000s"/> + + <link id="RAM-CUDA0" bandwidth="19748087777.410324Bps" latency="0.000010s"/> + <link id="CUDA0-RAM" bandwidth="16644329493.795069Bps" latency="0.000010s"/> + <link id="RAM-CUDA1" bandwidth="18246056939.720322Bps" latency="0.000011s"/> + <link id="CUDA1-RAM" bandwidth="16111382651.350138Bps" latency="0.000011s"/> + + <link id="CUDA0-CUDA1" bandwidth="13644393704.350653Bps" latency="0.000011s"/> + <link id="CUDA1-CUDA0" bandwidth="15814512028.119701Bps" latency="0.000013s"/> + + <link id="PCI:0000:[20-23] up" bandwidth="16644329493.795069Bps" latency="0.000000s"/> + <link id="PCI:0000:[20-23] down" bandwidth="19748087777.410324Bps" latency="0.000000s"/> + <link id="PCI:0000:[20-23] through" bandwidth="39496175554.820648Bps" latency="0.000000s"/> + <link id="PCI:0000:[21-21] up" 
bandwidth="16644329493.795069Bps" latency="0.000000s"/> + <link id="PCI:0000:[21-21] down" bandwidth="19748087777.410324Bps" latency="0.000000s"/> + <link id="PCI:0000:[21-21] through" bandwidth="39496175554.820648Bps" latency="0.000000s"/> + <link id="PCI:0000:21:00.0 up" bandwidth="16644329493.795069Bps" latency="0.000000s"/> + <link id="PCI:0000:21:00.0 down" bandwidth="19748087777.410324Bps" latency="0.000000s"/> + <link id="PCI:0000:[e0-e4] up" bandwidth="16111382651.350138Bps" latency="0.000000s"/> + <link id="PCI:0000:[e0-e4] down" bandwidth="18246056939.720322Bps" latency="0.000000s"/> + <link id="PCI:0000:[e0-e4] through" bandwidth="36492113879.440643Bps" latency="0.000000s"/> + <link id="PCI:0000:[e2-e2] up" bandwidth="16111382651.350138Bps" latency="0.000000s"/> + <link id="PCI:0000:[e2-e2] down" bandwidth="18246056939.720322Bps" latency="0.000000s"/> + <link id="PCI:0000:[e2-e2] through" bandwidth="36492113879.440643Bps" latency="0.000000s"/> + <link id="PCI:0000:e2:00.0 up" bandwidth="16111382651.350138Bps" latency="0.000000s"/> + <link id="PCI:0000:e2:00.0 down" bandwidth="18246056939.720322Bps" latency="0.000000s"/> + + <route src="CUDA0" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA1"/> + <link_ctn id="PCI:0000:e2:00.0 down"/> + <link_ctn id="PCI:0000:[e2-e2] through"/> + <link_ctn id="PCI:0000:[e2-e2] down"/> + <link_ctn id="PCI:0000:[e0-e4] through"/> + <link_ctn id="PCI:0000:[e0-e4] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[20-23] up"/> + <link_ctn id="PCI:0000:[20-23] through"/> + <link_ctn id="PCI:0000:[21-21] up"/> + <link_ctn id="PCI:0000:[21-21] through"/> + </route> + <route src="CUDA0" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA0-RAM"/> + <link_ctn id="PCI:0000:21:00.0 up"/> + <link_ctn id="PCI:0000:[21-21] through"/> + <link_ctn id="PCI:0000:[21-21] up"/> + <link_ctn id="PCI:0000:[20-23] through"/> + <link_ctn id="PCI:0000:[20-23] up"/> + <link_ctn id="Host"/> + </route> + <route 
src="RAM" dst="CUDA0" symmetrical="NO"> + <link_ctn id="RAM-CUDA0"/> + <link_ctn id="PCI:0000:21:00.0 down"/> + <link_ctn id="PCI:0000:[21-21] through"/> + <link_ctn id="PCI:0000:[21-21] down"/> + <link_ctn id="PCI:0000:[20-23] through"/> + <link_ctn id="PCI:0000:[20-23] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA1" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA0"/> + <link_ctn id="PCI:0000:21:00.0 down"/> + <link_ctn id="PCI:0000:[21-21] through"/> + <link_ctn id="PCI:0000:[21-21] down"/> + <link_ctn id="PCI:0000:[20-23] through"/> + <link_ctn id="PCI:0000:[20-23] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[e0-e4] up"/> + <link_ctn id="PCI:0000:[e0-e4] through"/> + <link_ctn id="PCI:0000:[e2-e2] up"/> + <link_ctn id="PCI:0000:[e2-e2] through"/> + </route> + <route src="CUDA1" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA1-RAM"/> + <link_ctn id="PCI:0000:e2:00.0 up"/> + <link_ctn id="PCI:0000:[e2-e2] through"/> + <link_ctn id="PCI:0000:[e2-e2] up"/> + <link_ctn id="PCI:0000:[e0-e4] through"/> + <link_ctn id="PCI:0000:[e0-e4] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA1" symmetrical="NO"> + <link_ctn id="RAM-CUDA1"/> + <link_ctn id="PCI:0000:e2:00.0 down"/> + <link_ctn id="PCI:0000:[e2-e2] through"/> + <link_ctn id="PCI:0000:[e2-e2] down"/> + <link_ctn id="PCI:0000:[e0-e4] through"/> + <link_ctn id="PCI:0000:[e0-e4] down"/> + <link_ctn id="Host"/> + </route> + + </AS> + </platform> diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.platform.xml b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.platform.xml new file mode 100644 index 0000000000000000000000000000000000000000..e2851a06e18d77f733d5d49ffbb4e3c7799160a0 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-a100.platform.xml @@ -0,0 +1,165 @@ +<?xml version='1.0'?> +<!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid.dtd'> + <platform version="3"> + <config 
id="General"> + <prop id="network/TCP_gamma" value="-1"></prop> + <prop id="network/latency_factor" value="1"></prop> + <prop id="network/bandwidth_factor" value="1"></prop> + <prop id="network/crosstraffic" value="0"></prop> + <prop id="network/weight_S" value="0.0"></prop> + </config> + <AS id="AS0" routing="Full"> + <host id="MAIN" power="1"/> + <host id="CPU0" power="2000000000"/> + <host id="CPU1" power="2000000000"/> + <host id="CPU2" power="2000000000"/> + <host id="CPU3" power="2000000000"/> + <host id="CPU4" power="2000000000"/> + <host id="CPU5" power="2000000000"/> + <host id="CPU6" power="2000000000"/> + <host id="CPU7" power="2000000000"/> + <host id="CPU8" power="2000000000"/> + <host id="CPU9" power="2000000000"/> + <host id="CPU10" power="2000000000"/> + <host id="CPU11" power="2000000000"/> + <host id="CPU12" power="2000000000"/> + <host id="CPU13" power="2000000000"/> + <host id="CPU14" power="2000000000"/> + <host id="CPU15" power="2000000000"/> + <host id="CPU16" power="2000000000"/> + <host id="CPU17" power="2000000000"/> + <host id="CPU18" power="2000000000"/> + <host id="CPU19" power="2000000000"/> + <host id="CPU20" power="2000000000"/> + <host id="CPU21" power="2000000000"/> + <host id="CPU22" power="2000000000"/> + <host id="CPU23" power="2000000000"/> + <host id="CPU24" power="2000000000"/> + <host id="CPU25" power="2000000000"/> + <host id="CPU26" power="2000000000"/> + <host id="CPU27" power="2000000000"/> + <host id="CPU28" power="2000000000"/> + <host id="CPU29" power="2000000000"/> + <host id="CPU30" power="2000000000"/> + <host id="CPU31" power="2000000000"/> + <host id="CPU32" power="2000000000"/> + <host id="CPU33" power="2000000000"/> + <host id="CPU34" power="2000000000"/> + <host id="CPU35" power="2000000000"/> + <host id="CPU36" power="2000000000"/> + <host id="CPU37" power="2000000000"/> + <host id="CPU38" power="2000000000"/> + <host id="CPU39" power="2000000000"/> + <host id="CPU40" power="2000000000"/> + <host id="CPU41" 
power="2000000000"/> + <host id="CPU42" power="2000000000"/> + <host id="CPU43" power="2000000000"/> + <host id="CPU44" power="2000000000"/> + <host id="CPU45" power="2000000000"/> + <host id="CPU46" power="2000000000"/> + <host id="CPU47" power="2000000000"/> + <host id="CUDA0" power="2000000000"> + <prop id="memsize" value="42314694656"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA1" power="2000000000"> + <prop id="memsize" value="42314694656"/> + <prop id="memcpy_peer" value="1"/> + </host> + + <host id="RAM" power="1"/> + + <link id="Host" bandwidth="19748087777.410324" latency="0.000000"/> + + <link id="RAM-CUDA0" bandwidth="19748087777.410324" latency="0.000010"/> + <link id="CUDA0-RAM" bandwidth="16644329493.795069" latency="0.000010"/> + <link id="RAM-CUDA1" bandwidth="18246056939.720322" latency="0.000011"/> + <link id="CUDA1-RAM" bandwidth="16111382651.350138" latency="0.000011"/> + + <link id="CUDA0-CUDA1" bandwidth="13644393704.350653" latency="0.000011"/> + <link id="CUDA1-CUDA0" bandwidth="15814512028.119701" latency="0.000013"/> + + <link id="PCI:0000:[20-23] up" bandwidth="16644329493.795069" latency="0.000000"/> + <link id="PCI:0000:[20-23] down" bandwidth="19748087777.410324" latency="0.000000"/> + <link id="PCI:0000:[20-23] through" bandwidth="39496175554.820648" latency="0.000000"/> + <link id="PCI:0000:[21-21] up" bandwidth="16644329493.795069" latency="0.000000"/> + <link id="PCI:0000:[21-21] down" bandwidth="19748087777.410324" latency="0.000000"/> + <link id="PCI:0000:[21-21] through" bandwidth="39496175554.820648" latency="0.000000"/> + <link id="PCI:0000:21:00.0 up" bandwidth="16644329493.795069" latency="0.000000"/> + <link id="PCI:0000:21:00.0 down" bandwidth="19748087777.410324" latency="0.000000"/> + <link id="PCI:0000:[e0-e4] up" bandwidth="16111382651.350138" latency="0.000000"/> + <link id="PCI:0000:[e0-e4] down" bandwidth="18246056939.720322" latency="0.000000"/> + <link id="PCI:0000:[e0-e4] through" 
bandwidth="36492113879.440643" latency="0.000000"/> + <link id="PCI:0000:[e2-e2] up" bandwidth="16111382651.350138" latency="0.000000"/> + <link id="PCI:0000:[e2-e2] down" bandwidth="18246056939.720322" latency="0.000000"/> + <link id="PCI:0000:[e2-e2] through" bandwidth="36492113879.440643" latency="0.000000"/> + <link id="PCI:0000:e2:00.0 up" bandwidth="16111382651.350138" latency="0.000000"/> + <link id="PCI:0000:e2:00.0 down" bandwidth="18246056939.720322" latency="0.000000"/> + + <route src="CUDA0" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA1"/> + <link_ctn id="PCI:0000:e2:00.0 down"/> + <link_ctn id="PCI:0000:[e2-e2] through"/> + <link_ctn id="PCI:0000:[e2-e2] down"/> + <link_ctn id="PCI:0000:[e0-e4] through"/> + <link_ctn id="PCI:0000:[e0-e4] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[20-23] up"/> + <link_ctn id="PCI:0000:[20-23] through"/> + <link_ctn id="PCI:0000:[21-21] up"/> + <link_ctn id="PCI:0000:[21-21] through"/> + </route> + <route src="CUDA0" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA0-RAM"/> + <link_ctn id="PCI:0000:21:00.0 up"/> + <link_ctn id="PCI:0000:[21-21] through"/> + <link_ctn id="PCI:0000:[21-21] up"/> + <link_ctn id="PCI:0000:[20-23] through"/> + <link_ctn id="PCI:0000:[20-23] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA0" symmetrical="NO"> + <link_ctn id="RAM-CUDA0"/> + <link_ctn id="PCI:0000:21:00.0 down"/> + <link_ctn id="PCI:0000:[21-21] through"/> + <link_ctn id="PCI:0000:[21-21] down"/> + <link_ctn id="PCI:0000:[20-23] through"/> + <link_ctn id="PCI:0000:[20-23] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA1" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA0"/> + <link_ctn id="PCI:0000:21:00.0 down"/> + <link_ctn id="PCI:0000:[21-21] through"/> + <link_ctn id="PCI:0000:[21-21] down"/> + <link_ctn id="PCI:0000:[20-23] through"/> + <link_ctn id="PCI:0000:[20-23] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + 
<link_ctn id="PCI:0000:[e0-e4] up"/> + <link_ctn id="PCI:0000:[e0-e4] through"/> + <link_ctn id="PCI:0000:[e2-e2] up"/> + <link_ctn id="PCI:0000:[e2-e2] through"/> + </route> + <route src="CUDA1" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA1-RAM"/> + <link_ctn id="PCI:0000:e2:00.0 up"/> + <link_ctn id="PCI:0000:[e2-e2] through"/> + <link_ctn id="PCI:0000:[e2-e2] up"/> + <link_ctn id="PCI:0000:[e0-e4] through"/> + <link_ctn id="PCI:0000:[e0-e4] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA1" symmetrical="NO"> + <link_ctn id="RAM-CUDA1"/> + <link_ctn id="PCI:0000:e2:00.0 down"/> + <link_ctn id="PCI:0000:[e2-e2] through"/> + <link_ctn id="PCI:0000:[e2-e2] down"/> + <link_ctn id="PCI:0000:[e0-e4] through"/> + <link_ctn id="PCI:0000:[e0-e4] down"/> + <link_ctn id="Host"/> + </route> + + </AS> + </platform> diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.affinity b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.affinity new file mode 100644 index 0000000000000000000000000000000000000000..a5ec004d6537c16a93f23e10e8f4beacd6d4b980 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.affinity @@ -0,0 +1,5 @@ +# GPU NUMA0 +0 0 +1 0 +2 0 +3 0 diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.bandwidth b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.bandwidth new file mode 100644 index 0000000000000000000000000000000000000000..ce22379cec84f66a4f41c3d673848a0d7dc0011f --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.bandwidth @@ -0,0 +1,9 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 +0.000000e+00 1.050427e+04 1.050922e+04 9.545512e+03 9.540121e+03 nan nan nan +1.049903e+04 0.000000e+00 1.025040e+04 8.920861e+03 8.701214e+03 nan nan nan +1.050928e+04 1.025019e+04 0.000000e+00 8.688583e+03 8.612009e+03 nan nan nan +8.461591e+03 8.157348e+03 8.178563e+03 0.000000e+00 1.025026e+04 nan nan nan +8.137461e+03 8.119668e+03 8.210409e+03 1.025185e+04 0.000000e+00 
nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.config b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.config new file mode 100644 index 0000000000000000000000000000000000000000..66640bc7c74b3abe2a99858966bdb64ce6ee4c5f --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.config @@ -0,0 +1,7 @@ +# Current configuration +24 # Number of CPUs +1 # Number of NUMA nodes +4 # Number of CUDA devices +0 # Number of OpenCL devices +0 # Number of MIC devices +0 # Number of MPI devices diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.latency b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.latency new file mode 100644 index 0000000000000000000000000000000000000000..561ef0560b676b2a07a0db4a541a9f1fed47ee0c --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.latency @@ -0,0 +1,9 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 +0.000000e+00 7.639062e+00 7.923062e+00 9.443469e+00 8.946469e+00 nan nan nan +7.467156e+00 0.000000e+00 1.015213e+01 1.628694e+01 1.647028e+01 nan nan nan +7.343344e+00 8.747000e+00 0.000000e+00 1.641166e+01 1.675206e+01 nan nan nan +8.552656e+00 1.496706e+01 1.628628e+01 0.000000e+00 1.010944e+01 nan nan nan +8.346437e+00 1.477387e+01 1.567300e+01 1.030456e+01 0.000000e+00 nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.platform.v4.xml b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.platform.v4.xml new file mode 100644 index 0000000000000000000000000000000000000000..989d019a2926c2cba5ac627caf78c604fc5359cb --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.platform.v4.xml @@ -0,0 +1,329 @@ +<?xml version='1.0'?> +<!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd'> + 
<platform version="4"> + <config id="General"> + <prop id="network/TCP-gamma" value="-1"></prop> + <prop id="network/latency-factor" value="1"></prop> + <prop id="network/bandwidth-factor" value="1"></prop> + <prop id="network/crosstraffic" value="0"></prop> + <prop id="network/weight-S" value="0.0"></prop> + </config> + <AS id="AS0" routing="Full"> + <host id="MAIN" speed="1f"/> + <host id="CPU0" speed="2000000000f"/> + <host id="CPU1" speed="2000000000f"/> + <host id="CPU2" speed="2000000000f"/> + <host id="CPU3" speed="2000000000f"/> + <host id="CPU4" speed="2000000000f"/> + <host id="CPU5" speed="2000000000f"/> + <host id="CPU6" speed="2000000000f"/> + <host id="CPU7" speed="2000000000f"/> + <host id="CPU8" speed="2000000000f"/> + <host id="CPU9" speed="2000000000f"/> + <host id="CPU10" speed="2000000000f"/> + <host id="CPU11" speed="2000000000f"/> + <host id="CPU12" speed="2000000000f"/> + <host id="CPU13" speed="2000000000f"/> + <host id="CPU14" speed="2000000000f"/> + <host id="CPU15" speed="2000000000f"/> + <host id="CPU16" speed="2000000000f"/> + <host id="CPU17" speed="2000000000f"/> + <host id="CPU18" speed="2000000000f"/> + <host id="CPU19" speed="2000000000f"/> + <host id="CPU20" speed="2000000000f"/> + <host id="CPU21" speed="2000000000f"/> + <host id="CPU22" speed="2000000000f"/> + <host id="CPU23" speed="2000000000f"/> + <host id="CUDA0" speed="2000000000f"> + <prop id="memsize" value="11997020160"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA1" speed="2000000000f"> + <prop id="memsize" value="11997020160"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA2" speed="2000000000f"> + <prop id="memsize" value="11997020160"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA3" speed="2000000000f"> + <prop id="memsize" value="11997020160"/> + <prop id="memcpy_peer" value="1"/> + </host> + + <host id="RAM" speed="1f"/> + + <link id="Host" bandwidth="10509279953.268858Bps" latency="0.000000s"/> + + <link 
id="RAM-CUDA0" bandwidth="10504274501.875196Bps" latency="0.000008s"/> + <link id="CUDA0-RAM" bandwidth="10499029584.051048Bps" latency="0.000007s"/> + <link id="RAM-CUDA1" bandwidth="10509222043.470289Bps" latency="0.000008s"/> + <link id="CUDA1-RAM" bandwidth="10509279953.268860Bps" latency="0.000007s"/> + <link id="RAM-CUDA2" bandwidth="9545511884.828405Bps" latency="0.000009s"/> + <link id="CUDA2-RAM" bandwidth="8461590540.535644Bps" latency="0.000009s"/> + <link id="RAM-CUDA3" bandwidth="9540121030.162157Bps" latency="0.000009s"/> + <link id="CUDA3-RAM" bandwidth="8137460661.567282Bps" latency="0.000008s"/> + + <link id="CUDA0-CUDA1" bandwidth="10250399319.935379Bps" latency="0.000010s"/> + <link id="CUDA0-CUDA2" bandwidth="8920860699.502163Bps" latency="0.000016s"/> + <link id="CUDA0-CUDA3" bandwidth="8701213506.609842Bps" latency="0.000016s"/> + <link id="CUDA1-CUDA0" bandwidth="10250193144.225332Bps" latency="0.000009s"/> + <link id="CUDA1-CUDA2" bandwidth="8688583236.880905Bps" latency="0.000016s"/> + <link id="CUDA1-CUDA3" bandwidth="8612009376.706877Bps" latency="0.000017s"/> + <link id="CUDA2-CUDA0" bandwidth="8157348384.165317Bps" latency="0.000015s"/> + <link id="CUDA2-CUDA1" bandwidth="8178562577.756922Bps" latency="0.000016s"/> + <link id="CUDA2-CUDA3" bandwidth="10250262031.627239Bps" latency="0.000010s"/> + <link id="CUDA3-CUDA0" bandwidth="8119667915.266233Bps" latency="0.000015s"/> + <link id="CUDA3-CUDA1" bandwidth="8210408712.019968Bps" latency="0.000016s"/> + <link id="CUDA3-CUDA2" bandwidth="10251854919.590197Bps" latency="0.000010s"/> + + <link id="PCI:0000:[00-09] up" bandwidth="10509279953.268860Bps" latency="0.000000s"/> + <link id="PCI:0000:[00-09] down" bandwidth="10509222043.470289Bps" latency="0.000000s"/> + <link id="PCI:0000:[00-09] through" bandwidth="21018559906.537720Bps" latency="0.000000s"/> + <link id="PCI:0000:[03-03] up" bandwidth="10499029584.051048Bps" latency="0.000000s"/> + <link id="PCI:0000:[03-03] down" 
bandwidth="10504274501.875196Bps" latency="0.000000s"/> + <link id="PCI:0000:[03-03] through" bandwidth="21008549003.750393Bps" latency="0.000000s"/> + <link id="PCI:0000:03:00.0 up" bandwidth="10499029584.051048Bps" latency="0.000000s"/> + <link id="PCI:0000:03:00.0 down" bandwidth="10504274501.875196Bps" latency="0.000000s"/> + <link id="PCI:0000:[04-04] up" bandwidth="10509279953.268860Bps" latency="0.000000s"/> + <link id="PCI:0000:[04-04] down" bandwidth="10509222043.470289Bps" latency="0.000000s"/> + <link id="PCI:0000:[04-04] through" bandwidth="21018559906.537720Bps" latency="0.000000s"/> + <link id="PCI:0000:04:00.0 up" bandwidth="10509279953.268860Bps" latency="0.000000s"/> + <link id="PCI:0000:04:00.0 down" bandwidth="10509222043.470289Bps" latency="0.000000s"/> + <link id="PCI:0000:[80-83] up" bandwidth="8461590540.535644Bps" latency="0.000000s"/> + <link id="PCI:0000:[80-83] down" bandwidth="9545511884.828405Bps" latency="0.000000s"/> + <link id="PCI:0000:[80-83] through" bandwidth="20503709839.180393Bps" latency="0.000000s"/> + <link id="PCI:0000:[82-82] up" bandwidth="10250262031.627239Bps" latency="0.000000s"/> + <link id="PCI:0000:[82-82] down" bandwidth="10251854919.590197Bps" latency="0.000000s"/> + <link id="PCI:0000:[82-82] through" bandwidth="20503709839.180393Bps" latency="0.000000s"/> + <link id="PCI:0000:82:00.0 up" bandwidth="8461590540.535644Bps" latency="0.000000s"/> + <link id="PCI:0000:82:00.0 down" bandwidth="10251854919.590197Bps" latency="0.000000s"/> + <link id="PCI:0000:[83-83] up" bandwidth="10251854919.590197Bps" latency="0.000000s"/> + <link id="PCI:0000:[83-83] down" bandwidth="10250262031.627239Bps" latency="0.000000s"/> + <link id="PCI:0000:[83-83] through" bandwidth="20503709839.180393Bps" latency="0.000000s"/> + <link id="PCI:0000:83:00.0 up" bandwidth="8137460661.567282Bps" latency="0.000000s"/> + <link id="PCI:0000:83:00.0 down" bandwidth="10250262031.627239Bps" latency="0.000000s"/> + + <route src="CUDA0" dst="CUDA1" 
symmetrical="NO"> + <link_ctn id="CUDA0-CUDA1"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[03-03] up"/> + <link_ctn id="PCI:0000:[03-03] through"/> + </route> + <route src="CUDA0" dst="CUDA2" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA2"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[00-09] up"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[03-03] up"/> + <link_ctn id="PCI:0000:[03-03] through"/> + </route> + <route src="CUDA0" dst="CUDA3" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA3"/> + <link_ctn id="PCI:0000:83:00.0 down"/> + <link_ctn id="PCI:0000:[83-83] through"/> + <link_ctn id="PCI:0000:[83-83] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[00-09] up"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[03-03] up"/> + <link_ctn id="PCI:0000:[03-03] through"/> + </route> + <route src="CUDA0" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA0-RAM"/> + <link_ctn id="PCI:0000:03:00.0 up"/> + <link_ctn id="PCI:0000:[03-03] through"/> + <link_ctn id="PCI:0000:[03-03] up"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA0" symmetrical="NO"> + <link_ctn id="RAM-CUDA0"/> + <link_ctn id="PCI:0000:03:00.0 down"/> + <link_ctn id="PCI:0000:[03-03] through"/> + <link_ctn id="PCI:0000:[03-03] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn id="Host"/> + </route> + <route 
src="CUDA1" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA0"/> + <link_ctn id="PCI:0000:03:00.0 down"/> + <link_ctn id="PCI:0000:[03-03] through"/> + <link_ctn id="PCI:0000:[03-03] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + </route> + <route src="CUDA1" dst="CUDA2" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA2"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[00-09] up"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + </route> + <route src="CUDA1" dst="CUDA3" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA3"/> + <link_ctn id="PCI:0000:83:00.0 down"/> + <link_ctn id="PCI:0000:[83-83] through"/> + <link_ctn id="PCI:0000:[83-83] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[00-09] up"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + </route> + <route src="CUDA1" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA1-RAM"/> + <link_ctn id="PCI:0000:04:00.0 up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA1" symmetrical="NO"> + <link_ctn id="RAM-CUDA1"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn 
id="Host"/> + </route> + <route src="CUDA2" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA2-CUDA0"/> + <link_ctn id="PCI:0000:03:00.0 down"/> + <link_ctn id="PCI:0000:[03-03] through"/> + <link_ctn id="PCI:0000:[03-03] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[82-82] through"/> + </route> + <route src="CUDA2" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA2-CUDA1"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[82-82] through"/> + </route> + <route src="CUDA2" dst="CUDA3" symmetrical="NO"> + <link_ctn id="CUDA2-CUDA3"/> + <link_ctn id="PCI:0000:83:00.0 down"/> + <link_ctn id="PCI:0000:[83-83] through"/> + <link_ctn id="PCI:0000:[83-83] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[82-82] through"/> + </route> + <route src="CUDA2" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA2-RAM"/> + <link_ctn id="PCI:0000:82:00.0 up"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA2" symmetrical="NO"> + <link_ctn id="RAM-CUDA2"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn 
id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA3" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA3-CUDA0"/> + <link_ctn id="PCI:0000:03:00.0 down"/> + <link_ctn id="PCI:0000:[03-03] through"/> + <link_ctn id="PCI:0000:[03-03] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[83-83] up"/> + <link_ctn id="PCI:0000:[83-83] through"/> + </route> + <route src="CUDA3" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA3-CUDA1"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[83-83] up"/> + <link_ctn id="PCI:0000:[83-83] through"/> + </route> + <route src="CUDA3" dst="CUDA2" symmetrical="NO"> + <link_ctn id="CUDA3-CUDA2"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[83-83] up"/> + <link_ctn id="PCI:0000:[83-83] through"/> + </route> + <route src="CUDA3" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA3-RAM"/> + <link_ctn id="PCI:0000:83:00.0 up"/> + <link_ctn id="PCI:0000:[83-83] through"/> + <link_ctn id="PCI:0000:[83-83] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA3" symmetrical="NO"> + <link_ctn id="RAM-CUDA3"/> + <link_ctn id="PCI:0000:83:00.0 down"/> + <link_ctn id="PCI:0000:[83-83] through"/> + <link_ctn id="PCI:0000:[83-83] down"/> + <link_ctn 
id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + </route> + + </AS> + </platform> diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.platform.xml b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.platform.xml new file mode 100644 index 0000000000000000000000000000000000000000..2d884e24e073cfc0a57fc1e6f8f541bb24f21557 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-k40m.platform.xml @@ -0,0 +1,329 @@ +<?xml version='1.0'?> +<!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid.dtd'> + <platform version="3"> + <config id="General"> + <prop id="network/TCP_gamma" value="-1"></prop> + <prop id="network/latency_factor" value="1"></prop> + <prop id="network/bandwidth_factor" value="1"></prop> + <prop id="network/crosstraffic" value="0"></prop> + <prop id="network/weight_S" value="0.0"></prop> + </config> + <AS id="AS0" routing="Full"> + <host id="MAIN" power="1"/> + <host id="CPU0" power="2000000000"/> + <host id="CPU1" power="2000000000"/> + <host id="CPU2" power="2000000000"/> + <host id="CPU3" power="2000000000"/> + <host id="CPU4" power="2000000000"/> + <host id="CPU5" power="2000000000"/> + <host id="CPU6" power="2000000000"/> + <host id="CPU7" power="2000000000"/> + <host id="CPU8" power="2000000000"/> + <host id="CPU9" power="2000000000"/> + <host id="CPU10" power="2000000000"/> + <host id="CPU11" power="2000000000"/> + <host id="CPU12" power="2000000000"/> + <host id="CPU13" power="2000000000"/> + <host id="CPU14" power="2000000000"/> + <host id="CPU15" power="2000000000"/> + <host id="CPU16" power="2000000000"/> + <host id="CPU17" power="2000000000"/> + <host id="CPU18" power="2000000000"/> + <host id="CPU19" power="2000000000"/> + <host id="CPU20" power="2000000000"/> + <host id="CPU21" power="2000000000"/> + <host id="CPU22" power="2000000000"/> + <host id="CPU23" power="2000000000"/> + <host id="CUDA0" power="2000000000"> + <prop id="memsize" 
value="11997020160"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA1" power="2000000000"> + <prop id="memsize" value="11997020160"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA2" power="2000000000"> + <prop id="memsize" value="11997020160"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA3" power="2000000000"> + <prop id="memsize" value="11997020160"/> + <prop id="memcpy_peer" value="1"/> + </host> + + <host id="RAM" power="1"/> + + <link id="Host" bandwidth="10509279953.268858" latency="0.000000"/> + + <link id="RAM-CUDA0" bandwidth="10504274501.875196" latency="0.000008"/> + <link id="CUDA0-RAM" bandwidth="10499029584.051048" latency="0.000007"/> + <link id="RAM-CUDA1" bandwidth="10509222043.470289" latency="0.000008"/> + <link id="CUDA1-RAM" bandwidth="10509279953.268860" latency="0.000007"/> + <link id="RAM-CUDA2" bandwidth="9545511884.828405" latency="0.000009"/> + <link id="CUDA2-RAM" bandwidth="8461590540.535644" latency="0.000009"/> + <link id="RAM-CUDA3" bandwidth="9540121030.162157" latency="0.000009"/> + <link id="CUDA3-RAM" bandwidth="8137460661.567282" latency="0.000008"/> + + <link id="CUDA0-CUDA1" bandwidth="10250399319.935379" latency="0.000010"/> + <link id="CUDA0-CUDA2" bandwidth="8920860699.502163" latency="0.000016"/> + <link id="CUDA0-CUDA3" bandwidth="8701213506.609842" latency="0.000016"/> + <link id="CUDA1-CUDA0" bandwidth="10250193144.225332" latency="0.000009"/> + <link id="CUDA1-CUDA2" bandwidth="8688583236.880905" latency="0.000016"/> + <link id="CUDA1-CUDA3" bandwidth="8612009376.706877" latency="0.000017"/> + <link id="CUDA2-CUDA0" bandwidth="8157348384.165317" latency="0.000015"/> + <link id="CUDA2-CUDA1" bandwidth="8178562577.756922" latency="0.000016"/> + <link id="CUDA2-CUDA3" bandwidth="10250262031.627239" latency="0.000010"/> + <link id="CUDA3-CUDA0" bandwidth="8119667915.266233" latency="0.000015"/> + <link id="CUDA3-CUDA1" bandwidth="8210408712.019968" latency="0.000016"/> + 
<link id="CUDA3-CUDA2" bandwidth="10251854919.590197" latency="0.000010"/> + + <link id="PCI:0000:[00-09] up" bandwidth="10509279953.268860" latency="0.000000"/> + <link id="PCI:0000:[00-09] down" bandwidth="10509222043.470289" latency="0.000000"/> + <link id="PCI:0000:[00-09] through" bandwidth="21018559906.537720" latency="0.000000"/> + <link id="PCI:0000:[03-03] up" bandwidth="10499029584.051048" latency="0.000000"/> + <link id="PCI:0000:[03-03] down" bandwidth="10504274501.875196" latency="0.000000"/> + <link id="PCI:0000:[03-03] through" bandwidth="21008549003.750393" latency="0.000000"/> + <link id="PCI:0000:03:00.0 up" bandwidth="10499029584.051048" latency="0.000000"/> + <link id="PCI:0000:03:00.0 down" bandwidth="10504274501.875196" latency="0.000000"/> + <link id="PCI:0000:[04-04] up" bandwidth="10509279953.268860" latency="0.000000"/> + <link id="PCI:0000:[04-04] down" bandwidth="10509222043.470289" latency="0.000000"/> + <link id="PCI:0000:[04-04] through" bandwidth="21018559906.537720" latency="0.000000"/> + <link id="PCI:0000:04:00.0 up" bandwidth="10509279953.268860" latency="0.000000"/> + <link id="PCI:0000:04:00.0 down" bandwidth="10509222043.470289" latency="0.000000"/> + <link id="PCI:0000:[80-83] up" bandwidth="8461590540.535644" latency="0.000000"/> + <link id="PCI:0000:[80-83] down" bandwidth="9545511884.828405" latency="0.000000"/> + <link id="PCI:0000:[80-83] through" bandwidth="20503709839.180393" latency="0.000000"/> + <link id="PCI:0000:[82-82] up" bandwidth="10250262031.627239" latency="0.000000"/> + <link id="PCI:0000:[82-82] down" bandwidth="10251854919.590197" latency="0.000000"/> + <link id="PCI:0000:[82-82] through" bandwidth="20503709839.180393" latency="0.000000"/> + <link id="PCI:0000:82:00.0 up" bandwidth="8461590540.535644" latency="0.000000"/> + <link id="PCI:0000:82:00.0 down" bandwidth="10251854919.590197" latency="0.000000"/> + <link id="PCI:0000:[83-83] up" bandwidth="10251854919.590197" latency="0.000000"/> + <link 
id="PCI:0000:[83-83] down" bandwidth="10250262031.627239" latency="0.000000"/> + <link id="PCI:0000:[83-83] through" bandwidth="20503709839.180393" latency="0.000000"/> + <link id="PCI:0000:83:00.0 up" bandwidth="8137460661.567282" latency="0.000000"/> + <link id="PCI:0000:83:00.0 down" bandwidth="10250262031.627239" latency="0.000000"/> + + <route src="CUDA0" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA1"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[03-03] up"/> + <link_ctn id="PCI:0000:[03-03] through"/> + </route> + <route src="CUDA0" dst="CUDA2" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA2"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[00-09] up"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[03-03] up"/> + <link_ctn id="PCI:0000:[03-03] through"/> + </route> + <route src="CUDA0" dst="CUDA3" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA3"/> + <link_ctn id="PCI:0000:83:00.0 down"/> + <link_ctn id="PCI:0000:[83-83] through"/> + <link_ctn id="PCI:0000:[83-83] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[00-09] up"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[03-03] up"/> + <link_ctn id="PCI:0000:[03-03] through"/> + </route> + <route src="CUDA0" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA0-RAM"/> + <link_ctn id="PCI:0000:03:00.0 up"/> + <link_ctn id="PCI:0000:[03-03] through"/> + <link_ctn id="PCI:0000:[03-03] up"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn 
id="PCI:0000:[00-09] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA0" symmetrical="NO"> + <link_ctn id="RAM-CUDA0"/> + <link_ctn id="PCI:0000:03:00.0 down"/> + <link_ctn id="PCI:0000:[03-03] through"/> + <link_ctn id="PCI:0000:[03-03] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA1" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA0"/> + <link_ctn id="PCI:0000:03:00.0 down"/> + <link_ctn id="PCI:0000:[03-03] through"/> + <link_ctn id="PCI:0000:[03-03] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + </route> + <route src="CUDA1" dst="CUDA2" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA2"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[00-09] up"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + </route> + <route src="CUDA1" dst="CUDA3" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA3"/> + <link_ctn id="PCI:0000:83:00.0 down"/> + <link_ctn id="PCI:0000:[83-83] through"/> + <link_ctn id="PCI:0000:[83-83] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[00-09] up"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + </route> + <route src="CUDA1" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA1-RAM"/> + <link_ctn id="PCI:0000:04:00.0 up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn 
id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA1" symmetrical="NO"> + <link_ctn id="RAM-CUDA1"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA2" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA2-CUDA0"/> + <link_ctn id="PCI:0000:03:00.0 down"/> + <link_ctn id="PCI:0000:[03-03] through"/> + <link_ctn id="PCI:0000:[03-03] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[82-82] through"/> + </route> + <route src="CUDA2" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA2-CUDA1"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[82-82] through"/> + </route> + <route src="CUDA2" dst="CUDA3" symmetrical="NO"> + <link_ctn id="CUDA2-CUDA3"/> + <link_ctn id="PCI:0000:83:00.0 down"/> + <link_ctn id="PCI:0000:[83-83] through"/> + <link_ctn id="PCI:0000:[83-83] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[82-82] through"/> + </route> + <route src="CUDA2" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA2-RAM"/> + <link_ctn id="PCI:0000:82:00.0 up"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn 
id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA2" symmetrical="NO"> + <link_ctn id="RAM-CUDA2"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA3" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA3-CUDA0"/> + <link_ctn id="PCI:0000:03:00.0 down"/> + <link_ctn id="PCI:0000:[03-03] through"/> + <link_ctn id="PCI:0000:[03-03] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[83-83] up"/> + <link_ctn id="PCI:0000:[83-83] through"/> + </route> + <route src="CUDA3" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA3-CUDA1"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-09] through"/> + <link_ctn id="PCI:0000:[00-09] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[83-83] up"/> + <link_ctn id="PCI:0000:[83-83] through"/> + </route> + <route src="CUDA3" dst="CUDA2" symmetrical="NO"> + <link_ctn id="CUDA3-CUDA2"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[83-83] up"/> + <link_ctn id="PCI:0000:[83-83] through"/> + </route> + <route src="CUDA3" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA3-RAM"/> + <link_ctn id="PCI:0000:83:00.0 up"/> + <link_ctn 
id="PCI:0000:[83-83] through"/> + <link_ctn id="PCI:0000:[83-83] up"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA3" symmetrical="NO"> + <link_ctn id="RAM-CUDA3"/> + <link_ctn id="PCI:0000:83:00.0 down"/> + <link_ctn id="PCI:0000:[83-83] through"/> + <link_ctn id="PCI:0000:[83-83] down"/> + <link_ctn id="PCI:0000:[80-83] through"/> + <link_ctn id="PCI:0000:[80-83] down"/> + <link_ctn id="Host"/> + </route> + + </AS> + </platform> diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.affinity b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.affinity new file mode 100644 index 0000000000000000000000000000000000000000..98e7753edbf563110d3b316c3c8bf434205d1505 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.affinity @@ -0,0 +1,3 @@ +# GPU NUMA0 +0 0 +1 0 diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.bandwidth b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.bandwidth new file mode 100644 index 0000000000000000000000000000000000000000..e5360c98b8fbcf322b79ac8b906946e9775fea6e --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.bandwidth @@ -0,0 +1,9 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 +0.000000e+00 1.225954e+04 1.226987e+04 nan nan nan nan nan +1.317998e+04 0.000000e+00 1.151387e+04 nan nan nan nan nan +1.317174e+04 1.139902e+04 0.000000e+00 nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.config b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.config new file mode 100644 index 0000000000000000000000000000000000000000..a72fe77c8f8783d6e8c8017505724c693c1a1459 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.config @@ -0,0 +1,7 @@ +# 
Current configuration +32 # Number of CPUs +1 # Number of NUMA nodes +2 # Number of CUDA devices +0 # Number of OpenCL devices +0 # Number of MIC devices +0 # Number of MPI devices diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.latency b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.latency new file mode 100644 index 0000000000000000000000000000000000000000..ff064eeda44dc9c0d961cd190f23894fc45c9f51 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.latency @@ -0,0 +1,9 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 +0.000000e+00 7.772156e+00 8.277344e+00 nan nan nan nan nan +7.549594e+00 0.000000e+00 1.444169e+01 nan nan nan nan nan +7.778156e+00 1.422050e+01 0.000000e+00 nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.platform.v4.xml b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.platform.v4.xml new file mode 100644 index 0000000000000000000000000000000000000000..e41b4a6258347cb9502f7dff40f39ba16ebdf464 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.platform.v4.xml @@ -0,0 +1,149 @@ +<?xml version='1.0'?> +<!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd'> + <platform version="4"> + <config id="General"> + <prop id="network/TCP-gamma" value="-1"></prop> + <prop id="network/latency-factor" value="1"></prop> + <prop id="network/bandwidth-factor" value="1"></prop> + <prop id="network/crosstraffic" value="0"></prop> + <prop id="network/weight-S" value="0.0"></prop> + </config> + <AS id="AS0" routing="Full"> + <host id="MAIN" speed="1f"/> + <host id="CPU0" speed="2000000000f"/> + <host id="CPU1" speed="2000000000f"/> + <host id="CPU2" speed="2000000000f"/> + <host id="CPU3" speed="2000000000f"/> + <host id="CPU4" speed="2000000000f"/> + <host id="CPU5" 
speed="2000000000f"/> + <host id="CPU6" speed="2000000000f"/> + <host id="CPU7" speed="2000000000f"/> + <host id="CPU8" speed="2000000000f"/> + <host id="CPU9" speed="2000000000f"/> + <host id="CPU10" speed="2000000000f"/> + <host id="CPU11" speed="2000000000f"/> + <host id="CPU12" speed="2000000000f"/> + <host id="CPU13" speed="2000000000f"/> + <host id="CPU14" speed="2000000000f"/> + <host id="CPU15" speed="2000000000f"/> + <host id="CPU16" speed="2000000000f"/> + <host id="CPU17" speed="2000000000f"/> + <host id="CPU18" speed="2000000000f"/> + <host id="CPU19" speed="2000000000f"/> + <host id="CPU20" speed="2000000000f"/> + <host id="CPU21" speed="2000000000f"/> + <host id="CPU22" speed="2000000000f"/> + <host id="CPU23" speed="2000000000f"/> + <host id="CPU24" speed="2000000000f"/> + <host id="CPU25" speed="2000000000f"/> + <host id="CPU26" speed="2000000000f"/> + <host id="CPU27" speed="2000000000f"/> + <host id="CPU28" speed="2000000000f"/> + <host id="CPU29" speed="2000000000f"/> + <host id="CPU30" speed="2000000000f"/> + <host id="CPU31" speed="2000000000f"/> + <host id="CUDA0" speed="2000000000f"> + <prop id="memsize" value="17071800320"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA1" speed="2000000000f"> + <prop id="memsize" value="17071800320"/> + <prop id="memcpy_peer" value="1"/> + </host> + + <host id="RAM" speed="1f"/> + + <link id="Host" bandwidth="13179977868.515903Bps" latency="0.000000s"/> + + <link id="RAM-CUDA0" bandwidth="12259538653.064060Bps" latency="0.000008s"/> + <link id="CUDA0-RAM" bandwidth="13179977868.515903Bps" latency="0.000008s"/> + <link id="RAM-CUDA1" bandwidth="12269869477.991051Bps" latency="0.000008s"/> + <link id="CUDA1-RAM" bandwidth="13171735231.912804Bps" latency="0.000008s"/> + + <link id="CUDA0-CUDA1" bandwidth="11513869501.029203Bps" latency="0.000014s"/> + <link id="CUDA1-CUDA0" bandwidth="11399015261.961815Bps" latency="0.000014s"/> + + <link id="PCI:0000:[00-0b] up" 
bandwidth="13179977868.515903Bps" latency="0.000000s"/> + <link id="PCI:0000:[00-0b] down" bandwidth="12259538653.064060Bps" latency="0.000000s"/> + <link id="PCI:0000:[00-0b] through" bandwidth="26359955737.031807Bps" latency="0.000000s"/> + <link id="PCI:0000:[04-04] up" bandwidth="13179977868.515903Bps" latency="0.000000s"/> + <link id="PCI:0000:[04-04] down" bandwidth="12259538653.064060Bps" latency="0.000000s"/> + <link id="PCI:0000:[04-04] through" bandwidth="26359955737.031807Bps" latency="0.000000s"/> + <link id="PCI:0000:04:00.0 up" bandwidth="13179977868.515903Bps" latency="0.000000s"/> + <link id="PCI:0000:04:00.0 down" bandwidth="12259538653.064060Bps" latency="0.000000s"/> + <link id="PCI:0000:[80-84] up" bandwidth="13171735231.912804Bps" latency="0.000000s"/> + <link id="PCI:0000:[80-84] down" bandwidth="12269869477.991051Bps" latency="0.000000s"/> + <link id="PCI:0000:[80-84] through" bandwidth="26343470463.825607Bps" latency="0.000000s"/> + <link id="PCI:0000:[82-82] up" bandwidth="13171735231.912804Bps" latency="0.000000s"/> + <link id="PCI:0000:[82-82] down" bandwidth="12269869477.991051Bps" latency="0.000000s"/> + <link id="PCI:0000:[82-82] through" bandwidth="26343470463.825607Bps" latency="0.000000s"/> + <link id="PCI:0000:82:00.0 up" bandwidth="13171735231.912804Bps" latency="0.000000s"/> + <link id="PCI:0000:82:00.0 down" bandwidth="12269869477.991051Bps" latency="0.000000s"/> + + <route src="CUDA0" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA1"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-84] through"/> + <link_ctn id="PCI:0000:[80-84] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[00-0b] up"/> + <link_ctn id="PCI:0000:[00-0b] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + </route> + <route src="CUDA0" dst="RAM" symmetrical="NO"> + 
<link_ctn id="CUDA0-RAM"/> + <link_ctn id="PCI:0000:04:00.0 up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn id="PCI:0000:[00-0b] through"/> + <link_ctn id="PCI:0000:[00-0b] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA0" symmetrical="NO"> + <link_ctn id="RAM-CUDA0"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-0b] through"/> + <link_ctn id="PCI:0000:[00-0b] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA1" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA0"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-0b] through"/> + <link_ctn id="PCI:0000:[00-0b] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[80-84] up"/> + <link_ctn id="PCI:0000:[80-84] through"/> + <link_ctn id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[82-82] through"/> + </route> + <route src="CUDA1" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA1-RAM"/> + <link_ctn id="PCI:0000:82:00.0 up"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[80-84] through"/> + <link_ctn id="PCI:0000:[80-84] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA1" symmetrical="NO"> + <link_ctn id="RAM-CUDA1"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-84] through"/> + <link_ctn id="PCI:0000:[80-84] down"/> + <link_ctn id="Host"/> + </route> + + </AS> + </platform> diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.platform.xml b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.platform.xml new file mode 100644 index 
0000000000000000000000000000000000000000..f788433d57a3dba6518c61952959e4fd7f1ffd9f --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-p100.platform.xml @@ -0,0 +1,149 @@ +<?xml version='1.0'?> +<!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid.dtd'> + <platform version="3"> + <config id="General"> + <prop id="network/TCP_gamma" value="-1"></prop> + <prop id="network/latency_factor" value="1"></prop> + <prop id="network/bandwidth_factor" value="1"></prop> + <prop id="network/crosstraffic" value="0"></prop> + <prop id="network/weight_S" value="0.0"></prop> + </config> + <AS id="AS0" routing="Full"> + <host id="MAIN" power="1"/> + <host id="CPU0" power="2000000000"/> + <host id="CPU1" power="2000000000"/> + <host id="CPU2" power="2000000000"/> + <host id="CPU3" power="2000000000"/> + <host id="CPU4" power="2000000000"/> + <host id="CPU5" power="2000000000"/> + <host id="CPU6" power="2000000000"/> + <host id="CPU7" power="2000000000"/> + <host id="CPU8" power="2000000000"/> + <host id="CPU9" power="2000000000"/> + <host id="CPU10" power="2000000000"/> + <host id="CPU11" power="2000000000"/> + <host id="CPU12" power="2000000000"/> + <host id="CPU13" power="2000000000"/> + <host id="CPU14" power="2000000000"/> + <host id="CPU15" power="2000000000"/> + <host id="CPU16" power="2000000000"/> + <host id="CPU17" power="2000000000"/> + <host id="CPU18" power="2000000000"/> + <host id="CPU19" power="2000000000"/> + <host id="CPU20" power="2000000000"/> + <host id="CPU21" power="2000000000"/> + <host id="CPU22" power="2000000000"/> + <host id="CPU23" power="2000000000"/> + <host id="CPU24" power="2000000000"/> + <host id="CPU25" power="2000000000"/> + <host id="CPU26" power="2000000000"/> + <host id="CPU27" power="2000000000"/> + <host id="CPU28" power="2000000000"/> + <host id="CPU29" power="2000000000"/> + <host id="CPU30" power="2000000000"/> + <host id="CPU31" power="2000000000"/> + <host id="CUDA0" power="2000000000"> + <prop id="memsize" 
value="17071800320"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA1" power="2000000000"> + <prop id="memsize" value="17071800320"/> + <prop id="memcpy_peer" value="1"/> + </host> + + <host id="RAM" power="1"/> + + <link id="Host" bandwidth="13179977868.515903" latency="0.000000"/> + + <link id="RAM-CUDA0" bandwidth="12259538653.064060" latency="0.000008"/> + <link id="CUDA0-RAM" bandwidth="13179977868.515903" latency="0.000008"/> + <link id="RAM-CUDA1" bandwidth="12269869477.991051" latency="0.000008"/> + <link id="CUDA1-RAM" bandwidth="13171735231.912804" latency="0.000008"/> + + <link id="CUDA0-CUDA1" bandwidth="11513869501.029203" latency="0.000014"/> + <link id="CUDA1-CUDA0" bandwidth="11399015261.961815" latency="0.000014"/> + + <link id="PCI:0000:[00-0b] up" bandwidth="13179977868.515903" latency="0.000000"/> + <link id="PCI:0000:[00-0b] down" bandwidth="12259538653.064060" latency="0.000000"/> + <link id="PCI:0000:[00-0b] through" bandwidth="26359955737.031807" latency="0.000000"/> + <link id="PCI:0000:[04-04] up" bandwidth="13179977868.515903" latency="0.000000"/> + <link id="PCI:0000:[04-04] down" bandwidth="12259538653.064060" latency="0.000000"/> + <link id="PCI:0000:[04-04] through" bandwidth="26359955737.031807" latency="0.000000"/> + <link id="PCI:0000:04:00.0 up" bandwidth="13179977868.515903" latency="0.000000"/> + <link id="PCI:0000:04:00.0 down" bandwidth="12259538653.064060" latency="0.000000"/> + <link id="PCI:0000:[80-84] up" bandwidth="13171735231.912804" latency="0.000000"/> + <link id="PCI:0000:[80-84] down" bandwidth="12269869477.991051" latency="0.000000"/> + <link id="PCI:0000:[80-84] through" bandwidth="26343470463.825607" latency="0.000000"/> + <link id="PCI:0000:[82-82] up" bandwidth="13171735231.912804" latency="0.000000"/> + <link id="PCI:0000:[82-82] down" bandwidth="12269869477.991051" latency="0.000000"/> + <link id="PCI:0000:[82-82] through" bandwidth="26343470463.825607" latency="0.000000"/> + <link 
id="PCI:0000:82:00.0 up" bandwidth="13171735231.912804" latency="0.000000"/> + <link id="PCI:0000:82:00.0 down" bandwidth="12269869477.991051" latency="0.000000"/> + + <route src="CUDA0" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA1"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-84] through"/> + <link_ctn id="PCI:0000:[80-84] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[00-0b] up"/> + <link_ctn id="PCI:0000:[00-0b] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + </route> + <route src="CUDA0" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA0-RAM"/> + <link_ctn id="PCI:0000:04:00.0 up"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] up"/> + <link_ctn id="PCI:0000:[00-0b] through"/> + <link_ctn id="PCI:0000:[00-0b] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA0" symmetrical="NO"> + <link_ctn id="RAM-CUDA0"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-0b] through"/> + <link_ctn id="PCI:0000:[00-0b] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA1" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA0"/> + <link_ctn id="PCI:0000:04:00.0 down"/> + <link_ctn id="PCI:0000:[04-04] through"/> + <link_ctn id="PCI:0000:[04-04] down"/> + <link_ctn id="PCI:0000:[00-0b] through"/> + <link_ctn id="PCI:0000:[00-0b] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[80-84] up"/> + <link_ctn id="PCI:0000:[80-84] through"/> + <link_ctn id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[82-82] through"/> + </route> + <route src="CUDA1" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA1-RAM"/> + <link_ctn id="PCI:0000:82:00.0 up"/> + <link_ctn id="PCI:0000:[82-82] 
through"/> + <link_ctn id="PCI:0000:[82-82] up"/> + <link_ctn id="PCI:0000:[80-84] through"/> + <link_ctn id="PCI:0000:[80-84] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA1" symmetrical="NO"> + <link_ctn id="RAM-CUDA1"/> + <link_ctn id="PCI:0000:82:00.0 down"/> + <link_ctn id="PCI:0000:[82-82] through"/> + <link_ctn id="PCI:0000:[82-82] down"/> + <link_ctn id="PCI:0000:[80-84] through"/> + <link_ctn id="PCI:0000:[80-84] down"/> + <link_ctn id="Host"/> + </route> + + </AS> + </platform> diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.affinity b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.affinity new file mode 100644 index 0000000000000000000000000000000000000000..98e7753edbf563110d3b316c3c8bf434205d1505 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.affinity @@ -0,0 +1,3 @@ +# GPU NUMA0 +0 0 +1 0 diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.bandwidth b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.bandwidth new file mode 100644 index 0000000000000000000000000000000000000000..77c281e8ac44dbf4d6ff8e8114fca72914804886 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.bandwidth @@ -0,0 +1,9 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 +0.000000e+00 1.219592e+04 1.218918e+04 nan nan nan nan nan +1.298221e+04 0.000000e+00 9.732451e+03 nan nan nan nan nan +1.297135e+04 1.021293e+04 0.000000e+00 nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.config b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.config new file mode 100644 index 0000000000000000000000000000000000000000..591f5eef778926571cf3394a6b28226eab8938a0 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.config @@ -0,0 +1,7 
@@ +# Current configuration +40 # Number of CPUs +1 # Number of NUMA nodes +2 # Number of CUDA devices +0 # Number of OpenCL devices +0 # Number of MIC devices +0 # Number of MPI devices diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.latency b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.latency new file mode 100644 index 0000000000000000000000000000000000000000..e1e6ee87a01393a9749771ddf3dc0bec109be12f --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.latency @@ -0,0 +1,9 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 +0.000000e+00 7.359437e+00 7.619937e+00 nan nan nan nan nan +7.278000e+00 0.000000e+00 8.228219e+00 nan nan nan nan nan +7.814937e+00 8.656250e+00 0.000000e+00 nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.platform.v4.xml b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.platform.v4.xml new file mode 100644 index 0000000000000000000000000000000000000000..05a121623f9bc049f4700bc20863804315817394 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.platform.v4.xml @@ -0,0 +1,157 @@ +<?xml version='1.0'?> +<!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd'> + <platform version="4"> + <config id="General"> + <prop id="network/TCP-gamma" value="-1"></prop> + <prop id="network/latency-factor" value="1"></prop> + <prop id="network/bandwidth-factor" value="1"></prop> + <prop id="network/crosstraffic" value="0"></prop> + <prop id="network/weight-S" value="0.0"></prop> + </config> + <AS id="AS0" routing="Full"> + <host id="MAIN" speed="1f"/> + <host id="CPU0" speed="2000000000f"/> + <host id="CPU1" speed="2000000000f"/> + <host id="CPU2" speed="2000000000f"/> + <host id="CPU3" speed="2000000000f"/> + <host id="CPU4" 
speed="2000000000f"/> + <host id="CPU5" speed="2000000000f"/> + <host id="CPU6" speed="2000000000f"/> + <host id="CPU7" speed="2000000000f"/> + <host id="CPU8" speed="2000000000f"/> + <host id="CPU9" speed="2000000000f"/> + <host id="CPU10" speed="2000000000f"/> + <host id="CPU11" speed="2000000000f"/> + <host id="CPU12" speed="2000000000f"/> + <host id="CPU13" speed="2000000000f"/> + <host id="CPU14" speed="2000000000f"/> + <host id="CPU15" speed="2000000000f"/> + <host id="CPU16" speed="2000000000f"/> + <host id="CPU17" speed="2000000000f"/> + <host id="CPU18" speed="2000000000f"/> + <host id="CPU19" speed="2000000000f"/> + <host id="CPU20" speed="2000000000f"/> + <host id="CPU21" speed="2000000000f"/> + <host id="CPU22" speed="2000000000f"/> + <host id="CPU23" speed="2000000000f"/> + <host id="CPU24" speed="2000000000f"/> + <host id="CPU25" speed="2000000000f"/> + <host id="CPU26" speed="2000000000f"/> + <host id="CPU27" speed="2000000000f"/> + <host id="CPU28" speed="2000000000f"/> + <host id="CPU29" speed="2000000000f"/> + <host id="CPU30" speed="2000000000f"/> + <host id="CPU31" speed="2000000000f"/> + <host id="CPU32" speed="2000000000f"/> + <host id="CPU33" speed="2000000000f"/> + <host id="CPU34" speed="2000000000f"/> + <host id="CPU35" speed="2000000000f"/> + <host id="CPU36" speed="2000000000f"/> + <host id="CPU37" speed="2000000000f"/> + <host id="CPU38" speed="2000000000f"/> + <host id="CPU39" speed="2000000000f"/> + <host id="CUDA0" speed="2000000000f"> + <prop id="memsize" value="47769059328"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA1" speed="2000000000f"> + <prop id="memsize" value="47769059328"/> + <prop id="memcpy_peer" value="1"/> + </host> + + <host id="RAM" speed="1f"/> + + <link id="Host" bandwidth="12982210409.627262Bps" latency="0.000000s"/> + + <link id="RAM-CUDA0" bandwidth="12195916559.100412Bps" latency="0.000007s"/> + <link id="CUDA0-RAM" bandwidth="12982210409.627260Bps" latency="0.000007s"/> + <link 
id="RAM-CUDA1" bandwidth="12189175076.438492Bps" latency="0.000008s"/> + <link id="CUDA1-RAM" bandwidth="12971346207.908758Bps" latency="0.000008s"/> + + <link id="CUDA0-CUDA1" bandwidth="9732451436.147791Bps" latency="0.000008s"/> + <link id="CUDA1-CUDA0" bandwidth="10212931896.883699Bps" latency="0.000009s"/> + + <link id="PCI:0000:[3a-3b] up" bandwidth="12982210409.627260Bps" latency="0.000000s"/> + <link id="PCI:0000:[3a-3b] down" bandwidth="12195916559.100412Bps" latency="0.000000s"/> + <link id="PCI:0000:[3a-3b] through" bandwidth="25964420819.254520Bps" latency="0.000000s"/> + <link id="PCI:0000:[3b-3b] up" bandwidth="12982210409.627260Bps" latency="0.000000s"/> + <link id="PCI:0000:[3b-3b] down" bandwidth="12195916559.100412Bps" latency="0.000000s"/> + <link id="PCI:0000:[3b-3b] through" bandwidth="25964420819.254520Bps" latency="0.000000s"/> + <link id="PCI:0000:3b:00.0 up" bandwidth="12982210409.627260Bps" latency="0.000000s"/> + <link id="PCI:0000:3b:00.0 down" bandwidth="12195916559.100412Bps" latency="0.000000s"/> + <link id="PCI:0000:[d7-d8] up" bandwidth="12971346207.908758Bps" latency="0.000000s"/> + <link id="PCI:0000:[d7-d8] down" bandwidth="12189175076.438492Bps" latency="0.000000s"/> + <link id="PCI:0000:[d7-d8] through" bandwidth="25942692415.817516Bps" latency="0.000000s"/> + <link id="PCI:0000:[d8-d8] up" bandwidth="12971346207.908758Bps" latency="0.000000s"/> + <link id="PCI:0000:[d8-d8] down" bandwidth="12189175076.438492Bps" latency="0.000000s"/> + <link id="PCI:0000:[d8-d8] through" bandwidth="25942692415.817516Bps" latency="0.000000s"/> + <link id="PCI:0000:d8:00.0 up" bandwidth="12971346207.908758Bps" latency="0.000000s"/> + <link id="PCI:0000:d8:00.0 down" bandwidth="12189175076.438492Bps" latency="0.000000s"/> + + <route src="CUDA0" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA1"/> + <link_ctn id="PCI:0000:d8:00.0 down"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] down"/> + <link_ctn 
id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[3a-3b] up"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] up"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + </route> + <route src="CUDA0" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA0-RAM"/> + <link_ctn id="PCI:0000:3b:00.0 up"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] up"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA0" symmetrical="NO"> + <link_ctn id="RAM-CUDA0"/> + <link_ctn id="PCI:0000:3b:00.0 down"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] down"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA1" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA0"/> + <link_ctn id="PCI:0000:3b:00.0 down"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] down"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[d7-d8] up"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] up"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + </route> + <route src="CUDA1" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA1-RAM"/> + <link_ctn id="PCI:0000:d8:00.0 up"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] up"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA1" symmetrical="NO"> + <link_ctn id="RAM-CUDA1"/> + <link_ctn id="PCI:0000:d8:00.0 down"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] down"/> + <link_ctn 
id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] down"/> + <link_ctn id="Host"/> + </route> + + </AS> + </platform> diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.platform.xml b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.platform.xml new file mode 100644 index 0000000000000000000000000000000000000000..fa8c2a5462337e9de78ec79e4e2c17ffdb1a21d9 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-rtx8000.platform.xml @@ -0,0 +1,157 @@ +<?xml version='1.0'?> +<!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid.dtd'> + <platform version="3"> + <config id="General"> + <prop id="network/TCP_gamma" value="-1"></prop> + <prop id="network/latency_factor" value="1"></prop> + <prop id="network/bandwidth_factor" value="1"></prop> + <prop id="network/crosstraffic" value="0"></prop> + <prop id="network/weight_S" value="0.0"></prop> + </config> + <AS id="AS0" routing="Full"> + <host id="MAIN" power="1"/> + <host id="CPU0" power="2000000000"/> + <host id="CPU1" power="2000000000"/> + <host id="CPU2" power="2000000000"/> + <host id="CPU3" power="2000000000"/> + <host id="CPU4" power="2000000000"/> + <host id="CPU5" power="2000000000"/> + <host id="CPU6" power="2000000000"/> + <host id="CPU7" power="2000000000"/> + <host id="CPU8" power="2000000000"/> + <host id="CPU9" power="2000000000"/> + <host id="CPU10" power="2000000000"/> + <host id="CPU11" power="2000000000"/> + <host id="CPU12" power="2000000000"/> + <host id="CPU13" power="2000000000"/> + <host id="CPU14" power="2000000000"/> + <host id="CPU15" power="2000000000"/> + <host id="CPU16" power="2000000000"/> + <host id="CPU17" power="2000000000"/> + <host id="CPU18" power="2000000000"/> + <host id="CPU19" power="2000000000"/> + <host id="CPU20" power="2000000000"/> + <host id="CPU21" power="2000000000"/> + <host id="CPU22" power="2000000000"/> + <host id="CPU23" power="2000000000"/> + <host id="CPU24" power="2000000000"/> + <host id="CPU25" 
power="2000000000"/> + <host id="CPU26" power="2000000000"/> + <host id="CPU27" power="2000000000"/> + <host id="CPU28" power="2000000000"/> + <host id="CPU29" power="2000000000"/> + <host id="CPU30" power="2000000000"/> + <host id="CPU31" power="2000000000"/> + <host id="CPU32" power="2000000000"/> + <host id="CPU33" power="2000000000"/> + <host id="CPU34" power="2000000000"/> + <host id="CPU35" power="2000000000"/> + <host id="CPU36" power="2000000000"/> + <host id="CPU37" power="2000000000"/> + <host id="CPU38" power="2000000000"/> + <host id="CPU39" power="2000000000"/> + <host id="CUDA0" power="2000000000"> + <prop id="memsize" value="47769059328"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA1" power="2000000000"> + <prop id="memsize" value="47769059328"/> + <prop id="memcpy_peer" value="1"/> + </host> + + <host id="RAM" power="1"/> + + <link id="Host" bandwidth="12982210409.627262" latency="0.000000"/> + + <link id="RAM-CUDA0" bandwidth="12195916559.100412" latency="0.000007"/> + <link id="CUDA0-RAM" bandwidth="12982210409.627260" latency="0.000007"/> + <link id="RAM-CUDA1" bandwidth="12189175076.438492" latency="0.000008"/> + <link id="CUDA1-RAM" bandwidth="12971346207.908758" latency="0.000008"/> + + <link id="CUDA0-CUDA1" bandwidth="9732451436.147791" latency="0.000008"/> + <link id="CUDA1-CUDA0" bandwidth="10212931896.883699" latency="0.000009"/> + + <link id="PCI:0000:[3a-3b] up" bandwidth="12982210409.627260" latency="0.000000"/> + <link id="PCI:0000:[3a-3b] down" bandwidth="12195916559.100412" latency="0.000000"/> + <link id="PCI:0000:[3a-3b] through" bandwidth="25964420819.254520" latency="0.000000"/> + <link id="PCI:0000:[3b-3b] up" bandwidth="12982210409.627260" latency="0.000000"/> + <link id="PCI:0000:[3b-3b] down" bandwidth="12195916559.100412" latency="0.000000"/> + <link id="PCI:0000:[3b-3b] through" bandwidth="25964420819.254520" latency="0.000000"/> + <link id="PCI:0000:3b:00.0 up" bandwidth="12982210409.627260" 
latency="0.000000"/> + <link id="PCI:0000:3b:00.0 down" bandwidth="12195916559.100412" latency="0.000000"/> + <link id="PCI:0000:[d7-d8] up" bandwidth="12971346207.908758" latency="0.000000"/> + <link id="PCI:0000:[d7-d8] down" bandwidth="12189175076.438492" latency="0.000000"/> + <link id="PCI:0000:[d7-d8] through" bandwidth="25942692415.817516" latency="0.000000"/> + <link id="PCI:0000:[d8-d8] up" bandwidth="12971346207.908758" latency="0.000000"/> + <link id="PCI:0000:[d8-d8] down" bandwidth="12189175076.438492" latency="0.000000"/> + <link id="PCI:0000:[d8-d8] through" bandwidth="25942692415.817516" latency="0.000000"/> + <link id="PCI:0000:d8:00.0 up" bandwidth="12971346207.908758" latency="0.000000"/> + <link id="PCI:0000:d8:00.0 down" bandwidth="12189175076.438492" latency="0.000000"/> + + <route src="CUDA0" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA1"/> + <link_ctn id="PCI:0000:d8:00.0 down"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] down"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[3a-3b] up"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] up"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + </route> + <route src="CUDA0" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA0-RAM"/> + <link_ctn id="PCI:0000:3b:00.0 up"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] up"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA0" symmetrical="NO"> + <link_ctn id="RAM-CUDA0"/> + <link_ctn id="PCI:0000:3b:00.0 down"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] down"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA1" dst="CUDA0" 
symmetrical="NO"> + <link_ctn id="CUDA1-CUDA0"/> + <link_ctn id="PCI:0000:3b:00.0 down"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] down"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[d7-d8] up"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] up"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + </route> + <route src="CUDA1" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA1-RAM"/> + <link_ctn id="PCI:0000:d8:00.0 up"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] up"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA1" symmetrical="NO"> + <link_ctn id="RAM-CUDA1"/> + <link_ctn id="PCI:0000:d8:00.0 down"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] down"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] down"/> + <link_ctn id="Host"/> + </route> + + </AS> + </platform> diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.affinity b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.affinity new file mode 100644 index 0000000000000000000000000000000000000000..98e7753edbf563110d3b316c3c8bf434205d1505 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.affinity @@ -0,0 +1,3 @@ +# GPU NUMA0 +0 0 +1 0 diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.bandwidth b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.bandwidth new file mode 100644 index 0000000000000000000000000000000000000000..73c7a5cdee47e64fda9db58346f6de39e8bd1363 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.bandwidth @@ -0,0 +1,9 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 +0.000000e+00 1.220505e+04 1.219110e+04 nan nan nan nan nan +1.295764e+04 
0.000000e+00 9.713193e+03 nan nan nan nan nan +1.260116e+04 1.019915e+04 0.000000e+00 nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.config b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.config new file mode 100644 index 0000000000000000000000000000000000000000..591f5eef778926571cf3394a6b28226eab8938a0 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.config @@ -0,0 +1,7 @@ +# Current configuration +40 # Number of CPUs +1 # Number of NUMA nodes +2 # Number of CUDA devices +0 # Number of OpenCL devices +0 # Number of MIC devices +0 # Number of MPI devices diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.latency b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.latency new file mode 100644 index 0000000000000000000000000000000000000000..44992c86bdaf1f29aa27315b0290311bbdb70d60 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.latency @@ -0,0 +1,9 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 +0.000000e+00 7.694375e+00 7.997844e+00 nan nan nan nan nan +7.764000e+00 0.000000e+00 9.343719e+00 nan nan nan nan nan +7.886125e+00 9.496813e+00 0.000000e+00 nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.platform.v4.xml b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.platform.v4.xml new file mode 100644 index 0000000000000000000000000000000000000000..023ae250a42b915cba47b4dfb69483da9993d6d3 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.platform.v4.xml @@ -0,0 +1,157 @@ +<?xml version='1.0'?> +<!DOCTYPE platform SYSTEM 
'http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd'> + <platform version="4"> + <config id="General"> + <prop id="network/TCP-gamma" value="-1"></prop> + <prop id="network/latency-factor" value="1"></prop> + <prop id="network/bandwidth-factor" value="1"></prop> + <prop id="network/crosstraffic" value="0"></prop> + <prop id="network/weight-S" value="0.0"></prop> + </config> + <AS id="AS0" routing="Full"> + <host id="MAIN" speed="1f"/> + <host id="CPU0" speed="2000000000f"/> + <host id="CPU1" speed="2000000000f"/> + <host id="CPU2" speed="2000000000f"/> + <host id="CPU3" speed="2000000000f"/> + <host id="CPU4" speed="2000000000f"/> + <host id="CPU5" speed="2000000000f"/> + <host id="CPU6" speed="2000000000f"/> + <host id="CPU7" speed="2000000000f"/> + <host id="CPU8" speed="2000000000f"/> + <host id="CPU9" speed="2000000000f"/> + <host id="CPU10" speed="2000000000f"/> + <host id="CPU11" speed="2000000000f"/> + <host id="CPU12" speed="2000000000f"/> + <host id="CPU13" speed="2000000000f"/> + <host id="CPU14" speed="2000000000f"/> + <host id="CPU15" speed="2000000000f"/> + <host id="CPU16" speed="2000000000f"/> + <host id="CPU17" speed="2000000000f"/> + <host id="CPU18" speed="2000000000f"/> + <host id="CPU19" speed="2000000000f"/> + <host id="CPU20" speed="2000000000f"/> + <host id="CPU21" speed="2000000000f"/> + <host id="CPU22" speed="2000000000f"/> + <host id="CPU23" speed="2000000000f"/> + <host id="CPU24" speed="2000000000f"/> + <host id="CPU25" speed="2000000000f"/> + <host id="CPU26" speed="2000000000f"/> + <host id="CPU27" speed="2000000000f"/> + <host id="CPU28" speed="2000000000f"/> + <host id="CPU29" speed="2000000000f"/> + <host id="CPU30" speed="2000000000f"/> + <host id="CPU31" speed="2000000000f"/> + <host id="CPU32" speed="2000000000f"/> + <host id="CPU33" speed="2000000000f"/> + <host id="CPU34" speed="2000000000f"/> + <host id="CPU35" speed="2000000000f"/> + <host id="CPU36" speed="2000000000f"/> + <host id="CPU37" speed="2000000000f"/> + <host 
id="CPU38" speed="2000000000f"/> + <host id="CPU39" speed="2000000000f"/> + <host id="CUDA0" speed="2000000000f"> + <prop id="memsize" value="16945709056"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA1" speed="2000000000f"> + <prop id="memsize" value="16945709056"/> + <prop id="memcpy_peer" value="1"/> + </host> + + <host id="RAM" speed="1f"/> + + <link id="Host" bandwidth="12957640483.976341Bps" latency="0.000000s"/> + + <link id="RAM-CUDA0" bandwidth="12205049854.385120Bps" latency="0.000008s"/> + <link id="CUDA0-RAM" bandwidth="12957640483.976341Bps" latency="0.000008s"/> + <link id="RAM-CUDA1" bandwidth="12191098614.766151Bps" latency="0.000008s"/> + <link id="CUDA1-RAM" bandwidth="12601163308.353323Bps" latency="0.000008s"/> + + <link id="CUDA0-CUDA1" bandwidth="9713193372.485958Bps" latency="0.000009s"/> + <link id="CUDA1-CUDA0" bandwidth="10199149651.830450Bps" latency="0.000009s"/> + + <link id="PCI:0000:[3a-3b] up" bandwidth="12957640483.976341Bps" latency="0.000000s"/> + <link id="PCI:0000:[3a-3b] down" bandwidth="12205049854.385120Bps" latency="0.000000s"/> + <link id="PCI:0000:[3a-3b] through" bandwidth="25915280967.952682Bps" latency="0.000000s"/> + <link id="PCI:0000:[3b-3b] up" bandwidth="12957640483.976341Bps" latency="0.000000s"/> + <link id="PCI:0000:[3b-3b] down" bandwidth="12205049854.385120Bps" latency="0.000000s"/> + <link id="PCI:0000:[3b-3b] through" bandwidth="25915280967.952682Bps" latency="0.000000s"/> + <link id="PCI:0000:3b:00.0 up" bandwidth="12957640483.976341Bps" latency="0.000000s"/> + <link id="PCI:0000:3b:00.0 down" bandwidth="12205049854.385120Bps" latency="0.000000s"/> + <link id="PCI:0000:[d7-d8] up" bandwidth="12601163308.353323Bps" latency="0.000000s"/> + <link id="PCI:0000:[d7-d8] down" bandwidth="12191098614.766151Bps" latency="0.000000s"/> + <link id="PCI:0000:[d7-d8] through" bandwidth="25202326616.706646Bps" latency="0.000000s"/> + <link id="PCI:0000:[d8-d8] up" bandwidth="12601163308.353323Bps" 
latency="0.000000s"/> + <link id="PCI:0000:[d8-d8] down" bandwidth="12191098614.766151Bps" latency="0.000000s"/> + <link id="PCI:0000:[d8-d8] through" bandwidth="25202326616.706646Bps" latency="0.000000s"/> + <link id="PCI:0000:d8:00.0 up" bandwidth="12601163308.353323Bps" latency="0.000000s"/> + <link id="PCI:0000:d8:00.0 down" bandwidth="12191098614.766151Bps" latency="0.000000s"/> + + <route src="CUDA0" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA1"/> + <link_ctn id="PCI:0000:d8:00.0 down"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] down"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[3a-3b] up"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] up"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + </route> + <route src="CUDA0" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA0-RAM"/> + <link_ctn id="PCI:0000:3b:00.0 up"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] up"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA0" symmetrical="NO"> + <link_ctn id="RAM-CUDA0"/> + <link_ctn id="PCI:0000:3b:00.0 down"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] down"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA1" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA0"/> + <link_ctn id="PCI:0000:3b:00.0 down"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] down"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[d7-d8] up"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn 
id="PCI:0000:[d8-d8] up"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + </route> + <route src="CUDA1" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA1-RAM"/> + <link_ctn id="PCI:0000:d8:00.0 up"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] up"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA1" symmetrical="NO"> + <link_ctn id="RAM-CUDA1"/> + <link_ctn id="PCI:0000:d8:00.0 down"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] down"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] down"/> + <link_ctn id="Host"/> + </route> + + </AS> + </platform> diff --git a/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.platform.xml b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.platform.xml new file mode 100644 index 0000000000000000000000000000000000000000..d3b824a8f49078ac9b59d367ba7188ae39961f50 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/bus/sirocco-v100.platform.xml @@ -0,0 +1,157 @@ +<?xml version='1.0'?> +<!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid.dtd'> + <platform version="3"> + <config id="General"> + <prop id="network/TCP_gamma" value="-1"></prop> + <prop id="network/latency_factor" value="1"></prop> + <prop id="network/bandwidth_factor" value="1"></prop> + <prop id="network/crosstraffic" value="0"></prop> + <prop id="network/weight_S" value="0.0"></prop> + </config> + <AS id="AS0" routing="Full"> + <host id="MAIN" power="1"/> + <host id="CPU0" power="2000000000"/> + <host id="CPU1" power="2000000000"/> + <host id="CPU2" power="2000000000"/> + <host id="CPU3" power="2000000000"/> + <host id="CPU4" power="2000000000"/> + <host id="CPU5" power="2000000000"/> + <host id="CPU6" power="2000000000"/> + <host id="CPU7" power="2000000000"/> + <host id="CPU8" power="2000000000"/> + <host id="CPU9" power="2000000000"/> + <host 
id="CPU10" power="2000000000"/> + <host id="CPU11" power="2000000000"/> + <host id="CPU12" power="2000000000"/> + <host id="CPU13" power="2000000000"/> + <host id="CPU14" power="2000000000"/> + <host id="CPU15" power="2000000000"/> + <host id="CPU16" power="2000000000"/> + <host id="CPU17" power="2000000000"/> + <host id="CPU18" power="2000000000"/> + <host id="CPU19" power="2000000000"/> + <host id="CPU20" power="2000000000"/> + <host id="CPU21" power="2000000000"/> + <host id="CPU22" power="2000000000"/> + <host id="CPU23" power="2000000000"/> + <host id="CPU24" power="2000000000"/> + <host id="CPU25" power="2000000000"/> + <host id="CPU26" power="2000000000"/> + <host id="CPU27" power="2000000000"/> + <host id="CPU28" power="2000000000"/> + <host id="CPU29" power="2000000000"/> + <host id="CPU30" power="2000000000"/> + <host id="CPU31" power="2000000000"/> + <host id="CPU32" power="2000000000"/> + <host id="CPU33" power="2000000000"/> + <host id="CPU34" power="2000000000"/> + <host id="CPU35" power="2000000000"/> + <host id="CPU36" power="2000000000"/> + <host id="CPU37" power="2000000000"/> + <host id="CPU38" power="2000000000"/> + <host id="CPU39" power="2000000000"/> + <host id="CUDA0" power="2000000000"> + <prop id="memsize" value="16945709056"/> + <prop id="memcpy_peer" value="1"/> + </host> + <host id="CUDA1" power="2000000000"> + <prop id="memsize" value="16945709056"/> + <prop id="memcpy_peer" value="1"/> + </host> + + <host id="RAM" power="1"/> + + <link id="Host" bandwidth="12957640483.976341" latency="0.000000"/> + + <link id="RAM-CUDA0" bandwidth="12205049854.385120" latency="0.000008"/> + <link id="CUDA0-RAM" bandwidth="12957640483.976341" latency="0.000008"/> + <link id="RAM-CUDA1" bandwidth="12191098614.766151" latency="0.000008"/> + <link id="CUDA1-RAM" bandwidth="12601163308.353323" latency="0.000008"/> + + <link id="CUDA0-CUDA1" bandwidth="9713193372.485958" latency="0.000009"/> + <link id="CUDA1-CUDA0" bandwidth="10199149651.830450" 
latency="0.000009"/> + + <link id="PCI:0000:[3a-3b] up" bandwidth="12957640483.976341" latency="0.000000"/> + <link id="PCI:0000:[3a-3b] down" bandwidth="12205049854.385120" latency="0.000000"/> + <link id="PCI:0000:[3a-3b] through" bandwidth="25915280967.952682" latency="0.000000"/> + <link id="PCI:0000:[3b-3b] up" bandwidth="12957640483.976341" latency="0.000000"/> + <link id="PCI:0000:[3b-3b] down" bandwidth="12205049854.385120" latency="0.000000"/> + <link id="PCI:0000:[3b-3b] through" bandwidth="25915280967.952682" latency="0.000000"/> + <link id="PCI:0000:3b:00.0 up" bandwidth="12957640483.976341" latency="0.000000"/> + <link id="PCI:0000:3b:00.0 down" bandwidth="12205049854.385120" latency="0.000000"/> + <link id="PCI:0000:[d7-d8] up" bandwidth="12601163308.353323" latency="0.000000"/> + <link id="PCI:0000:[d7-d8] down" bandwidth="12191098614.766151" latency="0.000000"/> + <link id="PCI:0000:[d7-d8] through" bandwidth="25202326616.706646" latency="0.000000"/> + <link id="PCI:0000:[d8-d8] up" bandwidth="12601163308.353323" latency="0.000000"/> + <link id="PCI:0000:[d8-d8] down" bandwidth="12191098614.766151" latency="0.000000"/> + <link id="PCI:0000:[d8-d8] through" bandwidth="25202326616.706646" latency="0.000000"/> + <link id="PCI:0000:d8:00.0 up" bandwidth="12601163308.353323" latency="0.000000"/> + <link id="PCI:0000:d8:00.0 down" bandwidth="12191098614.766151" latency="0.000000"/> + + <route src="CUDA0" dst="CUDA1" symmetrical="NO"> + <link_ctn id="CUDA0-CUDA1"/> + <link_ctn id="PCI:0000:d8:00.0 down"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] down"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[3a-3b] up"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] up"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + </route> + <route src="CUDA0" dst="RAM" symmetrical="NO"> + <link_ctn 
id="CUDA0-RAM"/> + <link_ctn id="PCI:0000:3b:00.0 up"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] up"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA0" symmetrical="NO"> + <link_ctn id="RAM-CUDA0"/> + <link_ctn id="PCI:0000:3b:00.0 down"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] down"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] down"/> + <link_ctn id="Host"/> + </route> + <route src="CUDA1" dst="CUDA0" symmetrical="NO"> + <link_ctn id="CUDA1-CUDA0"/> + <link_ctn id="PCI:0000:3b:00.0 down"/> + <link_ctn id="PCI:0000:[3b-3b] through"/> + <link_ctn id="PCI:0000:[3b-3b] down"/> + <link_ctn id="PCI:0000:[3a-3b] through"/> + <link_ctn id="PCI:0000:[3a-3b] down"/> + <link_ctn id="Host"/> + <link_ctn id="Host"/> + <link_ctn id="PCI:0000:[d7-d8] up"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] up"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + </route> + <route src="CUDA1" dst="RAM" symmetrical="NO"> + <link_ctn id="CUDA1-RAM"/> + <link_ctn id="PCI:0000:d8:00.0 up"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] up"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] up"/> + <link_ctn id="Host"/> + </route> + <route src="RAM" dst="CUDA1" symmetrical="NO"> + <link_ctn id="RAM-CUDA1"/> + <link_ctn id="PCI:0000:d8:00.0 down"/> + <link_ctn id="PCI:0000:[d8-d8] through"/> + <link_ctn id="PCI:0000:[d8-d8] down"/> + <link_ctn id="PCI:0000:[d7-d8] through"/> + <link_ctn id="PCI:0000:[d7-d8] down"/> + <link_ctn id="Host"/> + </route> + + </AS> + </platform> diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-a100 new file mode 100644 index 
0000000000000000000000000000000000000000..f9c4a8e058b292bc194b8e0f73f504ec19ca6b64 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-a100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 9.280381e+00 1.270381e+00 3.064976e+06 2.897715e+07 330264 +edd26b7f 20505600 0.000000e+00 1.005177e+01 1.202982e+00 1.479117e+05 1.508069e+06 14715 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 8.429460e+00 1.918725e+00 1.056248e+07 9.364906e+07 1253043 +edd26b7f 20505600 0.000000e+00 1.007688e+01 8.341561e-01 9.879370e+04 1.002354e+06 9804 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA 
- 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 2.241122e+05 1.448731e+04 1.676359e+08 3.772625e+13 748 +edd26b7f 20505600 0.000000e+00 4.434364e+03 7.640125e+02 1.910324e+07 8.722537e+10 4308 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..5803273492960175afd8c524aa4969dc13e361fd --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-k40m @@ -0,0 +1,173 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 22118400 0.000000e+00 1.716980e+03 9.933737e+01 1.314091e+08 2.263820e+11 76535 +e95f9525 7388160 0.000000e+00 5.474970e+01 8.925111e+00 1.989056e+05 1.117942e+07 3633 + +#################### +# COMB_0 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 22118400 0.000000e+00 1.722084e+03 1.020452e+02 1.302705e+08 2.251244e+11 75647 +e95f9525 7388160 0.000000e+00 5.522154e+01 8.719124e+00 1.481042e+05 8.382433e+06 2682 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 22118400 0.000000e+00 1.709219e+03 9.956903e+01 1.358829e+08 2.330419e+11 79500 +e95f9525 7388160 0.000000e+00 5.499584e+01 8.718339e+00 2.012848e+05 1.134802e+07 3660 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 
0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 22118400 0.000000e+00 1.703851e+03 9.945841e+01 1.353335e+08 2.313738e+11 79428 +e95f9525 7388160 0.000000e+00 5.560772e+01 8.805018e+00 2.040803e+05 1.163297e+07 3670 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 22118400 0.000000e+00 1.035625e+05 1.472466e+04 1.056752e+09 1.116522e+14 10204 +e95f9525 7388160 0.000000e+00 6.701334e+02 9.347158e+01 3.015600e+04 2.060171e+07 45 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..07251282bfc7bbbd72cbe758e298651969187de0 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-p100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25d86902 36902400 0.000000e+00 9.848560e+02 8.656741e+01 2.228798e+08 2.212004e+11 226307 +f67c68c3 12320640 0.000000e+00 3.221096e+01 7.055998e+00 1.338043e+05 4.516780e+06 4154 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25d86902 36902400 0.000000e+00 9.597184e+02 8.950726e+01 2.646164e+08 2.561662e+11 275723 +f67c68c3 12320640 0.000000e+00 3.224087e+01 7.011461e+00 1.276416e+05 4.309903e+06 3959 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25d86902 36902400 0.000000e+00 2.548175e+05 3.067357e+04 3.750914e+09 9.696481e+14 14720 +f67c68c3 12320640 0.000000e+00 1.613677e+03 2.832545e+02 4.079375e+06 6.785621e+09 2528 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-rtx8000 
b/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..d4d9f0cb4ff35e5deebbf6fe8ed60c47b45bdad3 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-rtx8000 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 1.856070e+04 9.527251e+02 1.054619e+08 1.962603e+12 5682 +edd26b7f 20505600 0.000000e+00 5.183813e+01 8.181819e+00 4.318116e+04 2.294193e+06 833 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 1.858150e+04 9.945675e+02 1.060074e+08 1.975420e+12 5705 +edd26b7f 20505600 0.000000e+00 5.103568e+01 8.346260e+00 4.465622e+04 2.340013e+06 875 + +#################### +# 
COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 3.103804e+05 1.778716e+04 1.739682e+09 5.417366e+14 5605 +edd26b7f 20505600 0.000000e+00 1.753085e+03 2.792114e+02 5.259256e+04 9.453801e+07 30 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..1e50ab1145809cdcf404a1e28ea4de22a187b39a --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgemm.sirocco-v100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 1.242418e+03 6.025330e+01 1.264212e+09 1.574374e+12 1017542 +edd26b7f 20505600 0.000000e+00 3.215395e+01 6.762713e+00 
3.276487e+05 1.100123e+07 10190 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 1.240972e+03 6.390324e+01 1.268481e+09 1.578324e+12 1022167 +edd26b7f 20505600 0.000000e+00 3.216878e+01 6.733004e+00 3.424045e+05 1.149726e+07 10644 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 1.844429e+05 2.476123e+04 4.048262e+10 7.601301e+15 219486 +edd26b7f 20505600 0.000000e+00 5.231596e+03 1.081866e+03 1.195734e+08 6.523110e+11 22856 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..0bd0c1718071cc14bbc12b83a0988b1bdb3fae5b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# 
Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5d4e0fa2 22937600 0.000000e+00 1.627047e+05 1.685973e+04 1.102975e+09 1.813861e+14 6779 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..3c759fd5dcde43012a9858716d7947c4d422a973 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f32f2daa 8847360 0.000000e+00 5.084598e+04 5.890724e+03 7.123521e+07 3.670639e+12 1401 + diff --git 
a/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..7fe284fe1f18670f681617eeedc8f3fad9dffc45 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +6f5a09bc 14205440 0.000000e+00 1.251814e+05 1.024372e+04 3.501324e+08 4.412356e+13 2797 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..a411f420ace48896397c4dfc8dda6904fde4d87b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 
(Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5d4e0fa2 22937600 0.000000e+00 2.353397e+05 9.134317e+03 1.033141e+08 2.435055e+13 439 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..642e048af91b686e9131de502d539121bd111071 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgeqrt.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5d4e0fa2 22937600 0.000000e+00 1.876236e+05 4.015177e+04 1.618816e+09 3.176379e+14 8628 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..9ce06dbc0ef769ce7da7e58b06d46dfea3fdf860 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# 
COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 6.076709e+04 7.059988e+02 3.117352e+07 1.894580e+12 513 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..4b6394e04ed9086ce031443d0955db71249b0b02 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 7372800 0.000000e+00 2.411727e+04 3.889475e+03 9.839845e+06 2.434824e+11 408 + diff --git 
a/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..3ea235c04b33a3160fb9717f7b06f567df945528 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 12300800 0.000000e+00 5.829259e+04 6.985522e+03 1.649680e+07 9.754512e+11 283 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..f8465bfb39fd12eb22520e76b514d834a2b2fc40 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations 
+1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 1.061752e+05 1.010479e+04 2.314620e+07 2.479813e+12 218 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..e261b8487804a053f3b16e217d0421b336bd54a8 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dgetrf_nopiv.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 1.939503e+05 2.097364e+04 9.775094e+07 1.918053e+13 504 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..6040e6484060a294714aa9cb408144d93c2ac41f --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version 
+45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.648350e+03 3.681614e+02 9.510980e+06 1.645951e+10 5770 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..d5b775f13c3b5185ba16aed83ff89082a8202cd2 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 1.086559e+03 2.062540e+02 1.370150e+06 1.542393e+09 1261 + diff --git 
a/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..9ce932c08b03f2fd2e9ad31d235aa05f7593310b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 24601600 0.000000e+00 1.864973e+03 2.943942e+02 4.973883e+06 9.507301e+09 2667 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..72b0cd36f92b1a9803bc1d2d27c7967f0f87104b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 
(Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 2.638292e+03 3.957806e+02 1.155572e+06 3.117347e+09 438 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..a2b5fb53a795aa1d7ea3f0ed889c48417505c8c7 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlacpy.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 3.204854e+03 6.964775e+02 2.553627e+07 8.570514e+10 7968 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..173aa9ea24747bddf288228cf349f4a2b72af5d9 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of 
combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 1.449687e+03 3.128878e+02 6.332233e+06 9.607378e+09 4368 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..ae30f433905cdf66b2261ca73bfcc7a19da1f6a0 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 7372800 0.000000e+00 5.764709e+02 1.111667e+02 8.116711e+05 4.853049e+08 1408 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-p100 
b/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..423edf3a4dc9701bbe1c075740ce4ec83edac6c3 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 12300800 0.000000e+00 9.650206e+02 1.643934e+02 2.687582e+06 2.668837e+09 2785 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..99824c72a1b6c2bd4af18fc88a5c4e92287fe629 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha 
beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 1.712802e+03 3.296704e+02 7.193769e+05 1.277797e+09 420 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..c795cebe2ac0ff04a6bd091fbc0d3b621944d68b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlaset.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 1.746441e+03 3.043779e+02 1.398375e+07 2.516362e+10 8007 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..c18948bf6270fa3b61024a588ff9e99fc3172236 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices 
+1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 4.092602e+04 6.631620e+02 2.148616e+07 8.795739e+11 525 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..c33bcea23102025f29b8a67e823d7cf78dd6e988 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 7372800 0.000000e+00 1.443948e+04 2.124698e+03 3.465476e+06 5.112312e+10 240 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-p100 new file mode 100644 index 
0000000000000000000000000000000000000000..4ba7f5d701958529cf3250f3db8e417eee1dcbf1 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 12300800 0.000000e+00 2.924810e+04 3.412982e+03 9.330143e+06 2.766048e+11 319 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..3d47fe246affff99c7e72875a0a360e89cf42588 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan 
nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 6.381443e+04 5.959570e+02 6.955773e+06 4.439174e+11 109 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..81ffd91d87760deaec7b172f41d9a21b13a0d815 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dlauum.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 3.036862e+04 3.967657e+03 1.065938e+07 3.292363e+11 351 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..6131a981de0efba983d0f12af6cff523a4077066 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-a100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS 
- 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +51c52798 22950400 0.000000e+00 1.028858e+03 2.196775e+01 1.131744e+04 1.164935e+07 11 +db56c93a 43417600 0.000000e+00 5.766941e+02 2.810193e+01 5.473692e+07 3.164141e+10 94915 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +51c52798 22950400 0.000000e+00 1.006632e+03 1.849898e+01 1.107295e+04 1.115016e+07 11 +db56c93a 43417600 0.000000e+00 5.782597e+02 2.851629e+01 6.329399e+07 3.668937e+10 109456 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) 
sum sum2 n +51c52798 22950400 0.000000e+00 9.408407e+02 1.485079e+02 3.028566e+06 2.920392e+09 3219 +db56c93a 43417600 0.000000e+00 4.219179e+05 8.115052e+04 1.719316e+09 7.522456e+14 4075 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..9bcf25597d52f1452bbea64337d6a210a568067c --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-k40m @@ -0,0 +1,172 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +931f6b00 8855040 0.000000e+00 1.323987e+03 2.371398e+02 1.323987e+04 1.809177e+07 10 +e8eda962 16220160 0.000000e+00 6.322641e+03 6.924312e+02 2.558141e+07 1.636819e+11 4046 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +931f6b00 8855040 0.000000e+00 1.190762e+03 1.549319e+02 1.190762e+04 1.441917e+07 10 +e8eda962 16220160 0.000000e+00 6.346269e+03 6.025304e+02 2.629894e+07 1.684046e+11 4144 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e8eda962 16220160 0.000000e+00 6.395582e+03 6.143521e+02 1.240743e+07 8.008493e+10 1940 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +931f6b00 8855040 0.000000e+00 1.350633e+03 2.461995e+02 1.350633e+04 1.884823e+07 10 +e8eda962 16220160 0.000000e+00 6.321142e+03 4.667936e+02 2.639709e+07 1.677697e+11 4176 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +931f6b00 8855040 0.000000e+00 6.214398e+02 1.474458e+02 3.610565e+05 2.370060e+08 581 +e8eda962 16220160 0.000000e+00 1.945595e+05 3.761506e+04 1.703952e+09 3.439117e+14 8758 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..aa421b7cccffaee150c48f555f8bf96e05a10ec3 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-p100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2a4e0c30 14215360 0.000000e+00 6.682375e+02 1.199986e+02 7.350612e+03 5.070351e+06 11 +14f06b24 26506240 0.000000e+00 3.394075e+03 3.738229e+02 9.223060e+07 3.168350e+11 27174 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2a4e0c30 14215360 0.000000e+00 6.168062e+02 5.861131e+01 7.401675e+03 4.606623e+06 12 +14f06b24 26506240 0.000000e+00 3.395175e+03 3.871181e+02 9.063081e+07 3.117079e+11 26694 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2a4e0c30 14215360 0.000000e+00 1.278658e+03 2.544823e+02 1.770941e+06 2.354122e+09 1385 +14f06b24 26506240 0.000000e+00 5.876556e+05 1.173121e+05 3.650516e+09 2.230737e+15 6212 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..fb237a98b936339589b15670d79526caaa2bb03a --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-rtx8000 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, 
MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +51c52798 22950400 0.000000e+00 1.585045e+03 2.262410e+02 6.482834e+05 1.048493e+09 409 +db56c93a 43417600 0.000000e+00 3.785867e+05 1.172254e+04 3.763152e+08 1.426045e+14 994 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +db56c93a 43417600 0.000000e+00 3.498531e+04 3.530707e+03 2.459467e+07 8.692157e+11 703 +51c52798 22950400 0.000000e+00 9.361605e+02 1.090980e+02 9.361605e+03 8.882989e+06 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev 
(us) sum sum2 n +db56c93a 43417600 0.000000e+00 3.493409e+04 3.552512e+03 2.515254e+07 8.877678e+11 720 +51c52798 22950400 0.000000e+00 8.972481e+02 6.032899e+01 8.972481e+03 8.086937e+06 10 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..5f5cf754b2bf068fb75d7d5f422676fe83c8d69b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dormqr.sirocco-v100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +db56c93a 43417600 0.000000e+00 4.833718e+03 4.098602e+02 3.577145e+08 1.741522e+12 74004 +51c52798 22950400 0.000000e+00 7.112361e+02 1.101951e+02 7.112361e+03 5.179997e+06 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +db56c93a 43417600 0.000000e+00 4.830510e+03 3.989399e+02 3.781806e+08 1.839265e+12 78290 +51c52798 22950400 0.000000e+00 7.069872e+02 9.762008e+01 7.069872e+03 5.093606e+06 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +db56c93a 43417600 0.000000e+00 4.607367e+05 5.787821e+04 4.010805e+10 1.877087e+16 87052 +51c52798 22950400 0.000000e+00 1.806142e+03 3.305484e+02 6.848890e+06 1.278439e+10 3792 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..aa5af4acef3687cce5ad22bd4d399da9023183c6 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 
0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 2.169624e+04 4.197097e+03 8.615141e+08 1.939110e+13 39708 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..41a37a73e24ac4d3a827599b1c23a74685019b98 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 7372800 0.000000e+00 1.246947e+04 2.688493e+03 2.102976e+08 2.744199e+12 16865 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..1a84d5233be6f407ba857607f613b535304ce442 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU 
- 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 12300800 0.000000e+00 2.375080e+04 4.674172e+03 4.044762e+08 9.978704e+12 17030 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..6d49307e0752f4048fb30969debfdcbc4972e054 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 2.495938e+04 3.008384e+03 6.037675e+07 1.528859e+12 2419 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-v100 new file mode 100644 index 
0000000000000000000000000000000000000000..3825d0f9d14823690f1ca3e1626ac66b7ed4118e --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dplgsy.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 2.118193e+04 2.125442e+03 1.336283e+09 2.859005e+13 63086 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..c4f9551f5919bad4cdd0b8c252c46d1ed0d52bd7 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-a100 @@ -0,0 +1,41 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan 
+# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 1.663989e+04 3.483703e+03 1.674871e+09 2.909123e+13 100654 +a1328a64 12800 0.000000e+00 7.895258e+00 3.373943e-01 8.187383e+03 6.475955e+04 1037 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..d0f773c4bbaf03a80d56cd7fe899264c22c501bf --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-k40m @@ -0,0 +1,41 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 7372800 0.000000e+00 1.126151e+04 2.310187e+03 2.029774e+08 2.382024e+12 18024 +76ff7c13 7680 0.000000e+00 5.524024e+00 2.234779e-01 5.601360e+03 3.099269e+04 1014 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..7888fb0c11f8385ffa3705e02b9e5171fd11e8f3 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-p100 @@ -0,0 +1,41 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of 
combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 12300800 0.000000e+00 2.152872e+04 3.410300e+03 7.570359e+08 1.670697e+13 35164 +0537b876 9920 0.000000e+00 8.351975e+00 3.091056e-01 5.645935e+03 4.721930e+04 676 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..84102cf9a6265f836a572eda6aa1cebcc391a8db --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-rtx8000 @@ -0,0 +1,41 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1328a64 12800 0.000000e+00 1.324610e+01 3.475353e-01 4.185767e+03 5.548325e+04 316 +5056171e 20480000 
0.000000e+00 2.093052e+04 5.075223e+02 9.207334e+07 1.928276e+12 4399 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..ddd79ced8811f6e0814d5591eb21e878d7de9cfb --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dplrnt.sirocco-v100 @@ -0,0 +1,41 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1328a64 12800 0.000000e+00 1.071651e+01 3.136263e-01 2.314765e+04 2.482744e+05 2160 +5056171e 20480000 0.000000e+00 1.948465e+04 2.010572e+03 3.083251e+09 6.071575e+13 158240 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..ee4c9e43711b05b1f3d1ae8e26c7f8ed0872ab4e --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 
+#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 3.553340e+04 1.099329e+03 3.638621e+07 1.294163e+12 1024 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..a26dfdd60a7eca8b88ad43729875e997cc99fc04 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 7372800 0.000000e+00 1.082371e+04 1.012768e+03 5.097966e+06 5.566199e+10 471 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..106d7731a6850591ad4d9312ac179b264bdeec88 --- /dev/null +++ 
b/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 12300800 0.000000e+00 2.681517e+04 2.816328e+03 1.721534e+07 4.667245e+11 642 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..2d01db4d89fa96d081e3bb1707bdfa12affa8739 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 
20480000 0.000000e+00 4.846914e+04 8.041592e+02 1.061474e+07 5.146290e+11 219 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..ba877d23126486f50a3606f02c93447f0fbe2201 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dpotrf.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 3.210279e+04 6.276922e+03 2.715896e+07 9.052106e+11 846 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..9cea09c6bd2c191e24252b4f06c59ea7add30cb2 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-a100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores 
+1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 6.772559e+00 8.158048e-01 1.455017e+05 9.997170e+05 21484 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 6.249405e+00 9.043301e-01 1.470485e+04 9.382086e+04 2353 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 2.297422e+05 1.542855e+04 2.136603e+07 4.930817e+12 93 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-k40m new file mode 100644 index 
0000000000000000000000000000000000000000..e3a004395dbbc0bd799dc28d07309c13c18ee171 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-k40m @@ -0,0 +1,168 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 22118400 0.000000e+00 2.320435e+03 9.220395e+01 3.517779e+06 8.175666e+09 1516 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 22118400 0.000000e+00 2.347548e+03 1.070450e+02 3.126934e+06 7.355890e+09 1332 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 
+##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 22118400 0.000000e+00 2.340970e+03 1.046722e+02 3.598071e+06 8.439815e+09 1537 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 22118400 0.000000e+00 2.361884e+03 1.109710e+02 2.813003e+06 6.658653e+09 1191 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 22118400 0.000000e+00 1.028510e+05 1.540391e+04 5.636236e+07 5.926957e+12 548 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-p100 new file mode 100644 index 
0000000000000000000000000000000000000000..88a0f8a62848ec97191f92b824e6660bcf21e9d6 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-p100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25d86902 36902400 0.000000e+00 1.580542e+03 1.604825e+02 9.056504e+06 1.446176e+10 5730 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25d86902 36902400 0.000000e+00 1.618657e+03 1.597639e+02 6.269057e+06 1.024631e+10 3873 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 
+##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25d86902 36902400 0.000000e+00 2.528239e+05 2.897815e+04 2.735555e+08 7.006995e+13 1082 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..aea199c749baa70a660f0155a2e217c8ef10c852 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-rtx8000 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 2.186046e+04 2.555772e+03 5.333951e+06 1.181964e+11 244 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 2.206497e+04 2.765855e+03 4.677774e+06 1.048367e+11 212 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 3.107925e+05 1.295836e+04 1.992180e+08 6.202309e+13 641 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..a911b4e0eb7fbe005207dac6bb8c5fe440531cea --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dsymm.sirocco-v100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash 
size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 1.907443e+03 1.181864e+02 1.371833e+07 2.626740e+10 7192 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 1.925253e+03 1.042867e+02 1.340746e+07 2.588849e+10 6964 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 61440000 0.000000e+00 1.831489e+05 2.468048e+04 6.155635e+08 1.147871e+14 3361 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..37384a436652188c41f3dc4942cec9019dc12bef --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-a100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations 
+3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 7.532684e+00 1.219775e+00 2.666118e+05 2.060964e+06 35394 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.257396e+05 1.057821e+04 2.389052e+06 3.025244e+11 19 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n 
+b7b47bbe 40960000 0.000000e+00 1.220351e+01 2.713747e+00 1.269165e+03 1.625417e+04 104 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..f8b8c9d5e0c2dd3507e26c04b6240cd3e69c0067 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-k40m @@ -0,0 +1,168 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 1.235603e+03 5.472129e+01 1.909007e+06 2.363402e+09 1545 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 1.243038e+03 5.062788e+01 2.277246e+06 2.835399e+09 1832 + +#################### +# COMB_3 +# 
number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 1.239656e+03 5.844780e+01 2.147085e+06 2.667564e+09 1732 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 1.241588e+03 5.874542e+01 1.953018e+06 2.430271e+09 1573 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 5.283606e+04 
1.083636e+04 8.971563e+07 4.939611e+12 1698 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..3704e3b591d924e445a782abe496aab9bb70c405 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-p100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 24601600 0.000000e+00 5.847672e+02 7.903459e+01 3.303935e+06 1.967325e+09 5650 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 24601600 0.000000e+00 5.846892e+02 8.025200e+01 3.123995e+06 1.860977e+09 5343 + +#################### +# COMB_2 +# number of types devices +1 +#################### 
+# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 24601600 0.000000e+00 1.093641e+05 2.352867e+04 4.335192e+08 4.960588e+13 3964 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..eec59b729867cda12c2b742e7dd7e3dc0d739e10 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-rtx8000 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.132424e+04 1.205650e+03 5.073258e+06 5.810198e+10 448 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device 
id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.364968e+05 8.483923e+02 8.026014e+07 1.095568e+13 588 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.137034e+04 1.118593e+03 5.071170e+06 5.821896e+10 446 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..9a7302e38c723160b798fac71b38b53d0d9aada8 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dsyrk.sirocco-v100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for 
cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.094470e+03 1.613789e+02 4.369124e+06 4.885840e+09 3992 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 8.926816e+04 1.076655e+04 1.588973e+09 1.439081e+14 17800 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.090884e+03 1.587604e+02 4.186812e+06 4.664061e+09 3838 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-a100 new file mode 100644 index 
0000000000000000000000000000000000000000..c2264ca843cc603d26238d916fa753215fdae8b6 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-a100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +30d9b4cb 22963200 0.000000e+00 1.892479e+03 5.066208e+01 3.027967e+04 5.734472e+07 16 +3e9a8c96 63897600 0.000000e+00 1.235605e+03 7.905414e+01 3.650607e+08 4.529173e+11 295451 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +30d9b4cb 22963200 0.000000e+00 1.879738e+03 3.469355e+01 2.067712e+04 3.888080e+07 11 +3e9a8c96 63897600 0.000000e+00 1.244149e+03 8.029512e+01 3.608132e+08 4.507752e+11 290008 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, 
OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +30d9b4cb 22963200 0.000000e+00 1.471617e+03 2.312922e+02 1.347265e+07 2.031634e+10 9155 +3e9a8c96 63897600 0.000000e+00 8.707704e+05 1.409713e+05 3.491789e+08 3.120237e+14 401 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..8a621ea71e37251283ea8b4720251ad9f8afe3c7 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-k40m @@ -0,0 +1,172 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +c2d58885 8862720 0.000000e+00 1.668058e+03 2.416868e+02 1.668058e+04 2.840831e+07 10 +0a3ffb47 23592960 0.000000e+00 1.200691e+04 1.213711e+03 3.192518e+08 3.872396e+12 26589 + +#################### +# COMB_4 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0a3ffb47 23592960 0.000000e+00 1.207428e+04 1.195722e+03 1.348939e+08 1.644720e+12 11172 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +c2d58885 8862720 0.000000e+00 1.913130e+03 2.107777e+02 1.913130e+04 3.704492e+07 10 +0a3ffb47 23592960 0.000000e+00 1.215727e+04 1.073472e+03 3.358566e+08 4.114933e+12 27626 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) 
dev (us) sum sum2 n +c2d58885 8862720 0.000000e+00 1.729050e+03 2.840316e+02 1.729050e+04 3.070289e+07 10 +0a3ffb47 23592960 0.000000e+00 1.230783e+04 1.264468e+03 3.228589e+08 4.015633e+12 26232 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +c2d58885 8862720 0.000000e+00 1.011244e+03 1.928603e+02 8.494453e+04 8.902406e+07 84 +0a3ffb47 23592960 0.000000e+00 3.952662e+05 8.389145e+04 4.790626e+08 1.978871e+14 1212 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..ee38fc5967352c973a1ef70e2797e3acff237337 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-p100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9f59e095 14225280 0.000000e+00 8.108513e+02 9.380892e+01 9.730216e+03 7.995360e+06 12 +df989b2e 38807040 0.000000e+00 5.998572e+03 6.554418e+02 6.669272e+08 4.048374e+12 111181 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9f59e095 14225280 0.000000e+00 8.590476e+02 1.403640e+02 9.449524e+03 8.334314e+06 11 +df989b2e 38807040 0.000000e+00 5.971102e+03 5.515659e+02 6.496619e+08 3.912297e+12 108801 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9f59e095 14225280 0.000000e+00 2.038728e+03 3.228532e+02 7.991812e+06 1.670173e+10 3920 +df989b2e 38807040 0.000000e+00 1.209392e+06 2.368456e+05 2.418783e+08 3.037447e+14 200 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-rtx8000 new file mode 
100644 index 0000000000000000000000000000000000000000..c257ddd96548fe33377354725edd62b5949f7477 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-rtx8000 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +3e9a8c96 63897600 0.000000e+00 6.641857e+04 2.445647e+03 1.461209e+08 9.718297e+12 2200 +30d9b4cb 22963200 0.000000e+00 1.571078e+03 1.048238e+02 1.571078e+04 2.479274e+07 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +3e9a8c96 63897600 0.000000e+00 6.640252e+04 2.376891e+03 1.452887e+08 9.659898e+12 2188 +30d9b4cb 22963200 0.000000e+00 1.607716e+03 4.989840e+01 1.768488e+04 2.845966e+07 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 
0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +30d9b4cb 22963200 0.000000e+00 2.282395e+03 3.476556e+02 1.693537e+06 3.955003e+09 742 +3e9a8c96 63897600 0.000000e+00 6.715577e+05 1.494539e+04 3.069019e+08 2.062044e+14 457 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..2d35a5298f45ea2a878a1bbb55341040f46268df --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpmqrt.sirocco-v100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +3e9a8c96 63897600 0.000000e+00 8.009596e+03 7.136232e+02 3.539240e+09 2.857291e+13 441875 +30d9b4cb 22963200 0.000000e+00 1.184399e+03 2.031306e+02 1.658159e+04 2.021689e+07 14 + +#################### +# COMB_0 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +3e9a8c96 63897600 0.000000e+00 8.001326e+03 7.366804e+02 3.474984e+09 2.804017e+13 434301 +30d9b4cb 22963200 0.000000e+00 1.230111e+03 1.965751e+02 1.722156e+04 2.172542e+07 14 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +3e9a8c96 63897600 0.000000e+00 1.052554e+06 1.263154e+05 5.300661e+09 5.659583e+15 5036 +30d9b4cb 22963200 0.000000e+00 2.508265e+03 4.738914e+02 2.489955e+07 6.468402e+10 9927 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..23835024f3ca1018da8d88bb5fd418bc9962aaa6 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 
+#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9919f4f0 43417600 0.000000e+00 2.401157e+05 2.821546e+04 4.523060e+09 1.101054e+15 18837 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..11d342a6f3cbd320be73d961c8fd8e4556b4ba37 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4e2afbda 16220160 0.000000e+00 8.035258e+04 1.181490e+04 2.791448e+08 2.291495e+13 3474 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-p100 
b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..cc45d34b0aaf8afb6d5685515311478eb1c93194 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +13bbad10 26506240 0.000000e+00 1.937585e+05 2.241640e+04 1.502791e+09 2.950759e+14 7756 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..b065376d9c59dd57831c1ed00a6bab8cfc956dd8 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha 
beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9919f4f0 43417600 0.000000e+00 3.425062e+05 1.632552e+04 2.633873e+08 9.041673e+13 769 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..8280083d5b03e6993d66d88a7032555044a75a7f --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtpqrt.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9919f4f0 43417600 0.000000e+00 2.345818e+05 4.110699e+04 4.669820e+09 1.129093e+15 19907 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..8cd59462ab7cbe2b92c27a0fe9676959e6d1b86a --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-a100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices 
+1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 8.612200e+00 1.323912e+00 8.612200e+01 7.592273e+02 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.009220e+05 4.636851e+02 1.412908e+06 1.425966e+11 14 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 7.265169e+00 5.238989e-01 8.845343e+04 
6.459708e+05 12175 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..1b5e59566c50c88426c4b70a3bbef8cd8c164709 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-k40m @@ -0,0 +1,168 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 3.413428e+04 6.356438e+03 3.959576e+07 1.398442e+12 1160 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 1.951458e+03 6.288689e+01 7.552142e+05 1.475299e+09 387 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type 
(CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 1.955869e+03 5.304498e+01 8.058180e+05 1.577234e+09 412 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 1.974077e+03 1.012851e+02 8.330606e+05 1.648855e+09 422 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 1.977852e+03 9.827575e+01 7.772957e+05 1.541171e+09 393 + diff --git 
a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..7ee1bb5e425bef25702395521c3b202bffe95232 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-p100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 24601600 0.000000e+00 8.333315e+04 1.479312e+04 1.211664e+08 1.041536e+13 1454 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 24601600 0.000000e+00 1.090924e+03 2.041982e+01 1.970208e+06 2.150100e+09 1806 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, 
MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 24601600 0.000000e+00 1.089632e+03 1.894202e+01 2.048507e+06 2.232793e+09 1880 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..4af98675768abca9743d418dce0c6985767b9418 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-rtx8000 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.365869e+05 5.330302e+02 1.734654e+07 2.369346e+12 127 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores 
+1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.072977e+04 2.022184e+02 1.920629e+06 2.061523e+10 179 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.073766e+04 2.072112e+02 1.997205e+06 2.145330e+10 186 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..ad98cc47d5a27dab8228bed4b9f0661b6e3e46ae --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrmm.sirocco-v100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 
sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 9.224019e+04 1.110135e+04 4.246738e+08 3.973939e+13 4604 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.431355e+03 2.785546e+01 2.834083e+06 4.058114e+09 1980 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.433429e+03 3.426837e+01 2.812387e+06 4.033661e+09 1962 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..a4b284d512db1d381d547fcae49dca0c19ffe28e --- /dev/null +++ 
b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-a100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.543425e+01 1.403407e+00 5.526389e+05 8.600090e+06 35806 +46d0e6c4 20492800 0.000000e+00 9.020636e+00 1.083011e+00 1.930416e+03 1.766458e+04 214 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 6.818519e+00 9.341472e-01 9.069312e+04 6.299996e+05 13301 +46d0e6c4 20492800 0.000000e+00 8.441222e+00 7.567110e-01 5.444588e+03 4.632831e+04 645 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.064229e+05 2.926076e+03 4.576184e+06 4.873789e+11 43 +46d0e6c4 20492800 0.000000e+00 2.776986e+03 4.723803e+02 6.109369e+04 1.745654e+08 22 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..7ac7677b504f78f610488292e2576f6c9d3fef67 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-k40m @@ -0,0 +1,173 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 3.188271e+03 4.345341e+02 9.835816e+06 3.194175e+10 3085 +47fadc35 7380480 0.000000e+00 1.976731e+02 1.900438e+01 3.676720e+04 7.335064e+06 186 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 
+#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 3.040194e+03 1.989142e+02 1.053731e+07 3.217262e+10 3466 +47fadc35 7380480 0.000000e+00 1.977043e+02 1.759233e+01 3.301662e+04 6.579211e+06 167 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 3.219425e+03 4.162404e+02 1.025387e+07 3.356338e+10 3185 +47fadc35 7380480 0.000000e+00 1.973297e+02 1.522773e+01 4.439918e+04 8.813448e+06 225 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 
n +a1187f60 14745600 0.000000e+00 3.046143e+03 1.963419e+02 1.069196e+07 3.270456e+10 3510 +47fadc35 7380480 0.000000e+00 1.961209e+02 1.958339e+01 3.471339e+04 6.875901e+06 177 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 14745600 0.000000e+00 5.224320e+04 1.082467e+04 3.797036e+08 2.068855e+13 7268 +47fadc35 7380480 0.000000e+00 3.080339e+02 5.804194e+01 1.777356e+05 5.669243e+07 577 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..ca0a7a26ebca4ec545fdb1fdd3be57efe4acba75 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-p100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# 
hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 24601600 0.000000e+00 1.795366e+03 1.839093e+02 1.571843e+07 2.851645e+10 8755 +77bc0d5f 12310720 0.000000e+00 1.186741e+02 1.096616e+01 2.064929e+04 2.471461e+06 174 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 24601600 0.000000e+00 1.841602e+03 1.901150e+02 1.367206e+07 2.544682e+10 7424 +77bc0d5f 12310720 0.000000e+00 1.174845e+02 1.366600e+01 1.633035e+04 1.944523e+06 139 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 24601600 0.000000e+00 1.249452e+05 2.525986e+04 3.694630e+08 4.804938e+13 2957 +77bc0d5f 12310720 0.000000e+00 5.649668e+02 1.150511e+02 3.107318e+05 1.828334e+08 550 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-rtx8000 new file mode 100644 index 
0000000000000000000000000000000000000000..fbcbd3a02b1ae70ed8b822b0471c9b135a9f1e57 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-rtx8000 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.548522e+04 1.940492e+03 1.320889e+07 2.077546e+11 853 +46d0e6c4 20492800 0.000000e+00 2.156217e+02 2.213893e+01 2.997142e+04 6.530617e+06 139 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.557272e+04 1.989153e+03 1.241146e+07 1.964337e+11 797 +46d0e6c4 20492800 0.000000e+00 2.215708e+02 2.530523e+01 2.570221e+04 5.769142e+06 116 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, 
OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.376622e+05 1.080634e+04 3.175867e+08 4.398908e+13 2307 +46d0e6c4 20492800 0.000000e+00 8.256864e+02 1.675035e+02 4.813752e+05 4.138224e+08 583 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..651506daeb87a57b5ec753fbce7a0bd2ff34cda3 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrsm.sirocco-v100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.967933e+03 2.315649e+02 2.103720e+07 4.197302e+10 10690 +46d0e6c4 20492800 0.000000e+00 1.358853e+02 2.182176e+01 2.867179e+04 3.996551e+06 211 + +#################### +# COMB_1 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 1.974790e+03 2.401514e+02 2.096832e+07 4.202041e+10 10618 +46d0e6c4 20492800 0.000000e+00 1.335816e+02 1.624314e+01 3.326181e+04 4.508862e+06 249 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 40960000 0.000000e+00 8.562359e+04 1.018059e+04 3.961204e+09 3.439674e+14 46263 +46d0e6c4 20492800 0.000000e+00 2.243644e+03 5.423497e+02 1.821839e+06 4.326403e+09 812 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..a44143ecba921a69556733c3ed0d5433effc216e --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 
+#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 3.727893e+04 1.877647e+03 1.576899e+07 5.893423e+11 423 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..8671814eb5913584d10147cb785f91daa03080d2 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 7372800 0.000000e+00 1.547712e+04 2.707449e+03 2.693019e+06 4.295565e+10 174 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-p100 
b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..cc865c910726ae0bc89e4f7eee4821b9644fde52 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 12300800 0.000000e+00 3.376110e+04 5.002382e+03 7.630009e+06 2.632529e+11 226 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..978a6d4ef5798eb049853a393f8902d16303d1b6 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha 
beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 5.550471e+04 1.064425e+03 6.050013e+06 3.359277e+11 109 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..05ce82ceb9a8cc04b33d6516708f6f8aa4ad74eb --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/dtrtri.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 20480000 0.000000e+00 3.218226e+04 4.270007e+03 8.914487e+06 2.919389e+11 277 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..6045f2285d20e8935c6471e3f5cb4d0eb860a32c --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-a100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 7.010326e+00 1.314763e+00 1.411205e+07 1.024098e+08 2013038 +edd26b7f 10252800 0.000000e+00 1.308512e+01 2.197058e+00 2.786607e+05 3.749105e+06 21296 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 6.854397e+00 1.233809e+00 1.442360e+07 1.020684e+08 2104284 +edd26b7f 10252800 0.000000e+00 1.382306e+01 2.170993e+00 1.955963e+05 2.770431e+06 14150 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan 
nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 1.130044e+05 7.643708e+03 1.473578e+08 1.672826e+13 1304 +edd26b7f 10252800 0.000000e+00 3.420990e+03 2.772390e+02 1.402606e+05 4.829814e+08 41 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..3dcb86afb6d258aaa96a88544b9ad48ecd845d9f --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-k40m @@ -0,0 +1,173 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 11059200 0.000000e+00 8.730639e+02 1.409022e+02 7.005465e+07 6.275523e+10 80240 +e95f9525 3694080 0.000000e+00 4.267057e+01 7.946726e+00 1.531874e+05 6.763302e+06 3590 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 11059200 0.000000e+00 8.795718e+02 1.383741e+02 6.671376e+07 6.013183e+10 75848 +e95f9525 3694080 0.000000e+00 4.317621e+01 7.989331e+00 1.465401e+05 6.543681e+06 3394 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 11059200 0.000000e+00 8.767296e+02 1.389004e+02 6.722675e+07 6.041907e+10 76679 +e95f9525 3694080 0.000000e+00 4.313920e+01 7.207348e+00 1.181583e+05 5.239534e+06 2739 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 11059200 0.000000e+00 8.743590e+02 1.399988e+02 7.013933e+07 6.289919e+10 80218 +e95f9525 3694080 0.000000e+00 4.286880e+01 7.971212e+00 1.581859e+05 7.015702e+06 3690 + +#################### +# COMB_4 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 11059200 0.000000e+00 4.804468e+04 9.089634e+03 5.768244e+08 2.870529e+13 12006 +e95f9525 3694080 0.000000e+00 5.316907e+02 1.149995e+02 2.286270e+04 1.272455e+07 43 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..0754e630052799885a0b8994ffca74e0c943386a --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-p100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f67c68c3 6160320 0.000000e+00 2.796161e+01 7.103185e+00 1.610589e+04 4.794087e+05 576 +25d86902 18451200 0.000000e+00 5.025851e+02 4.190724e+01 8.545958e+07 4.324934e+10 170040 + 
+#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f67c68c3 6160320 0.000000e+00 2.896500e+01 7.427577e+00 4.341853e+04 1.340316e+06 1499 +25d86902 18451200 0.000000e+00 4.923234e+02 4.866189e+01 1.102406e+08 5.480425e+10 223919 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f67c68c3 6160320 0.000000e+00 5.378909e+03 9.544152e+02 1.452305e+05 8.057763e+08 27 +25d86902 18451200 0.000000e+00 1.234968e+05 1.713568e+04 2.076845e+09 2.614217e+14 16817 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..6b75d4c6c60b26aa301c8883cb0ad618a3020a88 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-rtx8000 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + 
+#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +edd26b7f 10252800 0.000000e+00 4.842141e+01 1.127718e+01 2.566335e+04 1.310058e+06 530 +962518a7 30720000 0.000000e+00 7.302800e+02 8.945566e+01 7.191067e+06 5.330291e+09 9847 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +edd26b7f 10252800 0.000000e+00 4.828540e+01 1.104268e+01 3.104751e+04 1.577550e+06 643 +962518a7 30720000 0.000000e+00 7.307942e+02 9.008620e+01 7.724495e+06 5.730797e+09 10570 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# 
sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +edd26b7f 10252800 0.000000e+00 7.976926e+02 7.712095e+01 3.206724e+05 2.581890e+08 402 +962518a7 30720000 0.000000e+00 1.676638e+05 3.078492e+04 1.810769e+07 3.138357e+12 108 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..98494754815f9f569fb2b4a6d3c2b28cf81d9fcf --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgemm.sirocco-v100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 6.703259e+02 4.027193e+01 6.716002e+08 4.518159e+11 1001901 +edd26b7f 10252800 0.000000e+00 1.585181e+01 2.683803e+00 1.772073e+05 2.889577e+06 11179 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model 
for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 6.647689e+02 4.147541e+01 6.644159e+08 4.434024e+11 999469 +edd26b7f 10252800 0.000000e+00 1.687616e+01 3.109630e+00 2.070029e+05 3.612023e+06 12266 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 7.929866e+04 4.170270e+03 1.914571e+10 1.522428e+15 241438 +edd26b7f 10252800 0.000000e+00 2.676826e+03 5.573453e+02 2.677362e+07 7.477528e+10 10002 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..df1b0943056b80076adca0a15b9f7798a9959ef3 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ceeb8578 11468800 0.000000e+00 7.847925e+04 1.011361e+04 5.302843e+08 4.230746e+13 6757 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..ac3afdd94ef241b109b483743789bac45893fcc1 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e4520ea1 4423680 0.000000e+00 2.770842e+04 3.341256e+03 3.915200e+07 1.100615e+12 1413 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..c4eab247e1bcf48ab78d75d7825980ebc73e8ac3 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# 
Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9e519c1a 7102720 0.000000e+00 6.463017e+04 5.674431e+03 1.782500e+08 1.160914e+13 2758 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..bf02f96b49a797552ae223aa5bdf475055e20e08 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ceeb8578 11468800 0.000000e+00 1.228270e+05 5.259429e+03 7.787235e+07 9.582368e+12 634 + diff --git 
a/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..362a9074194e5eb3e8c6841fd171997a9356be5b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgeqrt.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ceeb8578 11468800 0.000000e+00 1.086836e+05 2.765066e+04 8.976175e+08 1.038708e+14 8259 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..afe2c6a23ef661a260ee72c6c3405e596bb4d3a6 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for 
cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 3.097313e+04 1.302496e+03 1.567240e+07 4.862818e+11 506 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..16eb19aebbcae6cfef9497ce9bcaaf8ff5811d4b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 3686400 0.000000e+00 1.180539e+04 1.733580e+03 4.840210e+06 5.837275e+10 410 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..690f3ad70487266186cf6e863c37d61aa34b24c9 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + 
+#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 6150400 0.000000e+00 2.851524e+04 2.868119e+03 8.155358e+06 2.349047e+11 286 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..8cf0dacfa8e2aedb6387d4fd283943a940e930f9 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 5.062915e+04 4.898540e+02 1.103715e+07 5.588541e+11 218 + diff --git 
a/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..dbc50d2fc68965bf00c8276708bf6842fc2e4cbf --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sgetrf_nopiv.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 8.378867e+04 1.022018e+04 4.717302e+07 4.011371e+12 563 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..7217d3d06bce31931fca309d1455d7efc4bbb95d --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for 
cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 1.024541e+03 1.940754e+02 3.296974e+06 3.499092e+09 3218 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..d250a7b526f0d4d1ee9bfd15310145b5775ccd75 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 6.157875e+02 1.139171e+02 6.933768e+05 4.415850e+08 1126 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..0b0289564c2baf659e65f1497e5795230cb5e514 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# 
number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 12300800 0.000000e+00 9.880737e+02 1.907983e+02 2.327902e+06 2.385906e+09 2356 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..f44ae059e4df6e2948a2c9cadf9ae3e78a44320f --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 1.298321e+03 2.275760e+02 7.958705e+05 1.065043e+09 613 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-v100 
b/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..8ad0b741c7244edcce39ded8a2238e20aca44ec3 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slacpy.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 1.721722e+03 3.564447e+02 1.333301e+07 2.393963e+10 7744 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..63b8dcc73b05f81b87cf48b3324c65d3a08dd61e --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n 
minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 8.932391e+02 1.573113e+02 3.050411e+06 2.809257e+09 3415 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..6757dfcf5ec58e5b39435b7d91a9de2e86ece1db --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 3686400 0.000000e+00 3.150291e+02 5.449490e+01 4.004019e+05 1.299127e+08 1271 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..28858ec278c5ed6a91aad9a8c5a05f36fb0079e2 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 6150400 0.000000e+00 4.905721e+02 9.096803e+01 1.234279e+06 6.263234e+08 2516 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..1d61b458b1e916ccfa2b9f5be1db242d63923e98 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 1.023383e+03 9.595977e+01 3.796752e+05 3.919695e+08 371 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-v100 new file mode 100644 index 
0000000000000000000000000000000000000000..283b3713f5b5a7a7827afc20a696762a1cad1fd7 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slaset.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 1.092825e+03 2.011901e+02 7.159097e+06 8.088808e+09 6551 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..e945881d7fae2bb4a8bda9512cb06bbf2569d772 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan 
+# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 1.913992e+04 7.216970e+02 1.709195e+07 3.276036e+11 893 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..9cb835f9db6666173a9542c5a5d3c72fa9036a2a --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 3686400 0.000000e+00 6.832855e+03 1.014325e+03 1.708214e+06 1.192919e+10 250 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..36fd415c1894c1a9a2d0cd2c90a2017b365e76d5 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 
+#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 6150400 0.000000e+00 1.476199e+04 1.627703e+03 4.960028e+06 7.411008e+10 336 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..06955b8ca68b9f2a19acad1e2d2ecabbbfbd1b36 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 2.841681e+04 4.184480e+02 3.097432e+06 8.803820e+10 109 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..4c3b3b54c24dd4daba547b9a9c49a27b7bf1ad0f --- /dev/null +++ 
b/simucore/perfmodels/.starpu/sampling/codelets/45/slauum.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 1.767042e+04 2.135528e+03 7.704302e+06 1.381266e+11 436 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..23ace87766e2b43d93b9d82eeb39cf58252ce78d --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-a100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +c260ad42 
11475200 0.000000e+00 1.057969e+03 1.624345e+01 1.057969e+04 1.119562e+07 10 +48f343e0 21708800 0.000000e+00 1.834582e+02 6.610245e+00 1.006470e+07 1.848849e+09 54861 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +c260ad42 11475200 0.000000e+00 1.052459e+03 3.126884e+00 1.157705e+04 1.218448e+07 11 +48f343e0 21708800 0.000000e+00 1.838439e+02 7.078813e+00 8.735526e+06 1.608354e+09 47516 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +c260ad42 11475200 0.000000e+00 3.716675e+02 5.514713e+01 1.034351e+06 3.928981e+08 2783 +48f343e0 21708800 0.000000e+00 1.784569e+05 1.784276e+04 2.870301e+09 5.173455e+14 16084 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..4db1bca0a349dbd87644729ee9700fbec838a9e1 
--- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-k40m @@ -0,0 +1,172 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8462480b 4427520 0.000000e+00 9.488468e+02 6.789487e+01 9.488468e+03 9.049200e+06 10 +ff908a69 8110080 0.000000e+00 4.696108e+03 5.963449e+02 8.720672e+06 4.161361e+10 1857 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8462480b 4427520 0.000000e+00 8.816629e+02 1.706745e+02 9.698292e+03 8.871052e+06 11 +ff908a69 8110080 0.000000e+00 4.605113e+03 4.394329e+02 9.081282e+06 4.220112e+10 1972 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff908a69 8110080 0.000000e+00 4.741653e+03 6.838719e+02 4.253263e+06 2.058701e+10 897 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8462480b 4427520 0.000000e+00 8.316703e+02 1.537040e+02 8.316703e+03 7.153004e+06 10 +ff908a69 8110080 0.000000e+00 4.676049e+03 6.908376e+02 8.599253e+06 4.108820e+10 1839 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8462480b 4427520 0.000000e+00 2.485350e+02 1.518400e+01 4.970700e+02 1.240004e+05 2 +ff908a69 8110080 0.000000e+00 6.439117e+04 
1.466673e+04 8.923972e+08 6.044374e+13 13859 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..f402392ae540e3f7ba1e29992bb7681b595bdd23 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-p100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +db459996 7107680 0.000000e+00 5.552932e+02 6.844941e+01 5.552932e+03 3.130359e+06 10 +e5fbfe82 13253120 0.000000e+00 1.663608e+03 1.675558e+02 4.112939e+07 6.911729e+10 24723 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +db459996 7107680 0.000000e+00 5.249922e+02 2.917290e+01 5.249922e+03 2.764679e+06 
10 +e5fbfe82 13253120 0.000000e+00 1.670848e+03 1.632964e+02 4.335683e+07 7.313462e+10 25949 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +db459996 7107680 0.000000e+00 4.537722e+02 7.768565e+01 5.967104e+05 2.787067e+08 1315 +e5fbfe82 13253120 0.000000e+00 2.468844e+05 4.861313e+04 2.390335e+09 6.130173e+14 9682 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..96cbe015970d0e368ff7d334793ba450c6a1c194 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-rtx8000 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +48f343e0 21708800 
0.000000e+00 2.940234e+03 3.189091e+02 5.301243e+06 1.577027e+10 1803 +c260ad42 11475200 0.000000e+00 7.497480e+02 1.252966e+02 7.497480e+03 5.778213e+06 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +48f343e0 21708800 0.000000e+00 2.963709e+03 3.139775e+02 4.984958e+06 1.493978e+10 1682 +c260ad42 11475200 0.000000e+00 6.611876e+02 1.291555e+02 6.611876e+03 4.538502e+06 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +48f343e0 21708800 0.000000e+00 1.960557e+05 1.202180e+03 2.548724e+06 4.997107e+11 13 +c260ad42 11475200 0.000000e+00 7.402658e+02 1.116272e+02 3.027687e+05 2.292257e+08 409 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..e1aa8ef46f0b11e1cd6b99d07c1d17020edbf1f9 --- /dev/null +++ 
b/simucore/perfmodels/.starpu/sampling/codelets/45/sormqr.sirocco-v100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +48f343e0 21708800 0.000000e+00 1.898418e+03 1.349363e+02 1.228580e+08 2.344142e+11 64716 +c260ad42 11475200 0.000000e+00 9.816207e+02 1.104197e+02 1.079783e+04 1.073349e+07 11 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +48f343e0 21708800 0.000000e+00 1.901676e+03 1.446794e+02 1.128455e+08 2.158377e+11 59340 +c260ad42 11475200 0.000000e+00 4.268515e+03 4.659905e+02 4.268515e+04 1.843736e+08 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +48f343e0 21708800 0.000000e+00 2.205795e+05 2.451683e+04 2.526959e+10 5.642812e+15 114560 +c260ad42 11475200 0.000000e+00 1.485314e+03 3.167888e+02 5.632311e+06 8.746298e+09 3792 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..70533b6432730014242170ed93c06f72680eab94 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 1.521444e+04 1.588236e+03 1.232400e+09 1.895460e+13 81002 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-k40m new file mode 100644 index 
0000000000000000000000000000000000000000..15fc62cad6c801032b77ded9228e51a7fa947c48 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 3686400 0.000000e+00 7.174138e+03 1.550642e+03 1.591224e+08 1.194897e+12 22180 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..22ad076dd6ccba742237fcf344dec819c3b6d93e --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan 
+# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 6150400 0.000000e+00 1.721313e+04 4.129000e+03 3.268602e+08 5.950026e+12 18989 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..262f0ad50956dfc167c2b48f3356c43ba93c18cb --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 2.112366e+04 8.036940e+02 7.665777e+07 1.621637e+12 3629 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..eb4653396d1a959fde14e59fc7dc90852959b458 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/splgsy.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, 
MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 1.737926e+04 5.962899e+02 1.096996e+09 1.908743e+13 63121 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..53ec2fefa0787d94cdd4353df20d4a1cc585ebb0 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-a100 @@ -0,0 +1,41 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 1.358644e+04 7.140016e+02 1.376443e+09 1.875261e+13 101310 +a1328a64 6400 0.000000e+00 7.938468e+00 2.963411e-01 8.240130e+03 6.550517e+04 1038 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-k40m new file mode 100644 index 
0000000000000000000000000000000000000000..c1c0c688d41e7052cb2c9b6c2cdf9658fbe3bccd --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-k40m @@ -0,0 +1,41 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 3686400 0.000000e+00 5.872392e+03 8.965279e+02 1.306372e+08 7.850335e+11 22246 +76ff7c13 3840 0.000000e+00 5.681637e+00 2.133286e-01 5.761180e+03 3.277908e+04 1014 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..a19317bebe1e0b43982edfa54f20cf51cbe0c22a --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-p100 @@ -0,0 +1,41 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0537b876 4960 0.000000e+00 8.602908e+00 3.249297e-01 2.890577e+03 2.490284e+04 336 +e82ddea3 6150400 0.000000e+00 1.124820e+04 1.267652e+03 4.168695e+08 4.748585e+12 37061 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..db83100d23d308ce906ce2a83b54d65aee125079 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-rtx8000 @@ -0,0 +1,41 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1328a64 6400 0.000000e+00 1.151690e+01 4.269088e-01 4.503106e+03 5.193306e+04 391 +5056171e 10240000 0.000000e+00 1.765861e+04 2.026620e+02 1.165292e+08 2.058014e+12 6599 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..4694c787d0ecf3a4085018d12c6edc1a153701fe --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/splrnt.sirocco-v100 @@ -0,0 +1,41 @@ 
+################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1328a64 6400 0.000000e+00 9.342749e+00 2.312903e-01 2.018034e+04 1.886554e+05 2160 +5056171e 10240000 0.000000e+00 1.426911e+04 2.866621e+02 2.258044e+09 3.223330e+13 158247 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..d3f595d130f1cd2b0930b1ffa4d0dee51be64812 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 
0.000000e+00 1.972465e+04 2.153181e+03 2.712140e+07 5.413349e+11 1375 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..1d840e85875aa6349d307886cc0b32b85314b6e3 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 3686400 0.000000e+00 6.577738e+03 8.165028e+02 3.387535e+06 2.262566e+10 515 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..2e6aebe0df5c18d9c780673f105d1a81d1c54e4d --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 6150400 0.000000e+00 1.595710e+04 1.649823e+03 1.073913e+07 1.731973e+11 673 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..7f392f91cf06c244c7ded99319f63d97a6a10f27 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 2.588666e+04 2.059756e+02 5.643292e+06 1.460952e+11 218 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..50b95221da510654da98abe55d4b7e39de172972 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/spotrf.sirocco-v100 @@ -0,0 +1,40 @@ 
+################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 1.732443e+04 2.527183e+03 1.467379e+07 2.596244e+11 847 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..3541a3c72e02890e5480ce899770273bca337e48 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-a100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 2.346352e+01 2.280066e+00 3.189396e+05 7.554110e+06 13593 + 
+#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 2.337649e+01 1.977487e+00 2.526297e+05 5.947857e+06 10807 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 1.293824e+05 2.164776e+04 1.203256e+07 1.600384e+12 93 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..96371a0ca15f816c69ada9d25e52a0daff684418 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-k40m @@ -0,0 +1,168 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, 
OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 11059200 0.000000e+00 1.771876e+03 4.869391e+01 2.259142e+06 4.005944e+09 1275 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 11059200 0.000000e+00 1.787076e+03 3.916118e+01 2.485822e+06 4.444485e+09 1391 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 11059200 0.000000e+00 1.810860e+03 6.748308e+01 2.567800e+06 4.656384e+09 1418 + +#################### +# COMB_2 +# number of 
types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 11059200 0.000000e+00 1.816794e+03 6.597805e+01 2.463573e+06 4.481706e+09 1356 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +744ff412 11059200 0.000000e+00 4.756682e+04 8.623450e+03 3.486648e+07 1.712997e+12 733 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..ae3e2c25e66e6c0ce2b10ef2c527bae8fdc9aab1 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-p100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 
+#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25d86902 18451200 0.000000e+00 9.568978e+02 9.273883e+01 6.414086e+06 6.195274e+09 6703 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25d86902 18451200 0.000000e+00 9.963474e+02 1.294759e+02 3.948525e+06 4.000538e+09 3963 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25d86902 18451200 0.000000e+00 1.935997e+05 3.955176e+04 6.969590e+06 1.405627e+12 36 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-rtx8000 
b/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..36489988125124b600d401e43d0eb1d34ebd1e3e --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-rtx8000 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 1.562609e+03 1.591811e+02 8.063062e+05 1.273016e+09 516 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 1.584670e+03 1.441692e+02 8.303670e+05 1.326749e+09 524 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device 
id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 1.515670e+05 1.788533e+04 8.336185e+06 1.281084e+12 55 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..bc8ceec6abd3f8eb3c76cc8b45e7f67462e4b54b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/ssymm.sirocco-v100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 9.843868e+02 7.250133e+01 8.379100e+06 8.293018e+09 8512 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for 
cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 9.922494e+02 7.269875e+01 8.647454e+06 8.626490e+09 8715 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +962518a7 30720000 0.000000e+00 1.053054e+05 2.793623e+04 7.266071e+06 8.190062e+11 69 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..4a126a769743ef3f13e7f20b46c99f208fff57d5 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-a100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 1.292720e+01 1.800127e+00 7.265731e+05 9.574683e+06 56205 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 5.983456e+04 3.642700e+03 2.812224e+06 1.688919e+11 47 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 1.284362e+01 1.742971e+00 5.282710e+05 6.909866e+06 41131 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..fc84a45027d59678180e50969ae5853967c5f640 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-k40m @@ -0,0 +1,168 @@ 
+################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 6.792409e+02 3.989136e+01 9.224091e+05 6.286990e+08 1358 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 2.157974e+04 4.694649e+03 5.528731e+07 1.249552e+12 2562 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# 
a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 6.755971e+02 4.131319e+01 9.404312e+05 6.377285e+08 1392 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 6.822887e+02 4.006265e+01 1.025480e+06 7.020857e+08 1503 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 6.825954e+02 4.094153e+01 9.453946e+05 6.476435e+08 1385 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..71bf91f39ac242a0bf6cd38408dcdffe9bdc9647 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-p100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 
+ +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 12300800 0.000000e+00 3.803596e+02 3.147797e+01 2.533195e+06 9.701243e+08 6660 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 12300800 0.000000e+00 3.810127e+02 3.171342e+01 2.324558e+06 8.918222e+08 6101 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base 
+0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 12300800 0.000000e+00 6.022525e+04 1.195372e+04 1.420111e+08 8.889595e+12 2358 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..63321f6208b57024c5c0579a5f16c0788032480b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-rtx8000 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 5.863007e+02 9.687740e+01 3.506078e+05 2.111740e+08 598 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 6.841008e+04 7.745713e+02 1.026151e+06 
7.020808e+10 15 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 5.681654e+02 7.420758e+01 4.244195e+05 2.452541e+08 747 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..da3c7887cc09466f778f880888a14ef124932db1 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/ssyrk.sirocco-v100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 6.046660e+02 4.355702e+01 1.314544e+06 7.989844e+08 2174 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU 
- 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 4.546014e+04 3.314100e+03 9.622548e+08 4.397672e+13 21167 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 6.079189e+02 4.332877e+01 1.383624e+06 8.454039e+08 2276 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..59f8e0f5e54e78a2024f170b67a609e2d7f41a53 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-a100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# 
DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a37c3e11 11481600 0.000000e+00 1.923568e+03 1.376280e+02 3.462423e+04 6.694302e+07 18 +ad3f064c 31948800 0.000000e+00 2.446235e+03 2.936602e+02 9.581413e+07 2.377616e+11 39168 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a37c3e11 11481600 0.000000e+00 1.908892e+03 6.093748e+01 2.099781e+04 4.012339e+07 11 +ad3f064c 31948800 0.000000e+00 2.403085e+03 3.073088e+02 1.075549e+08 2.626904e+11 44757 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a37c3e11 11481600 0.000000e+00 5.310635e+02 1.056344e+02 
4.627157e+06 2.554539e+09 8713 +ad3f064c 31948800 0.000000e+00 4.713320e+05 8.792344e+04 2.766719e+08 1.349421e+14 587 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..3ecf855b7fac6b9e657f8cbb793f39cd0ed24bcd --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-k40m @@ -0,0 +1,172 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d5a8ab8e 4431360 0.000000e+00 1.613582e+03 2.198595e+02 1.774941e+04 2.917185e+07 11 +1d42d84c 11796480 0.000000e+00 8.516501e+03 6.056600e+02 2.178862e+08 1.865012e+12 25584 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n 
+1d42d84c 11796480 0.000000e+00 8.542456e+03 7.886072e+02 9.057566e+07 7.803325e+11 10603 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d5a8ab8e 4431360 0.000000e+00 1.645341e+03 2.549480e+02 1.645342e+04 2.772147e+07 10 +1d42d84c 11796480 0.000000e+00 8.524860e+03 8.104533e+02 2.090296e+08 1.798053e+12 24520 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d5a8ab8e 4431360 0.000000e+00 1.656245e+03 2.688821e+02 1.821870e+04 3.096990e+07 11 +1d42d84c 11796480 0.000000e+00 8.642899e+03 7.872516e+02 2.104805e+08 1.834255e+12 24353 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 
(Comb3) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d5a8ab8e 4431360 0.000000e+00 3.682008e+02 7.622453e+01 1.546443e+04 5.938044e+06 42 +1d42d84c 11796480 0.000000e+00 1.614407e+05 3.629611e+04 6.971010e+08 1.182291e+14 4318 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..ab8604ab37509d94e955a06d490409ab224f50a5 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-p100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +6e527533 7112640 0.000000e+00 7.185065e+02 5.484391e+01 9.340584e+03 6.750372e+06 13 +2e930e88 19403520 0.000000e+00 3.067987e+03 3.622152e+02 3.405466e+08 1.059356e+12 111000 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of 
implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +6e527533 7112640 0.000000e+00 6.867879e+02 5.506921e+01 6.867879e+03 4.747102e+06 10 +2e930e88 19403520 0.000000e+00 3.113419e+03 4.088315e+02 3.376939e+08 1.069511e+12 108464 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +6e527533 7112640 0.000000e+00 8.076304e+02 1.783931e+02 3.105339e+06 2.630330e+09 3845 +2e930e88 19403520 0.000000e+00 5.882471e+05 1.041467e+05 1.029432e+08 6.245420e+13 175 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..d0cfb5f301f91457164b257f8486bdbea2edd826 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-rtx8000 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ad3f064c 31948800 0.000000e+00 5.039715e+03 5.897871e+02 2.250233e+07 1.149585e+11 4465 +a37c3e11 11481600 0.000000e+00 7.461015e+02 1.119125e+02 7.461015e+03 5.691918e+06 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ad3f064c 31948800 0.000000e+00 5.056946e+03 6.678363e+02 2.205334e+07 1.134676e+11 4361 +a37c3e11 11481600 0.000000e+00 7.761893e+02 1.582136e+02 7.761893e+03 6.275014e+06 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ad3f064c 31948800 0.000000e+00 3.325125e+05 
6.435924e+03 6.982762e+06 2.322725e+12 21 +a37c3e11 11481600 0.000000e+00 8.848475e+02 1.137144e+02 7.114174e+05 6.398923e+08 804 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..84827bfbe4ff015a3b83f5595e1fcca9a365393e --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/stpmqrt.sirocco-v100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ad3f064c 31948800 0.000000e+00 3.557549e+03 5.269036e+02 1.527384e+09 5.552937e+12 429336 +a37c3e11 11481600 0.000000e+00 1.138813e+03 2.614769e+02 1.480457e+04 1.774845e+07 13 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum 
sum2 n +ad3f064c 31948800 0.000000e+00 3.541234e+03 5.186668e+02 1.478359e+09 5.347522e+12 417470 +a37c3e11 11481600 0.000000e+00 9.864667e+02 1.047768e+02 1.578347e+04 1.574552e+07 16 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ad3f064c 31948800 0.000000e+00 4.414695e+05 6.692352e+04 4.907375e+09 2.216243e+15 11116 +a37c3e11 11481600 0.000000e+00 1.807063e+03 3.648603e+02 1.477093e+07 2.778014e+10 8174 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..1f0c05be712a5520af1933e5bfda5cf4fb329040 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0abc7e2a 21708800 0.000000e+00 1.136749e+05 1.493485e+04 2.091390e+09 2.418422e+14 18398 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..852d0ca7d6567c94f4d73f5f43f17232d0d72a66 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5957d8d1 8110080 0.000000e+00 4.191033e+04 7.078611e+03 1.595107e+08 6.875854e+12 3806 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..5c1743837972f6a87ad295fe05b8083d88cb605b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 
+#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e2b038b6 13253120 0.000000e+00 9.208958e+04 1.118564e+04 6.981311e+08 6.523912e+13 7581 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..7395d13b987d6b66aca97506ceb130fd29477f6b --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0abc7e2a 21708800 0.000000e+00 1.713943e+05 1.989112e+03 2.015597e+08 3.455084e+13 1176 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..b2c00bba60353577101ccb612d84f5f60402a7fc --- /dev/null +++ 
b/simucore/perfmodels/.starpu/sampling/codelets/45/stpqrt.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0abc7e2a 21708800 0.000000e+00 1.354361e+05 3.024460e+04 3.368295e+09 4.789380e+14 24870 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..6686804ed0b0cf9d293a636e6a963928d2df2d00 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-a100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 
0.000000e+00 5.019803e+04 3.310896e+02 5.521783e+05 2.771947e+10 11 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 8.187385e+00 9.401769e-01 1.802698e+05 1.495401e+06 22018 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 8.576944e+00 1.422355e+00 1.741291e+05 1.534569e+06 20302 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..d5181186e367659c776b2e0e56c31cc23917b47c --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-k40m @@ -0,0 +1,168 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types 
devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 1.604200e+04 2.701019e+03 1.730932e+07 2.855479e+11 1079 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 1.210427e+03 4.626693e+01 5.567962e+05 6.749456e+08 460 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 1.193718e+03 5.622313e+01 
4.930057e+05 5.898156e+08 413 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 1.228042e+03 5.160760e+01 5.071813e+05 6.239400e+08 413 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 1.219076e+03 4.989904e+01 5.632130e+05 6.877497e+08 462 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..73b4ca5a783ce82137cae05fb03f495b33c7cf86 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-p100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# 
device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 12300800 0.000000e+00 3.859412e+04 6.126719e+03 8.614208e+07 3.408360e+12 2232 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 12300800 0.000000e+00 5.712805e+02 1.062558e+01 8.146460e+05 4.655524e+08 1426 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +9aa66b8a 12300800 0.000000e+00 5.709627e+02 1.425480e+01 8.267540e+05 4.723400e+08 1448 + diff --git 
a/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..9befbcaf96678b7de8cf258bfb3156d3165773b7 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-rtx8000 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 6.737955e+04 2.622304e+02 8.759342e+05 5.902094e+10 13 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 8.724637e+02 5.445931e+01 1.936869e+05 1.696432e+08 222 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL 
- 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 8.699919e+02 5.666729e+01 2.235879e+05 1.953450e+08 257 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..d107e1ff36c9da6d004bc44008f848968c592387 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strmm.sirocco-v100 @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 4.932323e+04 3.817478e+03 3.500470e+08 1.736887e+13 7097 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 7.808017e+02 2.265549e+01 5.949709e+05 4.649454e+08 762 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 7.841997e+02 1.725304e+01 5.379610e+05 4.220730e+08 686 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..d09c0af83a3165314d9dfac7c6f2fab9e8d7b2e2 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-a100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 
sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 4.359330e+00 7.193837e-01 2.530416e+05 1.133131e+06 58046 +46d0e6c4 10246400 0.000000e+00 1.514515e+01 2.314492e+00 1.120741e+03 1.737020e+04 74 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 4.481761e+00 6.822347e-01 2.405316e+05 1.102985e+06 53669 +46d0e6c4 10246400 0.000000e+00 1.431106e+01 2.770187e+00 1.093365e+04 1.623350e+05 764 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 6.080808e+04 3.043679e+03 5.107878e+06 3.113784e+11 84 +46d0e6c4 10246400 0.000000e+00 2.329401e+02 1.966118e+01 1.507123e+05 3.535703e+07 647 + diff --git 
a/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..4fef5f3a0135ad12f5ebf98e5e6fadb7f2e4e0f7 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-k40m @@ -0,0 +1,173 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 1.838961e+03 2.546777e+02 4.426379e+06 8.296057e+09 2407 +47fadc35 3690240 0.000000e+00 1.941324e+02 1.419911e+01 1.261861e+04 2.462786e+06 65 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 1.907023e+03 2.783617e+02 4.525365e+06 8.813847e+09 2373 +47fadc35 3690240 0.000000e+00 1.951347e+02 2.472818e+01 
1.034214e+04 2.050519e+06 53 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 1.840131e+03 2.524188e+02 4.270944e+06 8.006978e+09 2321 +47fadc35 3690240 0.000000e+00 1.903790e+02 1.933078e+01 1.408804e+04 2.709720e+06 74 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 1.908946e+03 2.857990e+02 4.466933e+06 8.718265e+09 2340 +47fadc35 3690240 0.000000e+00 1.944218e+02 1.530694e+01 1.108204e+04 2.167945e+06 57 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny 
alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a1187f60 7372800 0.000000e+00 2.013848e+04 4.277020e+03 2.186637e+08 4.602179e+12 10858 +47fadc35 3690240 0.000000e+00 1.913192e+02 3.497276e+01 2.152341e+05 4.255438e+07 1125 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..a89f009ffe3cfaa63d5b30e1fd30ccd7fe8c7067 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-p100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +77bc0d5f 6155360 0.000000e+00 1.104347e+02 1.163077e+01 1.104347e+03 1.233110e+05 10 +9aa66b8a 12300800 0.000000e+00 9.376169e+02 1.054764e+02 3.672645e+06 3.487112e+09 3917 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of 
entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +77bc0d5f 6155360 0.000000e+00 1.147283e+02 1.479330e+01 1.147283e+03 1.338142e+05 10 +9aa66b8a 12300800 0.000000e+00 9.081480e+02 9.742563e+01 3.647122e+06 3.350246e+09 4016 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +77bc0d5f 6155360 0.000000e+00 3.544244e+02 2.192985e+01 1.127070e+05 4.009903e+07 318 +9aa66b8a 12300800 0.000000e+00 4.593649e+04 9.731125e+03 2.922939e+08 1.402950e+13 6363 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..516f6860250a71c0971989ab4a49ca584c849a80 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-rtx8000 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 
+##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 1.265135e+03 2.034064e+02 2.453096e+06 3.183721e+09 1939 +46d0e6c4 10246400 0.000000e+00 1.225023e+02 1.514483e+01 1.727283e+04 2.148302e+06 141 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 1.248066e+03 1.945259e+02 2.388799e+06 3.053805e+09 1914 +46d0e6c4 10246400 0.000000e+00 1.150052e+02 1.217168e+01 2.921131e+04 3.397082e+06 254 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 6.919961e+04 8.191379e+02 6.919961e+05 4.789258e+10 10 +46d0e6c4 10246400 0.000000e+00 3.069072e+02 
3.384180e+01 8.409258e+04 2.612242e+07 274 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..dd25eec12883be2e0d49be308cdd433afb25b50c --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strsm.sirocco-v100 @@ -0,0 +1,107 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 1.048890e+03 1.579229e+02 1.000012e+07 1.072680e+10 9534 +46d0e6c4 10246400 0.000000e+00 1.262551e+02 1.667783e+01 4.974451e+04 6.390090e+06 394 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 1.049189e+03 1.563164e+02 1.000507e+07 1.073022e+10 
9536 +46d0e6c4 10246400 0.000000e+00 1.268587e+02 1.587021e+01 6.672766e+04 8.597461e+06 526 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +b7b47bbe 20480000 0.000000e+00 4.225252e+04 4.277690e+03 2.035726e+09 8.689618e+13 48180 +46d0e6c4 10246400 0.000000e+00 5.911898e+02 1.193497e+02 1.007387e+06 6.198296e+08 1704 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-a100 b/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-a100 new file mode 100644 index 0000000000000000000000000000000000000000..59cacde77e713d1bf59541fa94347c9b81c8df59 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-a100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 
1.920536e+04 1.106502e+03 1.882125e+06 3.626688e+10 98 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-k40m b/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-k40m new file mode 100644 index 0000000000000000000000000000000000000000..b62c5804ec50b3473060e4f1cdeb1a8e7bdc7327 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-k40m @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +901ddf46 3686400 0.000000e+00 1.160146e+04 2.231386e+03 2.668335e+06 3.210176e+10 230 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-p100 b/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-p100 new file mode 100644 index 0000000000000000000000000000000000000000..b45ed117a388bdc80d9cc02bf4fcb2dec581e0f0 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-p100 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# 
number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +e82ddea3 6150400 0.000000e+00 2.197878e+04 4.023686e+03 6.000208e+06 1.362972e+11 273 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-rtx8000 b/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-rtx8000 new file mode 100644 index 0000000000000000000000000000000000000000..be78ba4f924c53d9a157d75b923a65ef7f2f2c76 --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-rtx8000 @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 2.994934e+04 2.499324e+02 3.264478e+06 9.777576e+10 109 + diff --git a/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-v100 b/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-v100 new file mode 100644 index 0000000000000000000000000000000000000000..84f144dbeed4138617acbd502259e7ee88bb01bc --- /dev/null +++ b/simucore/perfmodels/.starpu/sampling/codelets/45/strtri.sirocco-v100 @@ -0,0 +1,40 @@ +################## +# 
Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +5056171e 10240000 0.000000e+00 2.653446e+04 3.540263e+03 1.066685e+07 2.880775e+11 402 +