Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 2266c9a3 authored by Berenger Bramas
Browse files

Update bench tutorial

parent 4478793a
Branches
Tags
No related merge requests found
Showing
with 34 additions and 1295 deletions
File deleted
......@@ -13,14 +13,14 @@ cpu=1
STARPU_NCPUS=$cpu
STARPU_NCUDA=0
logoutput=`./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_SEQ`
logoutput=`./Tests/Release/testBlockedUniformBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_SEQ`
if [[ $VERBOSE ]] ; then
echo $logoutput
fi
$SCALFMM_STARPU_DIR/bin/starpu_fxt_tool -i "/tmp/prof_file_"$USER"_0"
$TUTORIAL_STARPU_DIR/bin/starpu_fxt_tool -i "/tmp/prof_file_"$USER"_0"
rec_name="$SCALFMM_RES_DIR/trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_BS_CPU_SEQ-CPU_$cpu.rec"
mv trace.rec $rec_name
python $SCALFMM_STARPU_DIR/bin/starpu_trace_state_stats.py -t $rec_name > $rec_name.time
python $TUTORIAL_STARPU_DIR/bin/starpu_trace_state_stats.py -t $rec_name > $rec_name.time
for (( cpu=1 ; cpu<=$SCALFMM_MAX_NB_CPU ; cpu++)) ; do
......@@ -29,13 +29,13 @@ for (( cpu=1 ; cpu<=$SCALFMM_MAX_NB_CPU ; cpu++)) ; do
STARPU_NCPUS=$cpu
STARPU_NCUDA=0
logoutput=`./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_PAR`
logoutput=`./Tests/Release/testBlockedUniformBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_PAR`
if [[ $VERBOSE ]] ; then
echo $logoutput
fi
$SCALFMM_STARPU_DIR/bin/starpu_fxt_tool -i "/tmp/prof_file_"$USER"_0"
$TUTORIAL_STARPU_DIR/bin/starpu_fxt_tool -i "/tmp/prof_file_"$USER"_0"
rec_name="$SCALFMM_RES_DIR/trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_BS_CPU_PAR-CPU_$cpu.rec"
mv trace.rec $rec_name
python $SCALFMM_STARPU_DIR/bin/starpu_trace_state_stats.py -t $rec_name > $rec_name.time
python $TUTORIAL_STARPU_DIR/bin/starpu_trace_state_stats.py -t $rec_name > $rec_name.time
done
0 granularity-eff tasks-eff runtime-eff pipeline-eff
1 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
2 9.588832e-01 9.588832e-01 9.972215e-01 9.999844e-01
3 9.984195e-01 9.984195e-01 9.992539e-01 9.999840e-01
4 9.936055e-01 9.936055e-01 9.992505e-01 9.999843e-01
5 9.859209e-01 9.859209e-01 9.991938e-01 9.999840e-01
6 9.913540e-01 9.913540e-01 9.992224e-01 9.999848e-01
7 9.980442e-01 9.980442e-01 9.993216e-01 9.999841e-01
8 9.932070e-01 9.932070e-01 9.993356e-01 9.999844e-01
9 9.953908e-01 9.953908e-01 9.993136e-01 9.999852e-01
10 9.930517e-01 9.930517e-01 9.991280e-01 9.999848e-01
11 9.937148e-01 9.937148e-01 9.992802e-01 9.999838e-01
12 9.895039e-01 9.895039e-01 9.992958e-01 9.999842e-01
13 9.934571e-01 9.934571e-01 9.992770e-01 9.999845e-01
14 9.939346e-01 9.939346e-01 9.993242e-01 9.999845e-01
15 9.929928e-01 9.929928e-01 9.993077e-01 9.999849e-01
16 9.946804e-01 9.946804e-01 9.993051e-01 9.999838e-01
17 9.959137e-01 9.959137e-01 9.992893e-01 9.999839e-01
18 9.652375e-01 9.652375e-01 9.961152e-01 9.999832e-01
19 9.937258e-01 9.937258e-01 9.992987e-01 9.999845e-01
20 9.949256e-01 9.949256e-01 9.992757e-01 9.999843e-01
21 9.886613e-01 9.886613e-01 9.992616e-01 9.999838e-01
22 9.921982e-01 9.921982e-01 9.992499e-01 9.999842e-01
23 1.001717e+00 1.001717e+00 9.992881e-01 9.999846e-01
24 9.957642e-01 9.957642e-01 9.992461e-01 9.999836e-01
Addons/BenchEfficiency/global-eff.png

6.78 KiB

......@@ -68,7 +68,7 @@ struct LineData{
}
}
if(words.size() != 4){
printf("Error line is no composed of 4 words\n");
printf("Error line is no composed of 4 words, has %lu for %s\n", words.size(), line);
exit(111);
}
name = ReduceName(words[0].substr(1, words[0].size() - 2));
......@@ -186,30 +186,32 @@ int main(int argc, char** argv){
}
while((sizeLine = getline((char**)&line, &sizeLine, timeFile)) != -1){
LineData dt(line);
// Task, Runtime, Other
if(dt.type == "Task"){
if(dt.name != "execute_on_all_wrapper"){
timeTasks[idxFile][dt.name] += dt.duration;
allTaskNames.insert(dt.name);
times[idxFile].tt += dt.duration;
if(strncmp(line, "WARNING", 7) != 0){
LineData dt(line);
// Task, Runtime, Other
if(dt.type == "Task"){
if(dt.name != "execute_on_all_wrapper"){
timeTasks[idxFile][dt.name] += dt.duration;
allTaskNames.insert(dt.name);
times[idxFile].tt += dt.duration;
}
}
}
else if(dt.type == "Runtime"){
if(dt.name == "Scheduling"
|| dt.name == "FetchingInput"
|| dt.name == "PushingOutput"){
times[idxFile].tr += dt.duration;
else if(dt.type == "Runtime"){
if(dt.name == "Scheduling"
|| dt.name == "FetchingInput"
|| dt.name == "PushingOutput"){
times[idxFile].tr += dt.duration;
}
}
}
else if(dt.type == "Other"){
if(dt.name == "Idle"){
times[idxFile].ti += dt.duration;
else if(dt.type == "Other"){
if(dt.name == "Idle"){
times[idxFile].ti += dt.duration;
}
}
else {
printf("Arg do not know type %s\n", dt.type.c_str());
//return 3;
}
}
else {
printf("Arg do not know type %s\n", dt.type.c_str());
return 3;
}
}
......
Addons/BenchEfficiency/par-bs-search.png

5.93 KiB

This diff is collapsed.
This diff is collapsed.
......@@ -6,5 +6,5 @@ if [[ $# -ne 1 ]] ; then
fi
input=$(cat)
res=`echo "$input" | grep "$3" | cut -d'=' -f2 | cut -d's' -f1`
res=`echo "$input" | grep "$1" | cut -d'=' -f2 | cut -d' ' -f2`
echo $res
Addons/BenchEfficiency/seq-bs-search.png

5.84 KiB

0 L2L M2M P2M L2P M2L-out M2L P2P-out P2P
1 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
2 9.565659e-01 9.665736e-01 1.031103e+00 1.004286e+00 9.715094e-01 9.208541e-01 9.697996e-01 9.763831e-01
3 9.263068e-01 1.024516e+00 1.029574e+00 9.889095e-01 9.937418e-01 9.954310e-01 1.001689e+00 1.000994e+00
4 1.005226e+00 1.006333e+00 1.033745e+00 1.010624e+00 9.534195e-01 9.864280e-01 9.895790e-01 9.995851e-01
5 8.615300e-01 9.844517e-01 9.938413e-01 1.009990e+00 9.569465e-01 9.791331e-01 9.887700e-01 9.975625e-01
6 8.535893e-01 9.410083e-01 1.014109e+00 1.018876e+00 9.739749e-01 9.860534e-01 9.782539e-01 9.964238e-01
7 1.046813e+00 9.975072e-01 1.037954e+00 1.003486e+00 9.786087e-01 9.933857e-01 1.004895e+00 9.965736e-01
8 9.995985e-01 1.013025e+00 9.895591e-01 1.013030e+00 9.652670e-01 9.907845e-01 1.000561e+00 9.971405e-01
9 1.039365e+00 1.013929e+00 1.047827e+00 9.852421e-01 9.711139e-01 9.898517e-01 9.980679e-01 9.993222e-01
10 9.181035e-01 9.952685e-01 1.031850e+00 1.012496e+00 9.670203e-01 9.852214e-01 9.859215e-01 9.985014e-01
11 8.717502e-01 9.889525e-01 1.028373e+00 1.011922e+00 9.699808e-01 9.888136e-01 9.826419e-01 9.981512e-01
12 9.452144e-01 1.040015e+00 1.013514e+00 9.762884e-01 9.389195e-01 9.915452e-01 9.996240e-01 9.998256e-01
13 1.022490e+00 1.021529e+00 1.014210e+00 9.896566e-01 9.668669e-01 9.898209e-01 1.011145e+00 9.991000e-01
14 9.383201e-01 9.923898e-01 1.030084e+00 1.009296e+00 9.748870e-01 9.858361e-01 1.005721e+00 9.971995e-01
15 9.387378e-01 9.986737e-01 1.032522e+00 9.967096e-01 9.675984e-01 9.877332e-01 1.003181e+00 9.974178e-01
16 9.377196e-01 9.853747e-01 1.043778e+00 1.003874e+00 9.786853e-01 9.873092e-01 1.003464e+00 9.958178e-01
17 9.293735e-01 1.034251e+00 1.038271e+00 1.003177e+00 9.700248e-01 9.915540e-01 9.899480e-01 9.984129e-01
18 9.081814e-01 9.992797e-01 1.018655e+00 9.982681e-01 9.627375e-01 9.752319e-01 9.739917e-01 9.297086e-01
19 9.471672e-01 9.763513e-01 1.026148e+00 1.013503e+00 9.656781e-01 9.868543e-01 9.891711e-01 9.992051e-01
20 9.376034e-01 1.008523e+00 1.015422e+00 9.988900e-01 9.763451e-01 9.917410e-01 1.016855e+00 9.974959e-01
21 9.649789e-01 9.941223e-01 1.023371e+00 9.720318e-01 9.427889e-01 9.864717e-01 1.011408e+00 1.001528e+00
22 8.085859e-01 1.003002e+00 1.024132e+00 1.015483e+00 9.586926e-01 9.888563e-01 9.829068e-01 9.982469e-01
23 9.843031e-01 1.009513e+00 1.041257e+00 1.012564e+00 1.009160e+00 9.949415e-01 9.970272e-01 9.964763e-01
24 9.408696e-01 9.847445e-01 1.030481e+00 9.726508e-01 9.691133e-01 9.975819e-01 1.022271e+00 1.000680e+00
Addons/BenchEfficiency/task-eff.png

10.6 KiB

0 L2L M2M P2M L2P M2L-out M2L P2P-out P2P
1 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
2 9.565659e-01 9.665736e-01 1.031103e+00 1.004286e+00 9.715094e-01 9.208541e-01 9.697996e-01 9.763831e-01
3 9.263068e-01 1.024516e+00 1.029574e+00 9.889095e-01 9.937418e-01 9.954310e-01 1.001689e+00 1.000994e+00
4 1.005226e+00 1.006333e+00 1.033745e+00 1.010624e+00 9.534195e-01 9.864280e-01 9.895790e-01 9.995851e-01
5 8.615300e-01 9.844517e-01 9.938413e-01 1.009990e+00 9.569465e-01 9.791331e-01 9.887700e-01 9.975625e-01
6 8.535893e-01 9.410083e-01 1.014109e+00 1.018876e+00 9.739749e-01 9.860534e-01 9.782539e-01 9.964238e-01
7 1.046813e+00 9.975072e-01 1.037954e+00 1.003486e+00 9.786087e-01 9.933857e-01 1.004895e+00 9.965736e-01
8 9.995985e-01 1.013025e+00 9.895591e-01 1.013030e+00 9.652670e-01 9.907845e-01 1.000561e+00 9.971405e-01
9 1.039365e+00 1.013929e+00 1.047827e+00 9.852421e-01 9.711139e-01 9.898517e-01 9.980679e-01 9.993222e-01
10 9.181035e-01 9.952685e-01 1.031850e+00 1.012496e+00 9.670203e-01 9.852214e-01 9.859215e-01 9.985014e-01
11 8.717502e-01 9.889525e-01 1.028373e+00 1.011922e+00 9.699808e-01 9.888136e-01 9.826419e-01 9.981512e-01
12 9.452144e-01 1.040015e+00 1.013514e+00 9.762884e-01 9.389195e-01 9.915452e-01 9.996240e-01 9.998256e-01
13 1.022490e+00 1.021529e+00 1.014210e+00 9.896566e-01 9.668669e-01 9.898209e-01 1.011145e+00 9.991000e-01
14 9.383201e-01 9.923898e-01 1.030084e+00 1.009296e+00 9.748870e-01 9.858361e-01 1.005721e+00 9.971995e-01
15 9.387378e-01 9.986737e-01 1.032522e+00 9.967096e-01 9.675984e-01 9.877332e-01 1.003181e+00 9.974178e-01
16 9.377196e-01 9.853747e-01 1.043778e+00 1.003874e+00 9.786853e-01 9.873092e-01 1.003464e+00 9.958178e-01
17 9.293735e-01 1.034251e+00 1.038271e+00 1.003177e+00 9.700248e-01 9.915540e-01 9.899480e-01 9.984129e-01
18 9.081814e-01 9.992797e-01 1.018655e+00 9.982681e-01 9.627375e-01 9.752319e-01 9.739917e-01 9.297086e-01
19 9.471672e-01 9.763513e-01 1.026148e+00 1.013503e+00 9.656781e-01 9.868543e-01 9.891711e-01 9.992051e-01
20 9.376034e-01 1.008523e+00 1.015422e+00 9.988900e-01 9.763451e-01 9.917410e-01 1.016855e+00 9.974959e-01
21 9.649789e-01 9.941223e-01 1.023371e+00 9.720318e-01 9.427889e-01 9.864717e-01 1.011408e+00 1.001528e+00
22 8.085859e-01 1.003002e+00 1.024132e+00 1.015483e+00 9.586926e-01 9.888563e-01 9.829068e-01 9.982469e-01
23 9.843031e-01 1.009513e+00 1.041257e+00 1.012564e+00 1.009160e+00 9.949415e-01 9.970272e-01 9.964763e-01
24 9.408696e-01 9.847445e-01 1.030481e+00 9.726508e-01 9.691133e-01 9.975819e-01 1.022271e+00 1.000680e+00
Addons/BenchEfficiency/task-gr-eff.png

10.2 KiB

Addons/BenchEfficiency/trace-example-colors.png

41.7 KiB

Addons/BenchEfficiency/trace-example.png

38.2 KiB

......@@ -2,6 +2,7 @@
// ==== CMAKE =====
// @FUSE_BLAS
// @FUSE_FFT
// @FUSE_STARPU
// ================
// Keep in private GIT
......@@ -70,21 +71,11 @@ int main(int argc, char* argv[]){
typedef FP2PGroupParticleContainer<FReal> GroupContainerClass;
typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal> GroupOctreeClass;
#ifdef SCALFMM_USE_STARPU
typedef FStarPUAllCpuCapacities<FUnifKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass;
typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper > GroupAlgorithm;
#elif defined(SCALFMM_USE_OMP4)
typedef FUnifKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass;
// Set the number of threads
omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass,
GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#else
typedef FUnifKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass;
//typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#endif
// Get params
const int NbLevels = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5);
const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment