Commit 2266c9a3 authored by Berenger Bramas

Update bench tutorial

parent 4478793a
......@@ -13,14 +13,14 @@ cpu=1
STARPU_NCPUS=$cpu
STARPU_NCUDA=0
logoutput=`./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_SEQ`
logoutput=`./Tests/Release/testBlockedUniformBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_SEQ`
if [[ $VERBOSE ]] ; then
echo $logoutput
fi
$SCALFMM_STARPU_DIR/bin/starpu_fxt_tool -i "/tmp/prof_file_"$USER"_0"
$TUTORIAL_STARPU_DIR/bin/starpu_fxt_tool -i "/tmp/prof_file_"$USER"_0"
rec_name="$SCALFMM_RES_DIR/trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_BS_CPU_SEQ-CPU_$cpu.rec"
mv trace.rec $rec_name
python $SCALFMM_STARPU_DIR/bin/starpu_trace_state_stats.py -t $rec_name > $rec_name.time
python $TUTORIAL_STARPU_DIR/bin/starpu_trace_state_stats.py -t $rec_name > $rec_name.time
for (( cpu=1 ; cpu<=$SCALFMM_MAX_NB_CPU ; cpu++)) ; do
......@@ -29,13 +29,13 @@ for (( cpu=1 ; cpu<=$SCALFMM_MAX_NB_CPU ; cpu++)) ; do
STARPU_NCPUS=$cpu
STARPU_NCUDA=0
logoutput=`./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_PAR`
logoutput=`./Tests/Release/testBlockedUniformBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_PAR`
if [[ $VERBOSE ]] ; then
echo $logoutput
fi
$SCALFMM_STARPU_DIR/bin/starpu_fxt_tool -i "/tmp/prof_file_"$USER"_0"
$TUTORIAL_STARPU_DIR/bin/starpu_fxt_tool -i "/tmp/prof_file_"$USER"_0"
rec_name="$SCALFMM_RES_DIR/trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_BS_CPU_PAR-CPU_$cpu.rec"
mv trace.rec $rec_name
python $SCALFMM_STARPU_DIR/bin/starpu_trace_state_stats.py -t $rec_name > $rec_name.time
python $TUTORIAL_STARPU_DIR/bin/starpu_trace_state_stats.py -t $rec_name > $rec_name.time
done
0 granularity-eff tasks-eff runtime-eff pipeline-eff
1 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
2 9.588832e-01 9.588832e-01 9.972215e-01 9.999844e-01
3 9.984195e-01 9.984195e-01 9.992539e-01 9.999840e-01
4 9.936055e-01 9.936055e-01 9.992505e-01 9.999843e-01
5 9.859209e-01 9.859209e-01 9.991938e-01 9.999840e-01
6 9.913540e-01 9.913540e-01 9.992224e-01 9.999848e-01
7 9.980442e-01 9.980442e-01 9.993216e-01 9.999841e-01
8 9.932070e-01 9.932070e-01 9.993356e-01 9.999844e-01
9 9.953908e-01 9.953908e-01 9.993136e-01 9.999852e-01
10 9.930517e-01 9.930517e-01 9.991280e-01 9.999848e-01
11 9.937148e-01 9.937148e-01 9.992802e-01 9.999838e-01
12 9.895039e-01 9.895039e-01 9.992958e-01 9.999842e-01
13 9.934571e-01 9.934571e-01 9.992770e-01 9.999845e-01
14 9.939346e-01 9.939346e-01 9.993242e-01 9.999845e-01
15 9.929928e-01 9.929928e-01 9.993077e-01 9.999849e-01
16 9.946804e-01 9.946804e-01 9.993051e-01 9.999838e-01
17 9.959137e-01 9.959137e-01 9.992893e-01 9.999839e-01
18 9.652375e-01 9.652375e-01 9.961152e-01 9.999832e-01
19 9.937258e-01 9.937258e-01 9.992987e-01 9.999845e-01
20 9.949256e-01 9.949256e-01 9.992757e-01 9.999843e-01
21 9.886613e-01 9.886613e-01 9.992616e-01 9.999838e-01
22 9.921982e-01 9.921982e-01 9.992499e-01 9.999842e-01
23 1.001717e+00 1.001717e+00 9.992881e-01 9.999846e-01
24 9.957642e-01 9.957642e-01 9.992461e-01 9.999836e-01
......@@ -68,7 +68,7 @@ struct LineData{
}
}
if(words.size() != 4){
printf("Error line is no composed of 4 words\n");
printf("Error line is no composed of 4 words, has %lu for %s\n", words.size(), line);
exit(111);
}
name = ReduceName(words[0].substr(1, words[0].size() - 2));
......@@ -186,30 +186,32 @@ int main(int argc, char** argv){
}
while((sizeLine = getline((char**)&line, &sizeLine, timeFile)) != -1){
LineData dt(line);
// Task, Runtime, Other
if(dt.type == "Task"){
if(dt.name != "execute_on_all_wrapper"){
timeTasks[idxFile][dt.name] += dt.duration;
allTaskNames.insert(dt.name);
times[idxFile].tt += dt.duration;
if(strncmp(line, "WARNING", 7) != 0){
LineData dt(line);
// Task, Runtime, Other
if(dt.type == "Task"){
if(dt.name != "execute_on_all_wrapper"){
timeTasks[idxFile][dt.name] += dt.duration;
allTaskNames.insert(dt.name);
times[idxFile].tt += dt.duration;
}
}
}
else if(dt.type == "Runtime"){
if(dt.name == "Scheduling"
|| dt.name == "FetchingInput"
|| dt.name == "PushingOutput"){
times[idxFile].tr += dt.duration;
else if(dt.type == "Runtime"){
if(dt.name == "Scheduling"
|| dt.name == "FetchingInput"
|| dt.name == "PushingOutput"){
times[idxFile].tr += dt.duration;
}
}
}
else if(dt.type == "Other"){
if(dt.name == "Idle"){
times[idxFile].ti += dt.duration;
else if(dt.type == "Other"){
if(dt.name == "Idle"){
times[idxFile].ti += dt.duration;
}
}
else {
printf("Arg do not know type %s\n", dt.type.c_str());
//return 3;
}
}
else {
printf("Arg do not know type %s\n", dt.type.c_str());
return 3;
}
}
......
This diff is collapsed.
This diff is collapsed.
......@@ -6,5 +6,5 @@ if [[ $# -ne 1 ]] ; then
fi
input=$(cat)
res=`echo "$input" | grep "$3" | cut -d'=' -f2 | cut -d's' -f1`
res=`echo "$input" | grep "$1" | cut -d'=' -f2 | cut -d' ' -f2`
echo $res
0 L2L M2M P2M L2P M2L-out M2L P2P-out P2P
1 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
2 9.565659e-01 9.665736e-01 1.031103e+00 1.004286e+00 9.715094e-01 9.208541e-01 9.697996e-01 9.763831e-01
3 9.263068e-01 1.024516e+00 1.029574e+00 9.889095e-01 9.937418e-01 9.954310e-01 1.001689e+00 1.000994e+00
4 1.005226e+00 1.006333e+00 1.033745e+00 1.010624e+00 9.534195e-01 9.864280e-01 9.895790e-01 9.995851e-01
5 8.615300e-01 9.844517e-01 9.938413e-01 1.009990e+00 9.569465e-01 9.791331e-01 9.887700e-01 9.975625e-01
6 8.535893e-01 9.410083e-01 1.014109e+00 1.018876e+00 9.739749e-01 9.860534e-01 9.782539e-01 9.964238e-01
7 1.046813e+00 9.975072e-01 1.037954e+00 1.003486e+00 9.786087e-01 9.933857e-01 1.004895e+00 9.965736e-01
8 9.995985e-01 1.013025e+00 9.895591e-01 1.013030e+00 9.652670e-01 9.907845e-01 1.000561e+00 9.971405e-01
9 1.039365e+00 1.013929e+00 1.047827e+00 9.852421e-01 9.711139e-01 9.898517e-01 9.980679e-01 9.993222e-01
10 9.181035e-01 9.952685e-01 1.031850e+00 1.012496e+00 9.670203e-01 9.852214e-01 9.859215e-01 9.985014e-01
11 8.717502e-01 9.889525e-01 1.028373e+00 1.011922e+00 9.699808e-01 9.888136e-01 9.826419e-01 9.981512e-01
12 9.452144e-01 1.040015e+00 1.013514e+00 9.762884e-01 9.389195e-01 9.915452e-01 9.996240e-01 9.998256e-01
13 1.022490e+00 1.021529e+00 1.014210e+00 9.896566e-01 9.668669e-01 9.898209e-01 1.011145e+00 9.991000e-01
14 9.383201e-01 9.923898e-01 1.030084e+00 1.009296e+00 9.748870e-01 9.858361e-01 1.005721e+00 9.971995e-01
15 9.387378e-01 9.986737e-01 1.032522e+00 9.967096e-01 9.675984e-01 9.877332e-01 1.003181e+00 9.974178e-01
16 9.377196e-01 9.853747e-01 1.043778e+00 1.003874e+00 9.786853e-01 9.873092e-01 1.003464e+00 9.958178e-01
17 9.293735e-01 1.034251e+00 1.038271e+00 1.003177e+00 9.700248e-01 9.915540e-01 9.899480e-01 9.984129e-01
18 9.081814e-01 9.992797e-01 1.018655e+00 9.982681e-01 9.627375e-01 9.752319e-01 9.739917e-01 9.297086e-01
19 9.471672e-01 9.763513e-01 1.026148e+00 1.013503e+00 9.656781e-01 9.868543e-01 9.891711e-01 9.992051e-01
20 9.376034e-01 1.008523e+00 1.015422e+00 9.988900e-01 9.763451e-01 9.917410e-01 1.016855e+00 9.974959e-01
21 9.649789e-01 9.941223e-01 1.023371e+00 9.720318e-01 9.427889e-01 9.864717e-01 1.011408e+00 1.001528e+00
22 8.085859e-01 1.003002e+00 1.024132e+00 1.015483e+00 9.586926e-01 9.888563e-01 9.829068e-01 9.982469e-01
23 9.843031e-01 1.009513e+00 1.041257e+00 1.012564e+00 1.009160e+00 9.949415e-01 9.970272e-01 9.964763e-01
24 9.408696e-01 9.847445e-01 1.030481e+00 9.726508e-01 9.691133e-01 9.975819e-01 1.022271e+00 1.000680e+00
0 L2L M2M P2M L2P M2L-out M2L P2P-out P2P
1 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
2 9.565659e-01 9.665736e-01 1.031103e+00 1.004286e+00 9.715094e-01 9.208541e-01 9.697996e-01 9.763831e-01
3 9.263068e-01 1.024516e+00 1.029574e+00 9.889095e-01 9.937418e-01 9.954310e-01 1.001689e+00 1.000994e+00
4 1.005226e+00 1.006333e+00 1.033745e+00 1.010624e+00 9.534195e-01 9.864280e-01 9.895790e-01 9.995851e-01
5 8.615300e-01 9.844517e-01 9.938413e-01 1.009990e+00 9.569465e-01 9.791331e-01 9.887700e-01 9.975625e-01
6 8.535893e-01 9.410083e-01 1.014109e+00 1.018876e+00 9.739749e-01 9.860534e-01 9.782539e-01 9.964238e-01
7 1.046813e+00 9.975072e-01 1.037954e+00 1.003486e+00 9.786087e-01 9.933857e-01 1.004895e+00 9.965736e-01
8 9.995985e-01 1.013025e+00 9.895591e-01 1.013030e+00 9.652670e-01 9.907845e-01 1.000561e+00 9.971405e-01
9 1.039365e+00 1.013929e+00 1.047827e+00 9.852421e-01 9.711139e-01 9.898517e-01 9.980679e-01 9.993222e-01
10 9.181035e-01 9.952685e-01 1.031850e+00 1.012496e+00 9.670203e-01 9.852214e-01 9.859215e-01 9.985014e-01
11 8.717502e-01 9.889525e-01 1.028373e+00 1.011922e+00 9.699808e-01 9.888136e-01 9.826419e-01 9.981512e-01
12 9.452144e-01 1.040015e+00 1.013514e+00 9.762884e-01 9.389195e-01 9.915452e-01 9.996240e-01 9.998256e-01
13 1.022490e+00 1.021529e+00 1.014210e+00 9.896566e-01 9.668669e-01 9.898209e-01 1.011145e+00 9.991000e-01
14 9.383201e-01 9.923898e-01 1.030084e+00 1.009296e+00 9.748870e-01 9.858361e-01 1.005721e+00 9.971995e-01
15 9.387378e-01 9.986737e-01 1.032522e+00 9.967096e-01 9.675984e-01 9.877332e-01 1.003181e+00 9.974178e-01
16 9.377196e-01 9.853747e-01 1.043778e+00 1.003874e+00 9.786853e-01 9.873092e-01 1.003464e+00 9.958178e-01
17 9.293735e-01 1.034251e+00 1.038271e+00 1.003177e+00 9.700248e-01 9.915540e-01 9.899480e-01 9.984129e-01
18 9.081814e-01 9.992797e-01 1.018655e+00 9.982681e-01 9.627375e-01 9.752319e-01 9.739917e-01 9.297086e-01
19 9.471672e-01 9.763513e-01 1.026148e+00 1.013503e+00 9.656781e-01 9.868543e-01 9.891711e-01 9.992051e-01
20 9.376034e-01 1.008523e+00 1.015422e+00 9.988900e-01 9.763451e-01 9.917410e-01 1.016855e+00 9.974959e-01
21 9.649789e-01 9.941223e-01 1.023371e+00 9.720318e-01 9.427889e-01 9.864717e-01 1.011408e+00 1.001528e+00
22 8.085859e-01 1.003002e+00 1.024132e+00 1.015483e+00 9.586926e-01 9.888563e-01 9.829068e-01 9.982469e-01
23 9.843031e-01 1.009513e+00 1.041257e+00 1.012564e+00 1.009160e+00 9.949415e-01 9.970272e-01 9.964763e-01
24 9.408696e-01 9.847445e-01 1.030481e+00 9.726508e-01 9.691133e-01 9.975819e-01 1.022271e+00 1.000680e+00
......@@ -2,6 +2,7 @@
// ==== CMAKE =====
// @FUSE_BLAS
// @FUSE_FFT
// @FUSE_STARPU
// ================
// Keep in private GIT
......@@ -70,21 +71,11 @@ int main(int argc, char* argv[]){
typedef FP2PGroupParticleContainer<FReal> GroupContainerClass;
typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal> GroupOctreeClass;
#ifdef SCALFMM_USE_STARPU
typedef FStarPUAllCpuCapacities<FUnifKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass;
typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper > GroupAlgorithm;
#elif defined(SCALFMM_USE_OMP4)
typedef FUnifKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass;
// Set the number of threads
omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass,
GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#else
typedef FUnifKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass;
//typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#endif
// Get params
const int NbLevels = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5);
const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment