# Try to find PAPI headers and libraries.
#
# Usage of this module as follows:
#
#     find_package(PAPI)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
#  PAPI_PREFIX         Set this variable to the root installation of
#                      libpapi if the module has problems finding the
#                      proper installation path.
#
# Variables defined by this module:
#
#  PAPI_FOUND              System has PAPI libraries and headers
#  PAPI_LIBRARIES          The PAPI library
#  PAPI_INCLUDE_DIRS       The location of PAPI headers

# Locate the installation root: the directory that contains include/papi.h.
find_path(PAPI_PREFIX
    NAMES include/papi.h
)

find_library(PAPI_LIBRARIES
    # Names are tried in order: the shared library is picked first, the static
    # archive is only a fallback, then the generic name is tried.
    NAMES libpapi.so libpapi.a papi
    HINTS ${PAPI_PREFIX}/lib ${HILTIDEPS}/lib
)

find_path(PAPI_INCLUDE_DIRS
    NAMES papi.h
    HINTS ${PAPI_PREFIX}/include ${HILTIDEPS}/include
)

# Sets PAPI_FOUND when both the library and the header directory were found.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PAPI DEFAULT_MSG
    PAPI_LIBRARIES
    PAPI_INCLUDE_DIRS
)

# Hide the cache entries created above from the default cache view.
# The prefix entry is PAPI_PREFIX (the variable filled by find_path above).
mark_as_advanced(
    PAPI_PREFIX
    PAPI_LIBRARIES
    PAPI_INCLUDE_DIRS
)
-#endif - /* counter type */ @@ -241,8 +232,8 @@ typedef struct kaapi_named_perfctr { #define KAAPI_EVT_TASK_BEG 2 /* begin execution of tasks */ #define KAAPI_EVT_TASK_END 3 /* end execution of tasks, d0: task, d1: numaid */ #define KAAPI_EVT_TASK_SUCC 4 /* T0 has successor T1 */ -#define KAAPI_EVT_TASK_ACCESS 5 /* d0: task, d1: mode, d2: pointer */ -#define KAAPI_EVT_COMP_DAG 6 /* computing the dag, i0[0]=1 iff beg, =0 iff else. d1: key */ +#define KAAPI_EVT_TASK_ACCESS 5 /* d0: task, d1: mode, d2: pointer, d3: numaid */ +#define KAAPI_EVT_TASK_DATA 6 /* d0: task, d1: data, d2: size, d3.i32[0]: mode d3.i32[1]: numaid */ /*#define KAAPI_EVT_FREE0 7*/ #define KAAPI_EVT_UNDEFINED_0 8 #define KAAPI_EVT_UNDEFINED_1 9 @@ -258,19 +249,19 @@ typedef struct kaapi_named_perfctr { #define KAAPI_EVT_STEAL_OP 19 /* when k-processor emit a steal request data=victimid, serial*/ #define KAAPI_EVT_STEAL_AGGR_BEG 20 /* when begin to be a combiner */ #define KAAPI_EVT_STEAL_AGGR_END 21 /* when begin to be a combiner */ -#define KAAPI_EVT_OFFLOAD_HTOH_BEG 22 /* offload copy */ +#define KAAPI_EVT_OFFLOAD_HTOH_BEG 22 /* offload copy */ #define KAAPI_EVT_OFFLOAD_HTOH_END 23 -#define KAAPI_EVT_OFFLOAD_HTOD_BEG 24 /* offload copy */ +#define KAAPI_EVT_OFFLOAD_HTOD_BEG 24 /* offload copy */ #define KAAPI_EVT_OFFLOAD_HTOD_END 25 -#define KAAPI_EVT_OFFLOAD_DTOH_BEG 26 /* offload copy */ +#define KAAPI_EVT_OFFLOAD_DTOH_BEG 26 /* offload copy */ #define KAAPI_EVT_OFFLOAD_DTOH_END 27 -#define KAAPI_EVT_OFFLOAD_DTOD_BEG 28 /* offload copy */ +#define KAAPI_EVT_OFFLOAD_DTOD_BEG 28 /* offload copy */ #define KAAPI_EVT_OFFLOAD_DTOD_END 29 #define KAAPI_EVT_OFFLOAD_KERNEL_BEG 30 #define KAAPI_EVT_OFFLOAD_KERNEL_END 31 -#define KAAPI_EVT_PARALLEL 32 /* i0[] = 1 iff beg, = 0 iff end, d1: parallel_id */ +#define KAAPI_EVT_PARALLEL 32 /* i0[] = 1 iff beg, = 0 iff end, d1: parallel_id */ /*#define KAAPI_EVT_FREE0 33*/ -#define KAAPI_EVT_TASKWAIT 34 /* i0[] = 1 iff beg, = 0 iff end, d1: task_id */ 
+#define KAAPI_EVT_TASKWAIT 34 /* i0[] = 1 iff beg, = 0 iff end, d1: task_id */ /*#define KAAPI_EVT_FREE0 35*/ #define KAAPI_EVT_TASKGROUP 36 /*#define KAAPI_EVT_FREE0 37*/ @@ -319,9 +310,9 @@ typedef uint64_t kaapi_event_mask_type_t; ( KAAPI_EVT_MASK(KAAPI_EVT_TASK_BEG) \ | KAAPI_EVT_MASK(KAAPI_EVT_TASK_SUCC) \ | KAAPI_EVT_MASK(KAAPI_EVT_TASK_ACCESS) \ + | KAAPI_EVT_MASK(KAAPI_EVT_TASK_DATA) \ | KAAPI_EVT_MASK(KAAPI_EVT_TASK_END) \ | KAAPI_EVT_MASK(KAAPI_EVT_TASK_STEAL) \ - | KAAPI_EVT_MASK(KAAPI_EVT_COMP_DAG) \ | KAAPI_EVT_MASK(KAAPI_EVT_OFFLOAD_KERNEL_BEG) \ | KAAPI_EVT_MASK(KAAPI_EVT_OFFLOAD_KERNEL_END) \ ) @@ -337,7 +328,6 @@ typedef uint64_t kaapi_event_mask_type_t; | KAAPI_EVT_MASK(KAAPI_EVT_LOOP_NEXT) \ | KAAPI_EVT_MASK(KAAPI_EVT_LOOP_MDATA) \ | KAAPI_EVT_MASK(KAAPI_EVT_TASK_ATTR) \ - | KAAPI_EVT_MASK(KAAPI_EVT_COMP_DAG) \ ) #define KAAPI_EVT_MASK_SCHED \ @@ -547,13 +537,12 @@ typedef enum kaapi_access_mode_t { KAAPI_ACCESS_MODE_S = 16, /* 0001 0000 : stack data */ KAAPI_ACCESS_MODE_T = 32, /* 0010 0000 : for Quark support: scratch mode or temporary */ KAAPI_ACCESS_MODE_P = 64, /* 0100 0000 : */ - KAAPI_ACCESS_MODE_IP = 128, /* 1000 0000 : in place, for CW only */ + KAAPI_ACCESS_MODE_C = 128, /* 1000 0000 : commute only - for CW only - */ KAAPI_ACCESS_MODE_RW = KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W, KAAPI_ACCESS_MODE_STACK = KAAPI_ACCESS_MODE_S|KAAPI_ACCESS_MODE_RW, KAAPI_ACCESS_MODE_SCRATCH = KAAPI_ACCESS_MODE_T|KAAPI_ACCESS_MODE_V, KAAPI_ACCESS_MODE_CWP = KAAPI_ACCESS_MODE_P|KAAPI_ACCESS_MODE_CW, - KAAPI_ACCESS_MODE_ICW = KAAPI_ACCESS_MODE_IP|KAAPI_ACCESS_MODE_CW } kaapi_access_mode_t; #define KAAPI_ACCESS_MASK_RIGHT_MODE 0x7F /* 5 bits, ie bit 0, 1, 2, 3, 4, including P mode */ @@ -890,6 +879,19 @@ extern void kaapi_tracelib_task_access( ); +/* +*/ +extern void kaapi_tracelib_task_data( + kaapi_tracelib_thread_t* kproc, + kaapi_task_id_t task, + int count, + void** data, + size_t* size, + int* mode, + void (*ompt_mode_decoder)(int,int*) +); + + /* 
# Draw a Gantt chart (one horizontal band per Resource) from a CSV task trace
# and write it to "gantt.pdf" in the current directory.
# The CSV file is the first command line argument.
library(dplyr);
library(ggplot2);

# Read the trace `filename` and return it as a data frame.
# Only the rows whose Explicit column equals 1 are kept
# (presumably the explicit tasks -- confirm against the trace writer).
# The Start, End and Duration columns are rescaled from ns to seconds.
readtrace <- function (filename)
{
  df <- read.csv(filename, header=TRUE, sep=",", strip.white=TRUE);
  df <- df %>% filter((Explicit==1)) %>% as.data.frame();
  df$Start <- df$Start*1e-9; # Convert ns to second
  df$End <- df$End*1e-9;
  df$Duration <- df$Duration*1e-9;
  df;
}

# Command line arguments that follow the script name
args <- commandArgs(trailingOnly=TRUE)


# The first argument is the trace file to plot
df <- readtrace(args[1]);


# helper: convert s to the date
# (seconds are counted from 1970-01-01)
date<-function(d) { as.POSIXct(d, origin="1970-01-01"); }

# theplot
# One rectangle per row: it spans [Start, End] on the x axis and
# [Resource, Resource+0.9] on the y axis; the fill colour follows Name.
theplot = ggplot() +
    theme_bw(base_size=16) +
    xlab("Time [s]") +
    ylab("Thread Identification") +
    scale_fill_brewer(palette = "Set1") +
    theme (
        legend.spacing = unit(.1, "line"),
        panel.grid.major = element_blank(),
        panel.spacing=unit(0, "cm"),
        panel.grid=element_line(size=0),
        legend.position = "bottom",
        legend.title = element_text("Helvetica")
    ) +
    # Legend keys laid out on a single row
    guides(fill = guide_legend(nrow = 1)) +
    geom_rect(data=df, alpha=1, aes(fill=Name,
                                    xmin=date(Start),
                                    xmax=date(End),
                                    ymin=Resource,
                                    ymax=Resource+0.9)) +
    # Reversed y axis: the smallest Resource value is drawn at the top
    scale_y_reverse();
    # Render the plot into a 10x6 PDF file
    pdf("gantt.pdf", width=10, height=6)
    print(theplot)
    dev.off()
a/test/hello-task.c +++ b/test/hello-task.c @@ -1,6 +1,7 @@ #include <stdio.h> #include <unistd.h> #include <omp.h> +#include <math.h> int array[] = { 1, 2, 3, 4}; @@ -18,7 +19,10 @@ int main() for (int i = 0; i < 4; i++) { #pragma omp task depend(in: array[i]) depend(inout: array[(i+1)%4]) { + double d; array[(i+1)%4] = array[i]; + for (int j=0; j<100000; ++j) + d += sin(M_PI/j)*cos(M_PI/i); printf("Hey %i\n", i); } } diff --git a/tool/CMakeLists.txt b/tool/CMakeLists.txt index e42dac0e27c166fcd7abbb7c2a605808e715e913..2db4edd2ea890a884015992cc419ca19c2351bdf 100644 --- a/tool/CMakeLists.txt +++ b/tool/CMakeLists.txt @@ -1,4 +1,5 @@ -find_file(OMPT_HEADER NAMES omp-tools.h) +unset(OMPT_HEADER CACHE) +find_file(OMPT_HEADER NAMES omp-tools.h HINTS ENV CPLUS_INCLUDE_PATH ENV C_INCLUDE_PATH) if (${OMPT_HEADER} STREQUAL "OMPT_HEADER-NOTFOUND") message(FATAL_ERROR "The OpenMP's OMPT header (omp-tools.h) was not found. Please check your OpenMP runtime installation.") endif() @@ -8,13 +9,8 @@ set(SOURCES ompt_callbacks.cpp ) -add_custom_command ( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/hw_count.h - COMMAND ${SHELL_EXECUTABLE} ${CMAKE_SOURCE_DIR}/config/hw_count.sh ${CMAKE_CURRENT_BINARY_DIR}/hw_count.h - ) - include_directories( ${CMAKE_CURRENT_BINARY_DIR} ) -add_library(tikki SHARED ${SOURCES} ${CMAKE_CURRENT_BINARY_DIR}/hw_count.h) +add_library(tikki SHARED ${SOURCES} ) target_link_libraries (tikki tracelib) install(TARGETS tikki DESTINATION lib) diff --git a/tool/ompt_callbacks.cpp b/tool/ompt_callbacks.cpp index f73787d78bcd66c279d8340a032f9de1edf2bb48..82c7018261ae466fbc1402370694aaefeba956cc 100644 --- a/tool/ompt_callbacks.cpp +++ b/tool/ompt_callbacks.cpp @@ -73,19 +73,25 @@ #define LOG 0 -#if 0 -static ompt_get_task_id_t ompt_get_task_id; -static ompt_get_thread_id_t ompt_get_thread_id; -static ompt_get_parallel_id_t ompt_get_parallel_id; -#endif - std::atomic<uint64_t> unique_thread_id(1); +std::atomic<uint64_t> unique_parallel_id(1); + +#define MAX_PARAM 16 
+typedef struct { + int count; + void* data[MAX_PARAM]; + size_t size[MAX_PARAM]; + int mode[MAX_PARAM]; +} task_data_info_t; + typedef struct tikki_task_id_s { uint64_t id; const void *task_ptr; ompt_dependence_t *deps; int ndeps; + const char* name; + task_data_info_t datainfo; } tikki_task_id_t; @@ -176,6 +182,49 @@ return_fast: return output; } + +/* +*/ +static void ompt_mode_decoder( ompt_dependence_type_t odt, int* mode ) +{ + *mode = KAAPI_ACCESS_MODE_VOID; + switch (odt) + { + case ompt_dependence_type_in: + { + *mode =KAAPI_ACCESS_MODE_R; + break; + } + case ompt_dependence_type_out: + { + *mode =KAAPI_ACCESS_MODE_W; + break; + } + case ompt_dependence_type_inout: + { + *mode =KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W; + break; + } + case ompt_dependence_type_mutexinoutset: + { + *mode =KAAPI_ACCESS_MODE_C|KAAPI_ACCESS_MODE_CW; + break; + } + default: + { + static int alreadydisplay = 0; + if (alreadydisplay ==0) + { + printf("*** OpenMP dependence type: %i not implemented\n", odt ); + alreadydisplay = 1; + } + } + } +} + + +/* +*/ static void ompt_decoder( ompt_dependence_t* dep, int i, void** addr, size_t* len, int* mode /*, size_t* len */ ) { *addr = dep[i].variable.ptr; @@ -190,6 +239,7 @@ static void ompt_decoder( ompt_dependence_t* dep, int i, void** addr, size_t* le *mode =KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W; //if (dep[i].dependence_flags.commute) //*mode |=KAAPI_ACCESS_MODE_CW; + //ompt_mode_decoder(dep[i].dependence_type, mode ); } @@ -288,7 +338,8 @@ void ompt_callback_parallel_begin_action ( ) { ompt_data_t *thread_data = ompt_get_thread_data(); - parallel_data->value = ompt_get_unique_id(); + //parallel_data->value = ompt_get_unique_id(); + parallel_data->value = unique_parallel_id++; uint64_t thread_id = thread_data->value; tikki_task_id_t *task = (tikki_task_id_t *)encountering_task_data->ptr; @@ -364,6 +415,33 @@ ompt_callback_parallel_end_action( } +/* +*/ +__thread const char* next_name = 0; +extern "C" +void 
tikki_ompt_set_task_name(const char* name ) +{ + next_name = name; +} + + +/* +*/ +__thread task_data_info_t next_data_info = { 0 }; + +extern "C" +void tikki_ompt_set_task_data(int count, void** data, size_t* size, int* mode ) +{ +#if LOG +printf("In %s: count: %i, data[0]:%p, size[0]: %li, mode[0]: %i\n", __func__, + count, data[0], size[0], mode[0] ); +#endif + next_data_info.count = count; + memcpy( &next_data_info.data, data, sizeof(void*)*count ); + memcpy( &next_data_info.size, size, sizeof(size_t)*count ); + memcpy( &next_data_info.mode, mode, sizeof(int)*count ); +} + void ompt_callback_task_create_action( ompt_data_t *parent_task_data, /* id of parent task */ @@ -379,11 +457,14 @@ ompt_callback_task_create_action( task->id = ompt_get_unique_id(); task->task_ptr = codeptr_ra; task->ndeps = 0; + task->name = next_name; + task->datainfo = next_data_info; + next_name = 0; #if LOG if (parent_task_data) { tikki_task_id_t *parent_task = (tikki_task_id_t*)parent_task_data->ptr; printf("%" PRIu64 ": ompt_task_create: parent_id=%" PRIu64 ", task_id=%" PRIu64\ - ", type=%i, has_dep=%i, ptr=%" PRIu64 "\n", thread_id, + ", type=%i, has_dep=%i, ptr=%p\n", thread_id, parent_task->id, task->id, type, has_dependences, task->task_ptr); } else { @@ -432,24 +513,32 @@ ompt_callback_task_schedule_action( kaapi_tracelib_thread_switchstate(koti->kproc); //free(prior_task); } - if (next_task_data->ptr) { + if (next_task_data->ptr) +{ tikki_task_id_t *new_task = (tikki_task_id_t *)next_task_data->ptr; // We are starting a task #if LOG printf("%" PRIu64 ": ompt_event_task_begin: task_id=%" PRIu64 ", status=%i, deps: %p\n", thread_id, new_task->id, prior_task_status, new_task->deps); #endif - /* This is code for implicit task begin. + /* This is code for explicit task begin. 
*/ kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id]; kaapi_tracelib_thread_switchstate(koti->kproc); char buff[30]; - sprintf(buff, "<undef-%p-%i>\0", new_task->task_ptr, new_task->id); + const char* taskname = 0; + if (new_task->name == 0) + { + sprintf(buff, "<undef-%p-%i>\0", new_task->task_ptr, new_task->id); + taskname = buff; + } + else + taskname = new_task->name; kaapi_descrformat_t* fdescr = kaapi_tracelib_register_fmtdescr( 0, // TODO: get codeptr_ra there (void *)new_task->task_ptr, 0, //loc - buff, + taskname, libomp_filter_func ); int idxtop = koti->pstack.top; @@ -478,6 +567,19 @@ ompt_callback_task_schedule_action( (void (*)(void*, int, void**, size_t*, int*))ompt_decoder ); } + if (new_task->datainfo.count) + { + kaapi_tracelib_task_data( + koti->kproc, + (kaapi_task_id_t)new_task->id, + new_task->datainfo.count, + new_task->datainfo.data, + new_task->datainfo.size, + new_task->datainfo.mode, + (void (*)(int, int*))ompt_mode_decoder + ); + new_task->datainfo.count = 0; + } } } @@ -501,13 +603,15 @@ ompt_callback_implicit_task_action ( #if LOG printf("%" PRIu64 ": ompt_event_implicit_task_action: begin. parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", thread_id, parallel_data->value, task->id); #endif + char buff[30]; + sprintf(buff, "<implicit>") ; kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id]; kaapi_descrformat_t* fdescr = kaapi_tracelib_register_fmtdescr( 1, //team->key, [> same key as the team. Not implicit task ? why ? 
[TG] <] (void *)task->id, //team->name, - "<implicit>", + buff, 0, 0 /* no filter: team name should be already well formed */ ); @@ -580,19 +684,46 @@ ompt_callback_implicit_task_action ( #endif } + + void ompt_callback_dependences_action ( ompt_data_t *task_data, const ompt_dependence_t *deps, int ndeps - ) +) { tikki_task_id_t *task = (tikki_task_id_t *)task_data->ptr; uint64_t thread_id = ompt_get_thread_data()->value; task->deps = (ompt_dependence_t *)malloc(ndeps * sizeof(ompt_dependence_t)); #if LOG - printf("%" PRIu64 ": ompt_event_task_dependences: tsak_id=%" PRIu64 ", #deps=%i, deps: %p\n", thread_id, task->id, ndeps, task->deps); + printf("%" PRIu64 ": ompt_event_dependences: tsak_id=%" PRIu64 ", #deps=%i, deps: %p\n", thread_id, task->id, ndeps, task->deps); #endif task->ndeps = ndeps; memcpy(task->deps, deps, ndeps*sizeof(ompt_dependence_t)); } + + + +void +ompt_callback_task_dependence_action ( + ompt_data_t *src_task_data, + ompt_data_t *sink_task_data +) +{ + tikki_task_id_t *src_task = (tikki_task_id_t *)src_task_data->ptr; + tikki_task_id_t *dest_task = (tikki_task_id_t *)sink_task_data->ptr; + uint64_t thread_id = ompt_get_thread_data()->value; + kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id]; + + kaapi_tracelib_task_depend( + koti->kproc, + (kaapi_task_id_t)src_task->id, + (kaapi_task_id_t)dest_task->id + ); + +#if LOG + printf("%" PRIu64 ": ompt_event_task_dependence: tsak_id=%" PRIu64 ", sink: %" PRIu64 "\n", thread_id, src_task->id, dest_task->id); +#endif +} + diff --git a/tool/ompt_callbacks.def b/tool/ompt_callbacks.def index 99a78d7a494d157f3b74a858a8d438dda3255457..e4b2b7113131204f354e3567a50ee060491680eb 100644 --- a/tool/ompt_callbacks.def +++ b/tool/ompt_callbacks.def @@ -13,6 +13,7 @@ CALLBACK(ompt_callback_task_create, ompt_data_t*, const ompt_frame_t*, ompt_data CALLBACK(ompt_callback_task_schedule, ompt_data_t *, ompt_task_status_t, ompt_data_t *) CALLBACK(ompt_callback_implicit_task, ompt_scope_endpoint_t, 
/* Entry points with C linkage that forward to the tool's internal setters.
   Every function below only passes its arguments through, unchanged. */
extern "C" {

/* Internal setters, defined in another translation unit */
extern void tikki_ompt_set_task_name(const char* name );
extern void tikki_ompt_set_task_data(int count, void** data, size_t* size, int* mode );

/* Double underscore variants. The commented-out attributes further down name
   them as alias targets; that aliasing is currently disabled. */
void __tikki_ompt_set_task_name(const char* name )
{
  tikki_ompt_set_task_name(name);
}

void __tikki_ompt_set_task_data(int count, void** data, size_t* size, int* mode )
{
  tikki_ompt_set_task_data(count, data, size, mode);
}

/* Extension exported to applications: set the name of a task */
void ompt_set_task_name(const char* name )
{
  tikki_ompt_set_task_name(name);
}
//__attribute__ ((weak, alias ("__tikki_ompt_set_task_name")));

/* Extension exported to applications: describe the data of a task */
void ompt_set_task_data(int count, void** data, size_t* size, int* mode )
{
  tikki_ompt_set_task_data(count, data, size, mode);
}
//__attribute__ ((weak, alias ("__tikki_ompt_set_task_data")));

} // extern "C"
(kaapi_ompt_thread_info_t*)calloc(__kaapi_oth_info_capacity, sizeof(kaapi_ompt_thread_info_t)); - err = kaapi_tracelib_init( - getpid() - ); - + err = kaapi_tracelib_init( getpid() ); if (err !=0) - printf("[OMP-TRACE] kaapi tracing, init error:%i, version: %s\n", err, get_kaapi_version()); + printf("[OMP-TRACE] TiKKi tracing, init error:%i, version: %s\n", err, get_kaapi_version()); else - printf("[OMP-TRACE] kaapi tracing version: %s\n",get_kaapi_version()); - printf("oth_info %p\n", __kaapi_oth_info); + printf("[OMP-TRACE] TiKKi tracing version: '%s'\n",get_kaapi_version()); + //printf("oth_info %p\n", __kaapi_oth_info); ompt_set_callback_t ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data"); ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id"); @@ -39,6 +67,9 @@ void initTool(ompt_function_lookup_t lookup) { return ++myuid; }; + //ompt_set_task_name = tikki_ompt_set_task_name; + //ompt_set_task_data = tikki_ompt_set_task_data; + #define CALLBACK(name, ...) 
\ do{ \ if (ompt_set_callback(name, (ompt_callback_t)name##_action) == \ @@ -63,14 +94,14 @@ extern "C" { { kaapi_tracelib_fini(); free(__kaapi_oth_info); - fprintf(stderr, "Exiting Tikki tool\n"); + fprintf(stderr, "[OMP-TRACE] Exiting Tikki tool\n"); } ompt_start_tool_result_t* ompt_start_tool( unsigned int omp_version, const char *runtime_version) { - fprintf(stderr, "Loading Tikki tool\n"); + fprintf(stderr, "[OMP-TRACE] Loading TiKKi tool\n"); static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize,0}; return &ompt_start_tool_result; } diff --git a/tracelib/CMakeLists.txt b/tracelib/CMakeLists.txt index e91301da00c3cafe27a5a81739062e4536d9b235..cb32ab232f5755f63215c33edf7521a6ebe25cca 100644 --- a/tracelib/CMakeLists.txt +++ b/tracelib/CMakeLists.txt @@ -1,6 +1,6 @@ add_custom_command ( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/git_hash.h - COMMAND ${SHELL_EXECUTABLE} ${CMAKE_SOURCE_DIR}/config/git_hash.sh ${CMAKE_CURRENT_BINARY_DIR}/git_hash.h + COMMAND ${SHELL_EXECUTABLE} ${CMAKE_SOURCE_DIR}/config/git_hash.sh ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/git_hash.h ) add_custom_command ( @@ -11,8 +11,21 @@ add_custom_command ( set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -include_directories( ${CMAKE_CURRENT_BINARY_DIR} ) +find_package(PAPI) +if (${PAPI_FOUND}) + add_compile_options("-DKAAPI_USE_PAPI=1") +else() + add_compile_options("-DKAAPI_USE_PAPI=0") +endif() + +include_directories(${CMAKE_CURRENT_BINARY_DIR}) add_library(tracelib SHARED kaapi_recorder.c kaapi_rt.c kaapi_trace_lib.c kaapi_hashmap.c kaapi_trace_rt.c kaapi_parser.c ${CMAKE_CURRENT_BINARY_DIR}/git_hash.h ${CMAKE_CURRENT_BINARY_DIR}/hw_count.h) target_link_libraries(tracelib Threads::Threads) + +if (${PAPI_FOUND}) + include_directories(${PAPI_INCLUDE_DIRS}) + target_link_libraries(tracelib ${PAPI_LIBRARIES}) +endif() + install(TARGETS tracelib DESTINATION lib) diff --git a/tracelib/kaapi_recorder.c b/tracelib/kaapi_recorder.c index 
dd91fd3dda955394be7bb98ddd2039d272431c75..8a9008de5f6b453ea3f42df66809da07830fda66 100644 --- a/tracelib/kaapi_recorder.c +++ b/tracelib/kaapi_recorder.c @@ -203,14 +203,24 @@ static int _kaapi_write_header( int kid ) for (i=0; i<kaapi_tracelib_param.fmt_listsize; ++i) { const kaapi_descrformat_t* fmt = kaapi_tracelib_param.fmt_list[i]; + // TODO ? + if (fmt ==0) continue; + if (header.taskfmt_count >= KAAPI_FORMAT_MAX){ + fprintf(stderr, "Warning: too many fmtdefs\n"); + break; + } kaapi_fmttrace_def* fmtdef = &header.fmtdefs[header.taskfmt_count]; fmtdef->fmtid = fmt->fmtid; - if (fmt->name !=0) - strncpy( fmtdef->name, fmt->name, 64); + if (fmt->name !=0){ + strncpy( fmtdef->name, fmt->name, 63); + fmtdef->name[63] = 0; + } else strncpy( fmtdef->name, "no name", 64); - if (fmt->color !=0) - strncpy( fmtdef->color, fmt->color, 32); + if (fmt->color !=0){ + strncpy( fmtdef->color, fmt->color, 31); + fmtdef->color[31] = 0; + } else strncpy( fmtdef->color, "0.0 0.0 1.0", 32); ++header.taskfmt_count; diff --git a/tracelib/kaapi_trace_lib.c b/tracelib/kaapi_trace_lib.c index 2d7f1bbbb66b5f3b68d45a5d62bf372394eba395..5082d5058b3ee0e3c36fb0e130daa7767478547d 100644 --- a/tracelib/kaapi_trace_lib.c +++ b/tracelib/kaapi_trace_lib.c @@ -108,6 +108,11 @@ typedef struct timespec struct_time; #include "kaapi_util.h" #include "kaapi_atomic.h" +#if ((KAAPI_PERF_ID_ENDSOFTWARE+KAAPI_MAX_HWCOUNTERS) > KAAPI_PERF_ID_MAX) +#error "The maximal size of the peformance counters handled by Kaapi should be extended. Please contact the authors." +#endif + + #if defined(__cplusplus) extern "C" { #endif @@ -117,6 +122,15 @@ extern "C" { */ +/* ------------------------------------------------------------------------------------------- */ +/* + Initialization +*/ +/* +1 each time kaapi_trace_init is called. 
The first call initialize the library +*/ +static int once_init = 0; + + /* ------------------------------------------------------------------------------------------- */ /* Global Variable @@ -559,10 +573,7 @@ int kaapi_tracelib_init( int gid ) { - static int once = 0; - if (once) return 0; - once = 1; - + if (++once_init >1) return 0; int i, error; /* Update counters: undefined code */ @@ -750,15 +761,15 @@ int kaapi_tracelib_init( } + /* ------------------------------------------------------------------------------------------- */ /** Finish trace. Assume that threads have reach the barrier and flush their event buffers. */ +static int once_fini = 0; void kaapi_tracelib_fini(void) { - static int once = 0; - if (once) return; - once = 1; + if (++once_fini < once_init) return; FILE *file = 0; char buffer[8192]; @@ -778,6 +789,7 @@ void kaapi_tracelib_fini(void) #endif /* Display stat per task */ +#if 0 // TG: 20-05-05 disable this feature if (kaapi_tracelib_param.display_perfcounter != KAAPI_NO_DISPLAY_PERF) { sprintf(filename, "stat.%i", getpid()); @@ -789,6 +801,7 @@ void kaapi_tracelib_fini(void) 1e-9*((double)kaapi_get_elapsedns_since_start()) ); } +#endif if (file !=0) { @@ -1072,7 +1085,7 @@ kaapi_tracelib_thread_t* kaapi_tracelib_thread_init( { if (kaapi_perf_idset_test( &ctxt->perfset, i)) { -printf("Attach event: '%s'\n", kaapi_perfctr_info[i].name); +//printf("Attach event: '%s'\n", kaapi_perfctr_info[i].name); papi_event_codes[count++] = kaapi_perfctr_info[i].eventcode; } } @@ -1671,7 +1684,7 @@ static void __kaapi_dump_access( #else unsigned int numaid = 0; #endif - KAAPI_EVENT_PUSH4(kproc, 0, KAAPI_EVT_TASK_ACCESS, task, mode, addr, numaid ); + KAAPI_EVENT_PUSH4(kproc, 0, KAAPI_EVT_TASK_ACCESS, task, mode, addr, numaid); /* how to count remote access if numa information not available ? 
*/ if (numaid == (unsigned int)-1) return; @@ -1728,6 +1741,87 @@ void kaapi_tracelib_task_access( __kaapi_dump_access(kproc, local_numaid, task, count_noalias, deps_noalias, decoder); } + + +static void __kaapi_dump_data( + kaapi_tracelib_thread_t* kproc, + int local_numaid, + kaapi_task_id_t task, + int count, + void** data, + size_t* size, + int* mode, + void (*decoder)(int, int*) +) +{ + for (int i=0; i<count; ++i) + { + int m = KAAPI_ACCESS_MODE_VOID; + void* addr = data[i]; + size_t sz = size[i]; + decoder( mode[i], &m); + if (m & KAAPI_ACCESS_MODE_V) + continue; + +#if defined(__linux__) && KAAPI_USE_NUMA + unsigned int numaid = kaapi_numa_getpage_id( addr ); +#else + unsigned int numaid = 0; +#endif + kaapi_event_t* evt = KAAPI_EVENT_GET(kproc, 0, KAAPI_EVT_TASK_DATA ); + if (evt) + { + evt->u.s.d0.p = task; + evt->u.s.d1.p = addr; + evt->u.s.d2.i = sz; + evt->u.s.d3.i32[0] = m; + evt->u.s.d3.i32[1] = numaid; + KAAPI_EVENT_PUSH(kproc, 0, KAAPI_EVT_TASK_DATA); + } + } +} + + +/* +*/ +void kaapi_tracelib_task_data( + kaapi_tracelib_thread_t* kproc, + kaapi_task_id_t task, + int count, + void** data, + size_t* size, + int* mode, + void (*decoder)(int,int*) +) +{ + if (!(kproc->event_mask & KAAPI_EVT_MASK(KAAPI_EVT_TASK_DATA))) + return; + + +#if defined(__linux__) + int localcpu = sched_getcpu(); +#if KAAPI_USE_NUMA + int local_numaid = numa_node_of_cpu(localcpu); +#else + int local_numaid = 0; +#endif +#else + int local_numaid = 0; +#endif +#if 0 +printf("In %s: count:%i, data[0]:%p, size[0], %li, mode[0]: %i\n", + __func__, + count, + data[0], + size[0], + mode[0] +); +#endif + __kaapi_dump_data(kproc, local_numaid, task, count, data, size, mode, decoder); +} + + + /* */ void kaapi_tracelib_taskwait_begin( @@ -1827,6 +1921,7 @@ kaapi_descrformat_t* kaapi_tracelib_reserve_perfcounter(void) retval->perfctr = perf; kaapi_tracelib_param.fmt_list[kaapi_tracelib_param.fmt_listsize] = retval; ++kaapi_tracelib_param.fmt_listsize; +//printf("%s\n", __func__ ); 
kaapi_assert(retval != 0); return retval; @@ -2011,6 +2106,7 @@ static int kaapi_get_events( return -1; +#if KAAPI_USE_PAPI /* Register PAPI counter to be at KAAPI_PERF_ID_PAPI_BASE+cnt in kaapi_perfctr_info */ if (type == KAAPI_PCTR_PAPI) @@ -2047,8 +2143,9 @@ static int kaapi_get_events( } break; } - } - else if (type == KAAPI_PCTR_LIBRARY) + } else +#endif + if (type == KAAPI_PCTR_LIBRARY) { if (event_code <KAAPI_PERF_ID_MAX) { diff --git a/tracelib/kaapi_trace_rt.c b/tracelib/kaapi_trace_rt.c index 790532cd1f26f31bdfc481d3c8913244e48b698f..dc79ecfc45478a92dc868144ec5f7e67f1393966 100644 --- a/tracelib/kaapi_trace_rt.c +++ b/tracelib/kaapi_trace_rt.c @@ -59,8 +59,8 @@ const char* kaapi_event_name[] /* 3 */ "TaskEnd", /* 4 */ "Dependency", /* 5 */ "Access", -/* 6 */ "DagCompBegin", -/* 7 */ "DagCompEnd", +/* 6 */ "Data", +/* 7 */ 0, /* 8 */ 0, /* 9 */ 0, /*10 */ "IdleBeg", diff --git a/ukilli/CMakeLists.txt b/ukilli/CMakeLists.txt index 283965bffd8faba5b5d3c4606008b7853b6664ea..aa06d436af22563aec2312b43ff36f6a71cce90f 100644 --- a/ukilli/CMakeLists.txt +++ b/ukilli/CMakeLists.txt @@ -8,13 +8,8 @@ set(SOURCES ${CMAKE_SOURCE_DIR}/poti/src/poti_header.c ) -add_custom_command ( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/hw_count.h - COMMAND ${SHELL_EXECUTABLE} ${CMAKE_SOURCE_DIR}/config/hw_count.sh ${CMAKE_CURRENT_BINARY_DIR}/hw_count.h - ) - include_directories( ${CMAKE_CURRENT_BINARY_DIR} ) -add_executable(ukilli ${SOURCES} ${CMAKE_CURRENT_BINARY_DIR}/hw_count.h) +add_executable(ukilli ${SOURCES}) target_link_libraries (ukilli tracelib) install(TARGETS ukilli DESTINATION bin) diff --git a/ukilli/kaapi_trace_simulator.cpp b/ukilli/kaapi_trace_simulator.cpp index b7c9563eb681e556ed0118a2af35eff321e74f26..8773a5848c608d3ba845ebcef9b11d64895cddd5 100644 --- a/ukilli/kaapi_trace_simulator.cpp +++ b/ukilli/kaapi_trace_simulator.cpp @@ -632,6 +632,9 @@ static void processor_simulate_event( case KAAPI_EVT_TASK_ACCESS: break; + case KAAPI_EVT_TASK_DATA: + break; + case 
KAAPI_EVT_TASK_SUCC: break; @@ -724,9 +727,6 @@ static void processor_simulate_event( ++proc->active; break; - case KAAPI_EVT_COMP_DAG: - break; - /* processing request */ case KAAPI_EVT_REQUEST_BEG: break; diff --git a/ukilli/ukilli.cpp b/ukilli/ukilli.cpp index dfbb08a89beb1a3597e0965a33e422116d38ce29..b57d3cc467fbf62e26e664dae160b2e92a4576a0 100644 --- a/ukilli/ukilli.cpp +++ b/ukilli/ukilli.cpp @@ -322,10 +322,13 @@ static void callback_print_event( << ", numa: " << KAAPI_EVENT_DATA(event,3,i); break; - /* unroll graph for static schedule */ - case KAAPI_EVT_COMP_DAG: - std::cout << (KAAPI_EVENT_DATA(event,0,i) == 1 ? "BEGIN":"END") - << ", key:" << KAAPI_EVENT_DATA(event,1,u); + case KAAPI_EVT_TASK_DATA: + std::cout << " ----------------- @:" << KAAPI_EVENT_DATA(event,0,p) + << " data@:" << KAAPI_EVENT_DATA(event,1,p) + << " size:" << KAAPI_EVENT_DATA(event,2,i) + << " m:" << (KAAPI_EVENT_DATA(event,3,i32)[0]) + << "(" << kaapi_getmodename((kaapi_access_mode_t)(KAAPI_EVENT_DATA(event,3,i32)[0])) << ")" + << ", numa: " << KAAPI_EVENT_DATA(event,3,i32[1]); break; /* idle = steal state */ @@ -1029,14 +1032,6 @@ static void callback_display_paje_event( /* can we add time step value in the gantt ? 
*/ break; - /* unroll graph for static schedule */ - case KAAPI_EVT_COMP_DAG: - if (KAAPI_EVENT_DATA(event,0,i) == 1) - kaapi_trace_poti_PushState(d0, name, "STATE", "db"); - else - kaapi_trace_poti_PopState (d0, name, "STATE"); - break; - case KAAPI_EVT_PARALLEL: if (KAAPI_EVENT_DATA(event,0,i) == 1) kaapi_trace_poti_PushState(d0, name, "STATE", "pi"); @@ -1295,11 +1290,15 @@ static int fnc_paje_gantt_close(kaapi_eventfile_header_t* header, uint64_t gantt */ struct access_t { access_t(kaapi_access_mode_t m, uint64_t i, uint64_t p, int nu) - : mode(m), idx(i), ptr(p), numaid(nu) + : mode(m), idx(i), ptr(p), size((uint64_t)-1), numaid(nu) + {} + access_t(kaapi_access_mode_t m, uint64_t i, uint64_t p, uint64_t s, int nu) + : mode(m), idx(i), ptr(p), size(s), numaid(nu) {} kaapi_access_mode_t mode; uint64_t idx; uint64_t ptr; + uint64_t size; int numaid; }; @@ -1330,7 +1329,7 @@ struct state_t : public event_t { /* */ struct task_info : public state_t { task_info( ) - : state_t(), ct(0), kid(0), fmtid(0), param(), perfctr(), pred(), succ() + : state_t(), ct(0), kid(0), fmtid(0), param(), data(), perfctr(), pred(), succ() { static uint64_t cnt_task = 0; keys[0] = keys[1] = 0; @@ -1348,6 +1347,7 @@ struct task_info : public state_t { uint64_t aff_tag; uint64_t keys[2]; std::vector<access_t> param; + std::vector<access_t> data; std::vector<perfctr_t> perfctr; std::vector<task_info*>pred; std::vector<task_info*>succ; @@ -1776,7 +1776,7 @@ static void callback_display_rastello( break; } -//printf("%" PRIu64 " Task: %p fmtid: %i\n", event->date, KAAPI_EVENT_DATA(event,0,p), (int)KAAPI_EVENT_DATA(event,1,u)); +//printf("%" PRIu64 " Implicit Task: %p fmtid: %i\n", event->date, KAAPI_EVENT_DATA(event,0,p), (int)KAAPI_EVENT_DATA(event,1,u)); std::map<uint64_t,kproc_t>::iterator kp = parallel_region_t::container_kproc.find( event->kid ); if (kp == parallel_region_t::container_kproc.end()) { @@ -1846,6 +1846,39 @@ printf("%i:: task[id:%i/%p] = {mode:%c}/%p\n", kid, task->id, 
(void*)task->addr, } break; + case KAAPI_EVT_TASK_DATA: + { + uint64_t ptask = KAAPI_EVENT_DATA(event,0,u); + uint64_t paddr = KAAPI_EVENT_DATA(event,1,u); + size_t size = KAAPI_EVENT_DATA(event,2,u); + int mode = KAAPI_EVENT_DATA(event,3,i32)[0]; + int numaid = (int)KAAPI_EVENT_DATA(event,3,i32)[1]; + + uint64_t idx; + if (rpr->container_implicit_task.find( ptask) != rpr->container_implicit_task.end()) + break; + task_info* task = rpr->get_taskinfo( ptask, false ); + if (task ==0) + { + printf("*** Event task_access does not correspond to event task_begin: task:%p\n", (void*)ptask ); + break; + } + + std::map<uint64_t,data_t>::iterator data = rpr->container_data.find( paddr ); + if (data == rpr->container_data.end()) + { + idx = rpr->cnt_iddata; + ++rpr->cnt_iddata; + rpr->container_data.insert( std::make_pair( paddr, data_t(idx, numaid) ) ); + } + else + idx = (int)data->second.cnt_iddata; + + task->data.push_back( access_t((kaapi_access_mode_t)mode, idx, paddr, size, numaid) ); + } + break; + + case KAAPI_EVT_TASK_PERFCOUNTER: { uint64_t addr = KAAPI_EVENT_DATA(event,0,u); @@ -1877,16 +1910,13 @@ printf("%i:: task[id:%i/%p] = {mode:%c}/%p\n", kid, task->id, (void*)task->addr, case KAAPI_EVT_TASK_SUCC: { - task_info* task = rpr->get_taskinfo( KAAPI_EVENT_DATA(event,0,u), false ); - if (task != 0)/* should be top stack task */ - { - task_info* succ = rpr->get_taskinfo( KAAPI_EVENT_DATA(event,1,u), true ); + task_info* task = rpr->get_taskinfo( KAAPI_EVENT_DATA(event,0,u), true ); + task_info* succ = rpr->get_taskinfo( KAAPI_EVENT_DATA(event,1,u), true ); #if DEBUG_RAST printf("pred[id:%i/%p] = {id:%i}/%p\n", succ->id, (void*)succ->addr, task->id, (void*)task->addr); #endif - succ->pred.push_back( task ); - task->succ.push_back( succ ); - } + succ->pred.push_back( task ); + task->succ.push_back( succ ); } break; @@ -1917,7 +1947,11 @@ printf("pred[id:%i/%p] = {id:%i}/%p\n", succ->id, (void*)succ->addr, task->id, ( { task->stop = event->date; task->numaid = 
(uint32_t)KAAPI_EVENT_DATA(event,1,i);
+#if DEBUG_RAST
+        printf("%i:: task end:id:%i/%p\n", kid, task->id, (void*)task->addr);
+#endif
         }
+        /* should be top ? */
         if (kp == parallel_region_t::container_kproc.end())
           printf("***[%d] Unkown thread kid: %i\n", __LINE__, event->kid);
 /*
@@ -1997,10 +2031,6 @@ printf("pred[id:%i/%p] = {id:%i}/%p\n", succ->id, (void*)succ->addr, task->id, (
     case KAAPI_EVT_PERFCOUNTER:
     break;
 
-    /* unroll graph for static schedule */
-    case KAAPI_EVT_COMP_DAG:
-    break;
-
     case KAAPI_EVT_TASKWAIT :
     case KAAPI_EVT_TASKGROUP:
     break;
@@ -2105,7 +2135,7 @@ printf("pred[id:%i/%p] = {id:%i}/%p\n", succ->id, (void*)succ->addr, task->id, (
       }
     break;
     default:
-      printf("***Unknown event number: %i\n", event->evtno);
+      printf("***(Rast)Unknown event number: %i\n", event->evtno);
       break;
   }
 }
@@ -2277,6 +2307,8 @@ int dot_parallel_region_t::openfile(kaapi_eventfile_header_t* header)
   return 0;
 }
 
+
+/* */
 int dot_parallel_region_t::closefile(int cpucount)
 {
   if (fout == 0) return 1;
@@ -2840,22 +2872,6 @@ void csv_parallel_region_t::dump_task_info( task_info* ti )
         ti->perfctr[i].value );
     }
   }
-#if 0 /* not yet in for csv */
-  for (size_t i=0; i< ti->param.size(); ++i)
-    switch (ti->param[i].mode)
-    {
-      case KAAPI_ACCESS_MODE_R:
-        fprintf(fout,"\taction_add_read(T, &blocks[id(\"A\",{%" PRIu64 "})]);\n", ti->param[i].idx);
-        break;
-      case KAAPI_ACCESS_MODE_W:
-        fprintf(fout,"\taction_add_write(T, &blocks[id(\"A\",{%" PRIu64 "})]);\n", ti->param[i].idx);
-        break;
-      case KAAPI_ACCESS_MODE_RW:
-        fprintf(fout,"\taction_add_readwrite(T, &blocks[id(\"A\",{%" PRIu64 "})]);\n", ti->param[i].idx);
-        break;
-      default: break;
-    }
-#endif
   fprintf(fout, "\n" );
 }
 
@@ -2943,6 +2959,235 @@ static void fnc_csv( int count, const char** filenames )
 }
 
 
+/* ============================= SOMP OUTPUT */
+/* SOMP: trace format. One parallel region written as a text file of task
+   records (trace_<parallel_id>.rec, see openfile and dump_task_info). */
+struct somp_parallel_region_t : public parallel_region_t {
+  somp_parallel_region_t(uintptr_t pid)
+   : parallel_region_t(pid)
+  {}
+
+  int openfile(kaapi_eventfile_header_t* header);
+  int closefile(int cpucount);
+  void dump_task_info( task_info* ti );
+private:
+};
+/* Factory returning new somp_parallel_region_t objects; fnc_somp hands it to ReadFiles. */
+class somp_parallel_region_factory_t {
+public:
+  virtual parallel_region_t* create(uintptr_t);
+};
+somp_parallel_region_factory_t somp_pr_factory;
+
+parallel_region_t* somp_parallel_region_factory_t::create(uintptr_t prid)
+{
+  return new somp_parallel_region_t(prid);
+}
+
+/* Open trace_<parallel_id>.rec for writing (exit on failure) and register in rastello_fmtname the name of every task format of header whose fmtid is not 0. Returns 0. */
+int somp_parallel_region_t::openfile(kaapi_eventfile_header_t* header)
+{
+  FILE* file = 0;
+  sprintf(filename, "trace_%i.rec", (int)parallel_id);
+  fout = file = fopen(filename,"w");
+  if (file ==0)
+  {
+    fprintf(stderr,"*** Cannot open file '%s'\n",filename);
+    exit(-1);
+  }
+
+  for (int cnt=0; cnt<header->taskfmt_count; ++cnt)
+    if (header->fmtdefs[cnt].fmtid !=0)
+    {
+      rastello_fmtname.insert( std::make_pair(header->fmtdefs[cnt].fmtid, header->fmtdefs[cnt].name) );
+// fprintf(stdout, "insert fmtid: %" PRIu64 " -> name: %s\n", header->fmtdefs[cnt].fmtid, header->fmtdefs[cnt].name);
+    }
+
+  return 0;
+}
+
+/* Dump every task of container_orderedlist into the open file, then close it. Returns 1 if no file is open, 0 otherwise; cpucount is not used. */
+int somp_parallel_region_t::closefile(int cpucount)
+{
+  if (fout == 0) return 1;
+  std::list<task_info*>::iterator ibeg = container_orderedlist.begin();
+  std::list<task_info*>::iterator iend = container_orderedlist.end();
+  /* reset container_data used in dump_task_info to attach version number to data */
+  container_data.clear();
+  while (ibeg != iend)
+  {
+    dump_task_info(*ibeg);
+    ++ibeg;
+  }
+
+  fprintf(stdout,"*** File '%s' generated\n", filename);
+  fclose(fout);
+  fout = 0;
+
+  //rastello_fmtname.clear();
+  container_data.clear();
+  return 0;
+}
+
+
+/* Write the record of one task (ti) to fout. Kind of format
+  Name*: dgemm
+  JobId: 11
+  SubmitOrder: 3
+  SubmitTime: 0.1
+  StartTime*: 0.2
+  EndTime*: 0.3
+  MemoryNode*: 0
+  Handles*: 2df4ce5 3dve4a4 1dvgf5d
+  Modes*: R R RW
+  Sizes: 2000x2000x8 2000x2000x8 2000x2000x8
+  MData: <list of addresses>
+  MSize: <list of size (Bytes)>
+  MMode: <list of modes (R|W|RW) only>
+  DependsOn*: 5
+*/
+void somp_parallel_region_t::dump_task_info( task_info* ti )
+{
+  /* task */
+  std::map<uint64_t, char*>::iterator fmt =rastello_fmtname.find(ti->fmtid);
+
+  if (fmt == rastello_fmtname.end())
+    fprintf(stdout, "cannot find fmtid: %" PRIu64 "\n", ti->fmtid);
+
+  fprintf(fout,"Name: %s\n"
+               "JobId: %li\n"
+               "StartTime: %f\n"
+               "EndTime: %f\n"
+               "MemoryNode: %li\n",
+    (fmt == rastello_fmtname.end() ? "<unknown>" : fmt->second ),
+    (long)ti->id, /* %li expects a long, whatever the integer type of id */
+    (double)ti->start*1e-6 /* ns -> millis */,
+    (double)ti->stop*1e-6 /* ns -> millis */,
+    (long)ti->numaid /* %li expects a long */
+  );
+
+  /* handles */
+  fprintf(fout,"Handles:");
+  for (size_t i=0; i<ti->param.size(); ++i)
+  {
+    uint64_t ptr = ti->param[i].ptr;
+    fprintf(fout," %p", (void*)(uintptr_t)ptr); /* %p requires a pointer argument */
+  }
+  fprintf(fout,"\n");
+
+  /* Modes */
+  fprintf(fout,"Modes:");
+  for (size_t i=0; i<ti->param.size(); ++i)
+  {
+    kaapi_access_mode_t mode = ti->param[i].mode;
+    if (KAAPI_ACCESS_IS_READWRITE(mode))
+      fprintf(fout," RW");
+    else if (KAAPI_ACCESS_IS_READ(mode))
+      fprintf(fout," R");
+    else if (KAAPI_ACCESS_IS_WRITE(mode))
+      fprintf(fout," W");
+    else if (mode == (KAAPI_ACCESS_MODE_C|KAAPI_ACCESS_MODE_CW))
+      fprintf(fout," MX");
+    else if (mode & KAAPI_ACCESS_MODE_SCRATCH)
+      fprintf(fout," S");
+  }
+  fprintf(fout,"\n");
+
+  /* Sizes: the field is written empty */
+  fprintf(fout,"Sizes:");
+  fprintf(fout,"\n");
+
+  /* MData */
+  if (ti->data.size() >0)
+  {
+    fprintf(fout,"MData:");
+    for (size_t i=0; i<ti->data.size(); ++i)
+    {
+      uint64_t ptr = ti->data[i].ptr;
+      fprintf(fout," %p", (void*)(uintptr_t)ptr); /* %p requires a pointer argument */
+    }
+    fprintf(fout,"\n");
+
+    /* MSize */
+    fprintf(fout,"MSize:");
+    for (size_t i=0; i<ti->data.size(); ++i)
+    {
+      uint64_t size = ti->data[i].size;
+      fprintf(fout," %" PRIu64, size);
+    }
+    fprintf(fout,"\n");
+
+    /* MMode */
+    fprintf(fout,"MMode:");
+    for (size_t i=0; i<ti->data.size(); ++i)
+    {
+      kaapi_access_mode_t mode = ti->data[i].mode;
+      if (KAAPI_ACCESS_IS_READWRITE(mode))
+        fprintf(fout," RW");
+      else if (KAAPI_ACCESS_IS_READ(mode))
+        fprintf(fout," R");
+      else if (KAAPI_ACCESS_IS_WRITE(mode))
+        fprintf(fout," W");
+      else if (mode & KAAPI_ACCESS_MODE_SCRATCH)
+        fprintf(fout," S");
+      else
+        fprintf(stderr," Invalid mode ");
+    }
+    fprintf(fout,"\n");
+  }
+
+  /* DependsOn */
+  fprintf(fout,"DependsOn:");
+  for (size_t i=0; i<ti->pred.size(); ++i)
+  {
+    fprintf(fout," %li", (long)ti->pred[i]->id); /* %li expects a long */
+  }
+  fprintf(fout,"\n\n");
+}
+
+
+
+/* Handler returned for the --somp option: reuse rastello data structure,
+   then write one trace file per parallel region. */
+static void fnc_somp( int count, const char** filenames )
+{
+  rastello_parallel_region.reserve(128);
+  rastello_front_parallel_region.reserve(256);
+  rastello_fs = OpenFiles( count, filenames );
+  if (rastello_fs ==0)
+    return;
+  if (GetHeader(rastello_fs, &rastello_header) !=0)
+    return;
+
+  /* replay the event files through callback_display_rastello; somp_pr_factory is
+     passed along, presumably so that regions are created as somp_parallel_region_t
+  */
+  ReadFiles(rastello_fs, &somp_pr_factory, callback_display_rastello );
+
+  for (size_t i = 0; i<rastello_parallel_region.size(); ++i)
+  {
+    somp_parallel_region_t* dotpr = (somp_parallel_region_t*)rastello_parallel_region[i];
+    if (dotpr ==0) continue;
+    if (dotpr->openfile( &rastello_header) !=0)
+    {
+      exit(1);
+    }
+    if (dotpr->closefile(rastello_parallel_region[i]->nproc) !=0)
+    {
+      exit(1);
+    }
+  }
+
+// if (katracereader_options.dotoption & DOT_OPTIONS_CREGION )
+// {
+// rastello_parallel_region[0]->closefile( GetProcessorCount(rastello_fs) );
+// }
+
+  /* close & umap */
+  CloseFiles(rastello_fs);
+}
+
+
 
 /* */
 
@@ -2967,8 +3212,9 @@ static void print_usage(const char* msg = 0)
   fprintf(stderr, " --dot-nolabel : do not output label.\n");
   fprintf(stderr, " --dot-cregion : output graph accross parallel regions.\n");
 // fprintf(stderr, " --dot-nodata : do not output data node.\n");
-  fprintf(stderr, " -r | --rastello : output Rastello format compatible with CORSE team simulator.\n");
-  fprintf(stderr, " Output filename is rastello_<n>.c, one per parallel region.\n");
+  fprintf(stderr, " -s | --somp : output file with SOMP trace format .\n");
+// fprintf(stderr, " -r | --rastello : output Rastello format compatible with CORSE team simulator.\n");
+// fprintf(stderr, "
Output filename is rastello_<n>.c, one per parallel region.\n"); // fprintf(stderr, " --steal-event : include steal events in trace.\n"); // fprintf(stderr, " --gpu-trace : include GPU trace information.\n"); // fprintf(stderr, " --gpu-transfer : include GPU transfers.\n"); @@ -3005,6 +3251,8 @@ static kaapi_fnc_event parse_option( const int argc, const char** argv, int* cou option = 'a'; else if ((strcmp(argv[i], "--display-header") ==0)||(strcmp(argv[i], "-e") ==0)) option = 'h'; + else if ((strcmp(argv[i], "--somp") ==0) || (strcmp(argv[i], "-s") ==0)) + option = 'o'; else if ((strcmp(argv[i], "--rastello") ==0) || (strcmp(argv[i], "-r") ==0)) option = 'r'; else if ((strcmp(argv[i], "--csv") ==0) || (strcmp(argv[i], "-c") ==0)) @@ -3123,6 +3371,9 @@ static kaapi_fnc_event parse_option( const int argc, const char** argv, int* cou case 'd': return fnc_dot; + case 'o': + return fnc_somp; + case 'H': default: print_usage();