diff --git a/includes/kaapi_trace.h b/includes/kaapi_trace.h index 256013240807f7a48d44f6146e6ba9b7b627ecad..bfe968a528e2ee1eae9fa3921016ba7a15ff4a3d 100644 --- a/includes/kaapi_trace.h +++ b/includes/kaapi_trace.h @@ -538,13 +538,12 @@ typedef enum kaapi_access_mode_t { KAAPI_ACCESS_MODE_S = 16, /* 0001 0000 : stack data */ KAAPI_ACCESS_MODE_T = 32, /* 0010 0000 : for Quark support: scratch mode or temporary */ KAAPI_ACCESS_MODE_P = 64, /* 0100 0000 : */ - KAAPI_ACCESS_MODE_IP = 128, /* 1000 0000 : in place, for CW only */ + KAAPI_ACCESS_MODE_C = 128, /* 1000 0000 : commute only - for CW only - */ KAAPI_ACCESS_MODE_RW = KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W, KAAPI_ACCESS_MODE_STACK = KAAPI_ACCESS_MODE_S|KAAPI_ACCESS_MODE_RW, KAAPI_ACCESS_MODE_SCRATCH = KAAPI_ACCESS_MODE_T|KAAPI_ACCESS_MODE_V, KAAPI_ACCESS_MODE_CWP = KAAPI_ACCESS_MODE_P|KAAPI_ACCESS_MODE_CW, - KAAPI_ACCESS_MODE_ICW = KAAPI_ACCESS_MODE_IP|KAAPI_ACCESS_MODE_CW } kaapi_access_mode_t; #define KAAPI_ACCESS_MASK_RIGHT_MODE 0x7F /* 5 bits, ie bit 0, 1, 2, 3, 4, including P mode */ diff --git a/tool/ompt_callbacks.cpp b/tool/ompt_callbacks.cpp index de3c5079da48fa1e8b591369e2c2a43145406e99..6fbcdc28965a81dbb14b68c0db9cf45c01cc2043 100644 --- a/tool/ompt_callbacks.cpp +++ b/tool/ompt_callbacks.cpp @@ -182,14 +182,38 @@ static void ompt_decoder( ompt_dependence_t* dep, int i, void** addr, size_t* le //*len = dep[i].variable_len; *len = 1; *mode = KAAPI_ACCESS_MODE_VOID; - if (dep[i].dependence_type & ompt_dependence_type_in) - *mode |=KAAPI_ACCESS_MODE_R; - if (dep[i].dependence_type & ompt_dependence_type_out) - *mode |=KAAPI_ACCESS_MODE_W; - if (dep[i].dependence_type & ompt_dependence_type_inout) - *mode =KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W; - //if (dep[i].dependence_flags.commute) - //*mode |=KAAPI_ACCESS_MODE_CW; + switch (dep[i].dependence_type) + { + case ompt_dependence_type_in: + { + *mode =KAAPI_ACCESS_MODE_R; + break; + } + case ompt_dependence_type_out: + { + *mode =KAAPI_ACCESS_MODE_W; + 
break; + } + case ompt_dependence_type_inout: + { + *mode =KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W; + break; + } + case ompt_dependence_type_mutexinoutset: + { + *mode =KAAPI_ACCESS_MODE_C|KAAPI_ACCESS_MODE_CW; + break; + } + default: + { + static int alreadydisplay = 0; + if (alreadydisplay ==0) + { + printf("*** OpenMP pependence type: %i not implemented\n", dep[i].dependence_type); + alreadydisplay = 1; + } + } + } } @@ -383,7 +407,7 @@ ompt_callback_task_create_action( if (parent_task_data) { tikki_task_id_t *parent_task = (tikki_task_id_t*)parent_task_data->ptr; printf("%" PRIu64 ": ompt_task_create: parent_id=%" PRIu64 ", task_id=%" PRIu64\ - ", type=%i, has_dep=%i, ptr=%" PRIu64 "\n", thread_id, + ", type=%i, has_dep=%i, ptr=%p\n", thread_id, parent_task->id, task->id, type, has_dependences, task->task_ptr); } else { @@ -432,7 +456,8 @@ ompt_callback_task_schedule_action( kaapi_tracelib_thread_switchstate(koti->kproc); //free(prior_task); } - if (next_task_data->ptr) { + if (next_task_data->ptr) +{ tikki_task_id_t *new_task = (tikki_task_id_t *)next_task_data->ptr; // We are starting a task #if LOG @@ -585,14 +610,38 @@ ompt_callback_dependences_action ( ompt_data_t *task_data, const ompt_dependence_t *deps, int ndeps - ) +) { tikki_task_id_t *task = (tikki_task_id_t *)task_data->ptr; uint64_t thread_id = ompt_get_thread_data()->value; task->deps = (ompt_dependence_t *)malloc(ndeps * sizeof(ompt_dependence_t)); #if LOG - printf("%" PRIu64 ": ompt_event_task_dependences: tsak_id=%" PRIu64 ", #deps=%i, deps: %p\n", thread_id, task->id, ndeps, task->deps); + printf("%" PRIu64 ": ompt_event_dependences: tsak_id=%" PRIu64 ", #deps=%i, deps: %p\n", thread_id, task->id, ndeps, task->deps); #endif task->ndeps = ndeps; memcpy(task->deps, deps, ndeps*sizeof(ompt_dependence_t)); } + + +void +ompt_callback_task_dependence_action ( + ompt_data_t *src_task_data, + ompt_data_t *sink_task_data +) +{ + tikki_task_id_t *src_task = (tikki_task_id_t *)src_task_data->ptr; + 
tikki_task_id_t *dest_task = (tikki_task_id_t *)sink_task_data->ptr; + uint64_t thread_id = ompt_get_thread_data()->value; + kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id]; + + kaapi_tracelib_task_depend( + koti->kproc, + (kaapi_task_id_t)src_task->id, + (kaapi_task_id_t)dest_task->id + ); + +#if LOG + printf("%" PRIu64 ": ompt_event_task_dependence: tsak_id=%" PRIu64 ", sink: %" PRIu64 "\n", thread_id, src_task->id, dest_task->id); +#endif +} + diff --git a/tool/ompt_callbacks.def b/tool/ompt_callbacks.def index 99a78d7a494d157f3b74a858a8d438dda3255457..e4b2b7113131204f354e3567a50ee060491680eb 100644 --- a/tool/ompt_callbacks.def +++ b/tool/ompt_callbacks.def @@ -13,6 +13,7 @@ CALLBACK(ompt_callback_task_create, ompt_data_t*, const ompt_frame_t*, ompt_data CALLBACK(ompt_callback_task_schedule, ompt_data_t *, ompt_task_status_t, ompt_data_t *) CALLBACK(ompt_callback_implicit_task, ompt_scope_endpoint_t, ompt_data_t *, ompt_data_t*, unsigned int, unsigned int, int) CALLBACK(ompt_callback_dependences, ompt_data_t *, const ompt_dependence_t *, int) +CALLBACK(ompt_callback_task_dependence, ompt_data_t *, ompt_data_t * ) UNUSED(ompt_callback_target) UNUSED(ompt_callback_target_data_op) UNUSED(ompt_callback_target_submit) @@ -23,8 +24,6 @@ UNUSED(ompt_callback_device_load) UNUSED(ompt_callback_device_unload) UNUSED(ompt_callback_sync_region_wait) UNUSED(ompt_callback_mutex_released) -UNUSED(ompt_callback_dependences) -UNUSED(ompt_callback_task_dependence) UNUSED(ompt_callback_work) UNUSED(ompt_callback_master) UNUSED(ompt_callback_target_map) diff --git a/tracelib/kaapi_recorder.c b/tracelib/kaapi_recorder.c index dd91fd3dda955394be7bb98ddd2039d272431c75..15beccbbf584698522ae698345a907fbde4b9241 100644 --- a/tracelib/kaapi_recorder.c +++ b/tracelib/kaapi_recorder.c @@ -203,6 +203,8 @@ static int _kaapi_write_header( int kid ) for (i=0; i<kaapi_tracelib_param.fmt_listsize; ++i) { const kaapi_descrformat_t* fmt = kaapi_tracelib_param.fmt_list[i]; + // TODO ? 
+ if (fmt ==0) continue; kaapi_fmttrace_def* fmtdef = &header.fmtdefs[header.taskfmt_count]; fmtdef->fmtid = fmt->fmtid; if (fmt->name !=0) diff --git a/tracelib/kaapi_trace_lib.c b/tracelib/kaapi_trace_lib.c index e552489bd528424f6ca642938b58b644dc6454b2..fb71672079749c5781f5f7d758821fcee50b0583 100644 --- a/tracelib/kaapi_trace_lib.c +++ b/tracelib/kaapi_trace_lib.c @@ -783,6 +783,7 @@ void kaapi_tracelib_fini(void) #endif /* Display stat per task */ +#if 0 // TG: 20-05-05 disable this feature if (kaapi_tracelib_param.display_perfcounter != KAAPI_NO_DISPLAY_PERF) { sprintf(filename, "stat.%i", getpid()); @@ -794,6 +795,7 @@ void kaapi_tracelib_fini(void) 1e-9*((double)kaapi_get_elapsedns_since_start()) ); } +#endif if (file !=0) { @@ -1832,6 +1834,7 @@ kaapi_descrformat_t* kaapi_tracelib_reserve_perfcounter(void) retval->perfctr = perf; kaapi_tracelib_param.fmt_list[kaapi_tracelib_param.fmt_listsize] = retval; ++kaapi_tracelib_param.fmt_listsize; +//printf("%s\n", __func__ ); kaapi_assert(retval != 0); return retval; diff --git a/ukilli/ukilli.cpp b/ukilli/ukilli.cpp index dfbb08a89beb1a3597e0965a33e422116d38ce29..88e5a1f5c459164700500d4cda8be58ba1d359c1 100644 --- a/ukilli/ukilli.cpp +++ b/ukilli/ukilli.cpp @@ -1776,7 +1776,7 @@ static void callback_display_rastello( break; } -//printf("%" PRIu64 " Task: %p fmtid: %i\n", event->date, KAAPI_EVENT_DATA(event,0,p), (int)KAAPI_EVENT_DATA(event,1,u)); +//printf("%" PRIu64 " Implicit Task: %p fmtid: %i\n", event->date, KAAPI_EVENT_DATA(event,0,p), (int)KAAPI_EVENT_DATA(event,1,u)); std::map<uint64_t,kproc_t>::iterator kp = parallel_region_t::container_kproc.find( event->kid ); if (kp == parallel_region_t::container_kproc.end()) { @@ -1877,16 +1877,13 @@ printf("%i:: task[id:%i/%p] = {mode:%c}/%p\n", kid, task->id, (void*)task->addr, case KAAPI_EVT_TASK_SUCC: { - task_info* task = rpr->get_taskinfo( KAAPI_EVENT_DATA(event,0,u), false ); - if (task != 0)/* should be top stack task */ - { - task_info* succ = 
rpr->get_taskinfo( KAAPI_EVENT_DATA(event,1,u), true ); + task_info* task = rpr->get_taskinfo( KAAPI_EVENT_DATA(event,0,u), true ); + task_info* succ = rpr->get_taskinfo( KAAPI_EVENT_DATA(event,1,u), true ); #if DEBUG_RAST printf("pred[id:%i/%p] = {id:%i}/%p\n", succ->id, (void*)succ->addr, task->id, (void*)task->addr); #endif - succ->pred.push_back( task ); - task->succ.push_back( succ ); - } + succ->pred.push_back( task ); + task->succ.push_back( succ ); } break; @@ -1917,7 +1914,11 @@ printf("pred[id:%i/%p] = {id:%i}/%p\n", succ->id, (void*)succ->addr, task->id, ( { task->stop = event->date; task->numaid = (uint32_t)KAAPI_EVENT_DATA(event,1,i); +#if DEBUG_RAST + printf("%i:: task end:id:%i/%p\n", kid, task->id, (void*)task->addr); +#endif } + /* should be top ? */ if (kp == parallel_region_t::container_kproc.end()) printf("***[%d] Unkown thread kid: %i\n", __LINE__, event->kid); /* @@ -2277,6 +2278,8 @@ int dot_parallel_region_t::openfile(kaapi_eventfile_header_t* header) return 0; } + +/* */ int dot_parallel_region_t::closefile(int cpucount) { if (fout == 0) return 1; @@ -2840,22 +2843,6 @@ void csv_parallel_region_t::dump_task_info( task_info* ti ) ti->perfctr[i].value ); } } -#if 0 /* not yet in for csv */ - for (size_t i=0; i< ti->param.size(); ++i) - switch (ti->param[i].mode) - { - case KAAPI_ACCESS_MODE_R: - fprintf(fout,"\taction_add_read(T, &blocks[id(\"A\",{%" PRIu64 "})]);\n", ti->param[i].idx); - break; - case KAAPI_ACCESS_MODE_W: - fprintf(fout,"\taction_add_write(T, &blocks[id(\"A\",{%" PRIu64 "})]);\n", ti->param[i].idx); - break; - case KAAPI_ACCESS_MODE_RW: - fprintf(fout,"\taction_add_readwrite(T, &blocks[id(\"A\",{%" PRIu64 "})]);\n", ti->param[i].idx); - break; - default: break; - } -#endif fprintf(fout, "\n" ); } @@ -2943,6 +2930,194 @@ static void fnc_csv( int count, const char** filenames ) } +/* ============================= SOMP OUTPUT */ +/* SOMP: trace format +*/ +struct somp_parallel_region_t : public parallel_region_t { + 
somp_parallel_region_t(uintptr_t pid) + : parallel_region_t(pid) + {} + + int openfile(kaapi_eventfile_header_t* header); + int closefile(int cpucount); + void dump_task_info( task_info* ti ); +private: +}; + +class somp_parallel_region_factory_t { +public: + virtual parallel_region_t* create(uintptr_t); +}; +somp_parallel_region_factory_t somp_pr_factory; + +parallel_region_t* somp_parallel_region_factory_t::create(uintptr_t prid) +{ + return new somp_parallel_region_t(prid); +} + + +int somp_parallel_region_t::openfile(kaapi_eventfile_header_t* header) +{ + FILE* file = 0; + sprintf(filename, "trace_%i.rec", (int)parallel_id); + fout = file = fopen(filename,"w"); + if (file ==0) + { + fprintf(stderr,"*** Cannot open file '%s'\n",filename); + exit(-1); + } + + for (int cnt=0; cnt<header->taskfmt_count; ++cnt) + if (header->fmtdefs[cnt].fmtid !=0) + { + rastello_fmtname.insert( std::make_pair(header->fmtdefs[cnt].fmtid, header->fmtdefs[cnt].name) ); +// fprintf(stdout, "insert fmtid: %" PRIu64 " -> name: %s\n", header->fmtdefs[cnt].fmtid, header->fmtdefs[cnt].name); + } + + return 0; +} + +/* */ +int somp_parallel_region_t::closefile(int cpucount) +{ + if (fout == 0) return 1; + std::list<task_info*>::iterator ibeg = container_orderedlist.begin(); + std::list<task_info*>::iterator iend = container_orderedlist.end(); + /* reset container_data used in dump_task_info to attach version number to data */ + container_data.clear(); + while (ibeg != iend) + { + dump_task_info(*ibeg); + ++ibeg; + } + + fprintf(stdout,"*** File '%s' generated\n", filename); + fclose(fout); + fout = 0; + + //rastello_fmtname.clear(); + container_data.clear(); + return 0; +} + + +/* Kind of format + Name*: dgemm + JobId: 11 + SubmitOrder: 3 + SubmitTime: 0.1 + StartTime*: 0.2 + EndTime*: 0.3 + MemoryNode*: 0 + Handles*: 2df4ce5 3dve4a4 1dvgf5d + Modes*: R R RW + Sizes: 2000x2000x8 2000x2000x8 2000x2000x8 + DependsOn*: 5 +*/ +void somp_parallel_region_t::dump_task_info( task_info* ti ) +{ + /* 
task */ + std::map<uint64_t, char*>::iterator fmt =rastello_fmtname.find(ti->fmtid); + + if (fmt == rastello_fmtname.end()) + fprintf(stdout, "cannot find fmtid: %" PRIu64 "\n", ti->fmtid); + + fprintf(fout,"Name: %s\n" + "JobId: %li\n" + "StartTime: %f\n" + "EndTime: %f\n" + "MemoryNode: %li\n", + (fmt == rastello_fmtname.end() ? "<unknown>" : fmt->second ), + ti->id, + (double)ti->start*1e-6 /* ns -> millis */, + (double)ti->stop*1e-6 /* ns -> millis */, + ti->numaid + ); + + /* handles */ + fprintf(fout,"Handles:"); + for (int i=0; i<ti->param.size(); ++i) + { + uint64_t ptr = ti->param[i].ptr; + fprintf(fout," %p", ptr); + } + fprintf(fout,"\n"); + + /* Modes */ + fprintf(fout,"Modes:"); + for (int i=0; i<ti->param.size(); ++i) + { + kaapi_access_mode_t mode = ti->param[i].mode; + if (KAAPI_ACCESS_IS_READWRITE(mode)) + fprintf(fout," RW"); + else if (KAAPI_ACCESS_IS_READ(mode)) + fprintf(fout," R"); + else if (KAAPI_ACCESS_IS_WRITE(mode)) + fprintf(fout," W"); + else if (mode == (KAAPI_ACCESS_MODE_C|KAAPI_ACCESS_MODE_CW)) + fprintf(fout," MX"); + else if (mode & KAAPI_ACCESS_MODE_SCRATCH) + fprintf(fout," S"); + } + fprintf(fout,"\n"); + + /* Sizes */ + fprintf(fout,"Sizes:"); + fprintf(fout,"\n"); + + + /* DependsOn */ + fprintf(fout,"DependsOn:"); + for (int i=0; i<ti->pred.size(); ++i) + { + fprintf(fout," %li", ti->pred[i]->id); + } + fprintf(fout,"\n\n"); +} + + + +/* reuse rastello data structure +*/ +static void fnc_somp( int count, const char** filenames ) +{ + rastello_parallel_region.reserve(128); + rastello_front_parallel_region.reserve(256); + rastello_fs = OpenFiles( count, filenames ); + if (rastello_fs ==0) + return; + if (GetHeader(rastello_fs, &rastello_header) !=0) + return; + + /* read the event files: parallel regions are created through + somp_pr_factory, then each one is dumped to its own trace file below + */ + ReadFiles(rastello_fs, &somp_pr_factory, callback_display_rastello ); + + for (size_t i = 0; i<rastello_parallel_region.size(); ++i) + { + 
somp_parallel_region_t* dotpr = (somp_parallel_region_t*)rastello_parallel_region[i]; + if (dotpr ==0) continue; + if (dotpr->openfile( &rastello_header) !=0) + { + exit(1); + } + if (dotpr->closefile(rastello_parallel_region[i]->nproc) !=0) + { + exit(1); + } + } + +// if (katracereader_options.dotoption & DOT_OPTIONS_CREGION ) +// { +// rastello_parallel_region[0]->closefile( GetProcessorCount(rastello_fs) ); +// } + + /* close & umap */ + CloseFiles(rastello_fs); +} + + /* */ @@ -2967,8 +3142,9 @@ static void print_usage(const char* msg = 0) fprintf(stderr, " --dot-nolabel : do not output label.\n"); fprintf(stderr, " --dot-cregion : output graph accross parallel regions.\n"); // fprintf(stderr, " --dot-nodata : do not output data node.\n"); - fprintf(stderr, " -r | --rastello : output Rastello format compatible with CORSE team simulator.\n"); - fprintf(stderr, " Output filename is rastello_<n>.c, one per parallel region.\n"); + fprintf(stderr, " -s | --somp : output file with SOMP trace format .\n"); +// fprintf(stderr, " -r | --rastello : output Rastello format compatible with CORSE team simulator.\n"); +// fprintf(stderr, " Output filename is rastello_<n>.c, one per parallel region.\n"); // fprintf(stderr, " --steal-event : include steal events in trace.\n"); // fprintf(stderr, " --gpu-trace : include GPU trace information.\n"); // fprintf(stderr, " --gpu-transfer : include GPU transfers.\n"); @@ -3005,6 +3181,8 @@ static kaapi_fnc_event parse_option( const int argc, const char** argv, int* cou option = 'a'; else if ((strcmp(argv[i], "--display-header") ==0)||(strcmp(argv[i], "-e") ==0)) option = 'h'; + else if ((strcmp(argv[i], "--somp") ==0) || (strcmp(argv[i], "-s") ==0)) + option = 'o'; else if ((strcmp(argv[i], "--rastello") ==0) || (strcmp(argv[i], "-r") ==0)) option = 'r'; else if ((strcmp(argv[i], "--csv") ==0) || (strcmp(argv[i], "-c") ==0)) @@ -3123,6 +3301,9 @@ static kaapi_fnc_event parse_option( const int argc, const char** argv, int* cou case 
'd': return fnc_dot; + case 'o': + return fnc_somp; + case 'H': default: print_usage();