Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 533a6ec3 authored by Hakan Acundas
Browse files

update on perfevent module

parent 23a8ee70
Branches monitoring
No related tags found
No related merge requests found
......@@ -90,7 +90,7 @@ static void infocbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, vo
DEBUG_WAKEUP_THREAD(lock);
}
int main(int argc, char **argv)
int main(int argc, char **argv)
{
pmix_status_t rc;
pmix_value_t *val = NULL;
......@@ -126,7 +126,6 @@ int main(int argc, char **argv)
myproc.rank);
goto done;
}
/* job-related info is found in our nspace, assigned to the
* wildcard rank as it doesn't relate to a specific rank. Setup
* a name to retrieve such values */
......@@ -186,19 +185,15 @@ int main(int argc, char **argv)
PMIX_INFO_LOAD(&info[1], PMIX_MONITOR_PERFEVENT_TIME, &n, PMIX_UINT32);
n = 2; // two metrics can be missed before declaring us "stalled"
PMIX_INFO_LOAD(&info[2], PMIX_MONITOR_PERFEVENT_DROPS, &n, PMIX_UINT32);
/* make the request */
DEBUG_CONSTRUCT_LOCK(&mylock);
for (int i = 0; i < 4; i++)
{
if (PMIX_SUCCESS
!= (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_PERFEVENT_ALERT, info, 4, infocbfunc,
(void *) &mylock))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace,
myproc.rank, rc);
DEBUG_DESTRUCT_LOCK(&mylock);
goto done;
}
if (PMIX_SUCCESS
!= (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_PERFEVENT_ALERT, info, 4, infocbfunc,
(void *) &mylock))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace,
myproc.rank, rc);
DEBUG_DESTRUCT_LOCK(&mylock);
goto done;
}
DEBUG_WAIT_THREAD(&mylock);
......
#
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
#
# Copyright (c) 2017-2019 Intel, Inc. All rights reserved.
# Copyright (c) 2022 Nanook Consulting. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Automake rules for the psensor "perfevent" MCA component. The same source
# list is built either as a loadable module (DSO build) or as a non-installed
# libtool library (static build), selected by the conditional below.

# Help-message file for this component; the dist_ prefix ships it in the
# distribution tarball, and the pmixdata_ prefix installs it into
# $(pmixdatadir) (that directory is defined outside this file).
dist_pmixdata_DATA = help-pmix-psensor-perfevent.txt
# Source files shared by both build flavors of the component.
sources = \
psensor_perfevent.c \
psensor_perfevent.h \
psensor_perfevent_component.c
# Make the output library in this directory, and name it either
# pmix_mca_<type>_<name>.la (for DSO builds) or
# libpmix_mca_<type>_<name>.la (for static builds).
# Exactly one of component_install / component_noinst is non-empty, so only
# one of the two library targets declared further down is actually built.
if MCA_BUILD_pmix_psensor_perfevent_DSO
component_noinst =
component_install = pmix_mca_psensor_perfevent.la
else
component_noinst = libpmix_mca_psensor_perfevent.la
component_install =
endif
# DSO flavor: installed into $(pmixlibdir).
mcacomponentdir = $(pmixlibdir)
mcacomponent_LTLIBRARIES = $(component_install)
pmix_mca_psensor_perfevent_la_SOURCES = $(sources)
# libtool flags: -module builds a dlopen-able module, -avoid-version omits
# the version suffix from the library file name.
pmix_mca_psensor_perfevent_la_LDFLAGS = -module -avoid-version
# Link the module against libpmix only when NEED_LIBPMIX is set (the
# conditional is defined elsewhere, presumably by configure).
if NEED_LIBPMIX
pmix_mca_psensor_perfevent_la_LIBADD = $(top_builddir)/src/libpmix.la
endif
# Static flavor: the noinst_ prefix means the library is built but never
# installed on its own.
noinst_LTLIBRARIES = $(component_noinst)
libpmix_mca_psensor_perfevent_la_SOURCES =$(sources)
libpmix_mca_psensor_perfevent_la_LDFLAGS = -module -avoid-version
# -*- text -*-
#
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
#
# Copyright (c) 2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for the memory usage sensor
#
[mem-limit-exceeded]
A process has exceeded the specified limit on memory usage:
Node: %s
Process rank: %s
Memory used: %luGbytes
Memory limit: %luGbytes
......@@ -102,10 +102,17 @@ typedef struct {
size_t ninfo;
bool stopped;
// PerfEvent metrics
uint64_t cpu_usage;
uint64_t memory_usage;
// perfevent metrics
uint64_t cycles;
uint64_t instructions;
uint64_t l1_misses;
uint64_t llc_misses;
uint64_t branch_misses;
uint64_t task_clock;
uint64_t scale;
uint64_t ipc;
uint64_t cpus;
uint64_t ghz;
} pmix_perfevent_trkr_t;
static void ft_constructor(pmix_perfevent_trkr_t *pt)
......@@ -123,8 +130,18 @@ static void ft_constructor(pmix_perfevent_trkr_t *pt)
pt->info = NULL;
pt->ninfo = 0;
pt->stopped = false;
pt->cpu_usage = 0;
pt->memory_usage = 0;
// perfevent metrics
pt->cycles = 0;
pt->instructions = 0;
pt->l1_misses = 0;
pt->llc_misses = 0;
pt->branch_misses = 0;
pt->task_clock = 0;
pt->scale = 0;
pt->ipc = 0;
pt->cpus = 0;
pt->ghz = 0;
}
static void ft_destructor(pmix_perfevent_trkr_t *pt)
{
......@@ -186,7 +203,7 @@ PMIX_CLASS_INSTANCE(pmix_psensor_metric_t, pmix_object_t, bcon, bdes);
static void check_perfevent_metric(int fd, short dummy, void *arg);
static void add_tracker(int sd, short flags, void *cbdata)
{
{
pmix_perfevent_trkr_t *pt = (pmix_perfevent_trkr_t *) cbdata;
PMIX_ACQUIRE_OBJECT(pt);
......@@ -357,9 +374,18 @@ static void check_perfevent_metric(int fd, short dummy, void *cbdata)
pmix_globals.myid.nspace, pmix_globals.myid.rank, pt->nbeats,
pt->requestor->info->pname.nspace, pt->requestor->info->pname.rank);
}
/* reset for next period */
pt->cpu_usage = 0;
pt->memory_usage = 0;
/* reset perfevent metrics for next period */
pt->cycles = 0;
pt->instructions = 0;
pt->l1_misses = 0;
pt->llc_misses = 0;
pt->branch_misses = 0;
pt->task_clock = 0;
pt->scale = 0;
pt->ipc = 0;
pt->cpus = 0;
pt->ghz = 0;
/* reset the timer */
pmix_event_evtimer_add(&pt->ev, &pt->tv);
......@@ -376,12 +402,35 @@ static void add_metric(int sd, short args, void *cbdata)
/* find this peer in our trackers */
PMIX_LIST_FOREACH (pt, &pmix_mca_psensor_perfevent_component.trackers, pmix_perfevent_trkr_t) {
if (pt->requestor == b->peer) {
/* increment the beat count */
++pt->cpu_usage;
++pt->memory_usage;
/* ensure we know that the proc is alive */
int node_id = b->peer->info->pname.rank;
pt->cycles = node_id + 0;
pt->instructions = node_id + 100;
pt->l1_misses = node_id + 200;
pt->llc_misses = node_id + 300;
pt->branch_misses = node_id + 400;
pt->task_clock = node_id + 500;
pt->scale = node_id + 600;
pt->ipc = node_id + 700;
pt->cpus = node_id + 800;
pt->ghz = node_id + 900;
pt->stopped = false;
break;
pmix_output_verbose(1, pmix_psensor_base_framework.framework_output,
"[%s:%d] Updated metrics for peer %s:%d\n"
" cycles : %" PRIu64 "\n"
" instructions : %" PRIu64 "\n"
" L1 misses : %" PRIu64 "\n"
" LLC misses : %" PRIu64 "\n"
" branch misses : %" PRIu64 "\n"
" task clock : %" PRIu64 "\n"
" scale : %" PRIu64 "\n"
" IPC : %" PRIu64 "\n"
" CPUs : %" PRIu64 "\n"
" GHz : %" PRIu64,
pmix_globals.myid.nspace, pmix_globals.myid.rank,
b->peer->info->pname.nspace, b->peer->info->pname.rank,
pt->cycles, pt->instructions, pt->l1_misses, pt->llc_misses, pt->branch_misses,
pt->task_clock, pt->scale, pt->ipc, pt->cpus, pt->ghz);
}
}
......
......@@ -14,7 +14,7 @@
#include "pmix_common.h"
#include "src/mca/psensor/base/base.h"
#include "src/mca/psensor/heartbeat/psensor_perfevent.h"
#include "src/mca/psensor/perfevent/psensor_perfevent.h"
#include "src/mca/ptl/ptl.h"
/*
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment