diff --git a/backend/cluster_monitor_cron.sh b/backend/cluster_monitor_cron.sh index eadf2d9af3223b00b83ad6067dbb75585f16fdc8..7741a3c2fe15dd766fccb75efcebdf1a19c97fec 100755 --- a/backend/cluster_monitor_cron.sh +++ b/backend/cluster_monitor_cron.sh @@ -31,6 +31,7 @@ for ((gpu_id = 0; gpu_id < "$n_gpus"; gpu_id++)); do gpu_jobs=$(nvidia-smi --query-compute-apps=pid,used_gpu_memory -i "$gpu_id" --format=csv,noheader,nounits) biggest_job=$(echo "$gpu_jobs" | sort -k2,2nr | head -n 1) biggest_job_pid=$(echo "$biggest_job" | cut -d " " -f1 | sed 's/,//g') + gpu_user="" if [ -n "$biggest_job_pid" ]; then gpu_user=$(ps -o uname= -p "$biggest_job_pid") fi