From 0c5150772afac4b7ec3bbd791f9db7b3f3f13d49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franti=C5=A1ek=20=C5=98ezn=C3=AD=C4=8Dek?= <246254@mail.muni.cz> Date: Tue, 7 Jun 2022 14:50:43 +0200 Subject: [PATCH] fix: gpumon no-GPU execution fixes --- CHANGELOG.md | 5 +++++ src/metric-generators/gpumon.sh | 10 +++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f247216..90093cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.2.1] - 2022-06-07 +### Added +- gpumon: avoid crashing on no GPU +- gpumon: avoid reporting GPU metric help on no GPU + ## [1.2.0] - 2022-06-07 ### Added - GPU presence and availablility added as gpumon metric generator diff --git a/src/metric-generators/gpumon.sh b/src/metric-generators/gpumon.sh index efff12e..a262c23 100755 --- a/src/metric-generators/gpumon.sh +++ b/src/metric-generators/gpumon.sh @@ -17,7 +17,9 @@ source "${CMG_SRC_DIR}/lib.sh" # get_gpu_devices [lspci-additional-args] # list PCI devices one per line function get_gpu_devices() { - lspci -D -mm "$@" | grep -i nvidia | grep -Ei "(VGA|2D|3D).+controller" + if ! lspci -D -mm "$@" | grep -i nvidia | grep -Ei "(VGA|2D|3D).+controller"; then + true + fi } # find_gpu_passthough_vm_manifest <gpu-location> [libvirtd-qemu-dir] @@ -30,7 +32,7 @@ function find_gpu_passthough_vm_manifest () { local gpu_slot=$(echo -n "${gpu_location}" | sed 's/[:.]/ /g' | awk '{printf $3}') local gpu_function=$(echo -n "${gpu_location}" | sed 's/[:.]/ /g' | awk '{printf $4}') - test -d ${dir} || return 0 + test -d "${dir}" || return 0 for i_gpu_manifest_file in $(ls "${dir}"/*.xml); do if grep -i "domain=['\"]0x${gpu_domain}['\"]" "${i_gpu_manifest_file}" | \ @@ -55,7 +57,9 @@ fi # browse the GPUs and export metrics METRIC_NAME="gpumon_device_state_code" STAGE_NAME="GPU cards ${METRIC_NAME} metrics generated" -get_metric_help "${METRIC_NAME}" "gauge" "GPU device state code (0/1 ~ available free/unavailable used)." +if [ -n "${GPU_DEVICES}" ]; then + get_metric_help "${METRIC_NAME}" "gauge" "GPU device state code (0/1 ~ available free/unavailable used)." +fi echo "${GPU_DEVICES}" | \ while read i_gpu_device ; do [ -z "${i_gpu_device}" ] && \ -- GitLab