From 0c5150772afac4b7ec3bbd791f9db7b3f3f13d49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franti=C5=A1ek=20=C5=98ezn=C3=AD=C4=8Dek?=
 <246254@mail.muni.cz>
Date: Tue, 7 Jun 2022 14:50:43 +0200
Subject: [PATCH] fix: gpumon no-GPU execution fixes

---
 CHANGELOG.md                    |  5 +++++
 src/metric-generators/gpumon.sh | 10 +++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f247216..90093cf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.2.1] - 2022-06-07
+### Fixed
+- gpumon: avoid crashing when no GPU is present
+- gpumon: avoid reporting GPU metric help when no GPU is present
+
 ## [1.2.0] - 2022-06-07
 ### Added
 - GPU presence and availablility added as gpumon metric generator
diff --git a/src/metric-generators/gpumon.sh b/src/metric-generators/gpumon.sh
index efff12e..a262c23 100755
--- a/src/metric-generators/gpumon.sh
+++ b/src/metric-generators/gpumon.sh
@@ -17,7 +17,9 @@ source "${CMG_SRC_DIR}/lib.sh"
 # get_gpu_devices [lspci-additional-args]
 #   list PCI devices one per line
 function get_gpu_devices() {
-    lspci -D -mm "$@" | grep -i nvidia | grep -Ei "(VGA|2D|3D).+controller"
+    if ! lspci -D -mm "$@" | grep -i nvidia | grep -Ei "(VGA|2D|3D).+controller"; then
+        true  # pipeline failed (e.g. grep matched no device): still return success
+    fi
 }
 
 # find_gpu_passthough_vm_manifest <gpu-location> [libvirtd-qemu-dir]
@@ -30,7 +32,7 @@ function find_gpu_passthough_vm_manifest () {
   local gpu_slot=$(echo -n "${gpu_location}" | sed 's/[:.]/ /g' | awk '{printf $3}')
   local gpu_function=$(echo -n "${gpu_location}" | sed 's/[:.]/ /g' | awk '{printf $4}')
 
-  test -d ${dir} || return 0
+  test -d "${dir}" || return 0
 
   for i_gpu_manifest_file in $(ls "${dir}"/*.xml); do
       if grep -i  "domain=['\"]0x${gpu_domain}['\"]" "${i_gpu_manifest_file}" | \
@@ -55,7 +57,9 @@ fi
 # browse the GPUs and export metrics
 METRIC_NAME="gpumon_device_state_code"
 STAGE_NAME="GPU cards ${METRIC_NAME} metrics generated"
-get_metric_help "${METRIC_NAME}" "gauge" "GPU device state code (0/1 ~ available free/unavailable used)."
+if [ -n "${GPU_DEVICES}" ]; then
+    get_metric_help "${METRIC_NAME}" "gauge" "GPU device state code (0/1 ~ available free/unavailable used)."
+fi
 echo "${GPU_DEVICES}" | \
   while read i_gpu_device ; do
       [ -z "${i_gpu_device}" ] && \
-- 
GitLab