From 0c31fcab0fb35c292d5ce4b72e5e1b50a1c5d5be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franti=C5=A1ek=20=C5=98ezn=C3=AD=C4=8Dek?=
 <246254@mail.muni.cz>
Date: Tue, 7 Jun 2022 17:01:43 +0200
Subject: [PATCH] fix: avoid crashing on read(line) when no GPU found

---
 CHANGELOG.md                    |  4 ++++
 src/metric-generators/gpumon.sh | 41 +++++++++++++++++----------------
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 90093cf..8c0fecb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.2.2] - 2022-06-07
+### Fixed
+- gpumon: avoid crashing on no GPU (readlink)
+
 ## [1.2.1] - 2022-06-07
 ### Added
 - gpumon: avoid crashing on no GPU
diff --git a/src/metric-generators/gpumon.sh b/src/metric-generators/gpumon.sh
index a262c23..c2c8549 100755
--- a/src/metric-generators/gpumon.sh
+++ b/src/metric-generators/gpumon.sh
@@ -56,26 +56,27 @@ fi
 
 # browse the GPUs and export metrics
 METRIC_NAME="gpumon_device_state_code"
-STAGE_NAME="GPU cards ${METRIC_NAME} metrics generated"
+STAGE_NAME="GPU devices ${METRIC_NAME} metrics generated"
 if [ -n "${GPU_DEVICES}" ]; then
     get_metric_help "${METRIC_NAME}" "gauge" "GPU device state code (0/1 ~ available free/unavailable used)."
+    # Emit one gauge sample per non-empty GPU_DEVICES line (1 ~ a kernel driver is bound, else 0).
+    echo "${GPU_DEVICES}" | while read -r i_gpu_device ; do
+        STAGE_NAME="GPU device ${METRIC_NAME} metrics generated (${i_gpu_device})"
+        [ -z "${i_gpu_device}" ] && continue
+        # Fields are printed through "%s" so device text is never interpreted as a printf format.
+        i_gpu_device_location="$(echo "${i_gpu_device}" | awk '{printf "%s", $1}')"
+        i_gpu_device_vendor="$(echo "${i_gpu_device}" | gawk 'BEGIN{FPAT = "(\"[^\"]+\")"}{printf "%s", $2}' | tr -d '"')"
+        i_gpu_device_model="$(echo "${i_gpu_device}" | gawk 'BEGIN{FPAT = "(\"[^\"]+\")"}{printf "%s", $3}' | tr -d '"')"
+        i_gpu_device_revision="$(echo "${i_gpu_device}" | grep -Eo -- "-r[^[:space:]]+")"
+        i_gpu_attached=0 i_vm_domain_name=""
+        if lspci -v -s "${i_gpu_device_location}" | grep -qE 'Kernel driver in use: .+'; then
+            i_gpu_attached=1
+            i_vm_domain_file="$(find_gpu_passthough_vm_manifest "${i_gpu_device_location}")"
+            # Quoted path and literal ".xml" suffix: safe when no manifest is found or the name contains "xml".
+            i_vm_domain_name="$(basename -- "$(echo "${i_vm_domain_file}" | head -1)" .xml)"
+        fi
+        printf '%s{device="%s",vendor="%s",location="%s",revision="%s",domain="%s"} %d\n' "${METRIC_NAME}" \
+          "${i_gpu_device_model}" "${i_gpu_device_vendor}" "${i_gpu_device_location}" \
+          "${i_gpu_device_revision:2}" "${i_vm_domain_name}" "${i_gpu_attached}"
+    done
 fi
-echo "${GPU_DEVICES}" | \
-  while read i_gpu_device ; do
-      [ -z "${i_gpu_device}" ] && \
-        continue
-      i_gpu_device_location="$(echo "${i_gpu_device}" | awk '{printf $1}')"
-      i_gpu_device_vendor="$(echo "${i_gpu_device}" | gawk 'BEGIN{FPAT = "(\"[^\"]+\")"}{printf $2}' | tr -d '"')"
-      i_gpu_device_model="$(echo "${i_gpu_device}" | gawk 'BEGIN{FPAT = "(\"[^\"]+\")"}{printf $3}' | tr -d '"')"
-      i_gpu_device_revision="$(echo "${i_gpu_device}" | grep -Eo -- "-r[^ \t]+")"
-      i_gpu_attached=0
-      i_vm_domain_name=""
-      if lspci -v -s "${i_gpu_device_location}" | grep -qE 'Kernel driver in use: .+'; then
-          i_gpu_attached=1
-          i_vm_domain_file="$(find_gpu_passthough_vm_manifest "${i_gpu_device_location}")"
-          i_vm_domain_name="$(basename $(echo "${i_vm_domain_file}" | head -1) | sed 's/.xml//g')"
-      fi
-      printf '%s{device="%s",vendor="%s",location="%s",revision="%s",domain="%s"} %d\n' "${METRIC_NAME}" \
-        "${i_gpu_device_model}" "${i_gpu_device_vendor}" "${i_gpu_device_location}" \
-        "${i_gpu_device_revision:2}" "${i_vm_domain_name}" "${i_gpu_attached}"
-  done
-- 
GitLab