From aa138fda9bfae7bf461315cade51693670200d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franti=C5=A1ek=20=C5=98ezn=C3=AD=C4=8Dek?= <246254@mail.muni.cz> Date: Tue, 7 Jun 2022 20:40:06 +0200 Subject: [PATCH] fix: add environment variables to cron (to execute lspci), better debugging with CMG_TRACE --- CHANGELOG.md | 13 +++++++++---- entrypoint.sh | 3 ++- src/lib.sh | 4 ++++ src/metric-generator-exec.sh | 10 ++++++---- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ab96fb..e2b8a54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,21 +6,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.2.5] - 2022-06-07 +### Fixed +- cron: set SHELL and PATH in the crontab so that lspci can be found and executed +- metric-generator-exec.sh: better logging/tracing with CMG_TRACE + ## [1.2.4] - 2022-06-07 -### Added +### Fixed - gpumon: GPU_DEVICES_UNRECOGNIZED detection reworked without grep, with gawk, avoiding crash ## [1.2.3] - 2022-06-07 -### Added +### Fixed - gpumon: publish at least a metric help (metric-generator-exec requirement) - metric-generator-exec.sh: added note on how should generator module behave ## [1.2.2] - 2022-06-07 -### Added +### Fixed - gpumon: avoid crashing on no GPU (readlink) ## [1.2.1] - 2022-06-07 -### Added +### Fixed - gpumon: avoid crashing on no GPU - gpumon: avoid reporting GPU metric help on no GPU diff --git a/entrypoint.sh b/entrypoint.sh index 31c81a7..271fc41 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -27,13 +27,14 @@ test -d "${CMG_OUT_METRICS_DIR}" || \ mkdir -p "${CMG_OUT_METRICS_DIR}" i_fixed_sleep=0 +echo -e "SHELL=/bin/bash\nPATH=/usr/bin:/bin:/usr/sbin\n\n" > /var/spool/cron/root for i_module in ${CMG_EXEC_MODULES}; do touch "${CMG_OUT_METRICS_DIR}/${i_module}.prom" \ "${CMG_OUT_METRICS_DIR}/custom-metrics-generator_${i_module}.prom" > /dev/null echo "* * * * * ${CMG_BASE_DIR}/src/delay-jitter-exec.sh ${i_fixed_sleep} ${CMG_MAX_JITTER_DELAY} \ 
${CMG_BASE_DIR}/src/metric-generator-exec.sh ${i_module} ${CMG_OUT_METRICS_DIR}/${i_module}.prom ${CMG_OUT_METRICS_DIR}/custom-metrics-generator_${i_module}.prom" i_fixed_sleep=$(( ${i_fixed_sleep} + 5 )) -done > /var/spool/cron/root +done >> /var/spool/cron/root echo "custom-metrics-generator configuration:" set | grep -E '^CMG_' | awk '{print "export " $0}' > "${CMG_BASE_DIR}/custom-metrics-generator.conf.env" diff --git a/src/lib.sh b/src/lib.sh index ab09413..7c34031 100644 --- a/src/lib.sh +++ b/src/lib.sh @@ -1,4 +1,8 @@ +[[ "${CMG_TRACE}" =~ ^(1|[Tt]rue)$ ]] && \ + set -x + + # get_metric_help ( <metric-name> <metric-type> <metric-desc>) # generates testform metric help function get_metric_help() { diff --git a/src/metric-generator-exec.sh b/src/metric-generator-exec.sh index 3adea22..8881f0c 100755 --- a/src/metric-generator-exec.sh +++ b/src/metric-generator-exec.sh @@ -78,10 +78,10 @@ function at_exit() { fi mv -f "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}.tmp" "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}" fi - local env_dump_file="/tmp/cmg-${METRICS_GENERATOR_NAME}-trace.log" - set > ${env_dump_file} + + set > "/tmp/cmg-${METRICS_GENERATOR_NAME}-trace.log" if [ "${STAGE_NAME}" != "success" ]; then - log_stderr "ERROR: Job ${METRICS_GENERATOR_NAME} failed at step \"${STAGE_NAME}\" (PPID: $$, duration: ${SECONDS} sec[s], see env. dump at ${env_dump_file})." + log_stderr "ERROR: Job ${METRICS_GENERATOR_NAME} failed at step \"${STAGE_NAME}\" (PPID: $$, duration: ${SECONDS} sec[s], see env. dump at /tmp/cmg-*${METRICS_GENERATOR_NAME}-trace.log)." exit 1 fi log_stdout "INFO: Job ${METRICS_GENERATOR_NAME} succeeded. 
(PPID: $$, duration: ${SECONDS} sec[s])" @@ -96,7 +96,9 @@ rm -f "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}.tmp" get_metric_text "job_last_run_timestamp" "${METRICS_GENERATOR_NAME}" "${GENERATOR_START_TIME}" >> "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}.tmp" STAGE_NAME="metric-generator successfuly executed" -timeout ${CMG_METRIC_GENERATOR_EXEC_TIMEOUT} ${METRICS_GENERATOR_FILE} ${OUTPUT_METRIC_GENERATOR_METRIC_FILE} > "${OUTPUT_METRIC_GENERATOR_METRIC_FILE}.tmp" +timeout ${CMG_METRIC_GENERATOR_EXEC_TIMEOUT} ${METRICS_GENERATOR_FILE} ${OUTPUT_METRIC_GENERATOR_METRIC_FILE} \ + > "${OUTPUT_METRIC_GENERATOR_METRIC_FILE}.tmp" \ + 2> "/tmp/cmg-module-${METRICS_GENERATOR_NAME}-trace.log" STAGE_NAME="metric-generator generated valid metrics" test -s "${OUTPUT_METRIC_GENERATOR_METRIC_FILE}.tmp" -- GitLab