Skip to content
Snippets Groups Projects
Commit aa138fda authored by František Řezníček's avatar František Řezníček
Browse files

fix: add environment variables to cron (to execute lspci), better debugging with CMG_TRACE

parent 5b5bc77b
Branches
Tags v1.2.5
No related merge requests found
Pipeline #161372 passed
...@@ -6,21 +6,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ...@@ -6,21 +6,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
## [1.2.5] - 2022-06-07
### Fixed
- cron: ensure lspci can be executed (SHELL and PATH are now set in the crontab)
- metric-generator-exec.sh: better logging/tracing with CMG_TRACE
## [1.2.4] - 2022-06-07 ## [1.2.4] - 2022-06-07
### Added ### Fixed
- gpumon: GPU_DEVICES_UNRECOGNIZED detection reworked without grep, with gawk, avoiding crash - gpumon: GPU_DEVICES_UNRECOGNIZED detection reworked without grep, with gawk, avoiding crash
## [1.2.3] - 2022-06-07 ## [1.2.3] - 2022-06-07
### Added ### Fixed
- gpumon: publish at least a metric help (metric-generator-exec requirement) - gpumon: publish at least a metric help (metric-generator-exec requirement)
- metric-generator-exec.sh: added note on how a generator module should behave - metric-generator-exec.sh: added note on how a generator module should behave
## [1.2.2] - 2022-06-07 ## [1.2.2] - 2022-06-07
### Added ### Fixed
- gpumon: avoid crashing on no GPU (readlink) - gpumon: avoid crashing on no GPU (readlink)
## [1.2.1] - 2022-06-07 ## [1.2.1] - 2022-06-07
### Added ### Fixed
- gpumon: avoid crashing on no GPU - gpumon: avoid crashing on no GPU
- gpumon: avoid reporting GPU metric help on no GPU - gpumon: avoid reporting GPU metric help on no GPU
......
...@@ -27,13 +27,14 @@ test -d "${CMG_OUT_METRICS_DIR}" || \ ...@@ -27,13 +27,14 @@ test -d "${CMG_OUT_METRICS_DIR}" || \
mkdir -p "${CMG_OUT_METRICS_DIR}" mkdir -p "${CMG_OUT_METRICS_DIR}"
i_fixed_sleep=0 i_fixed_sleep=0
echo -e "SHELL=/bin/bash\nPATH=/usr/bin:/bin:/usr/sbin\n\n" > /var/spool/cron/root
for i_module in ${CMG_EXEC_MODULES}; do for i_module in ${CMG_EXEC_MODULES}; do
touch "${CMG_OUT_METRICS_DIR}/${i_module}.prom" \ touch "${CMG_OUT_METRICS_DIR}/${i_module}.prom" \
"${CMG_OUT_METRICS_DIR}/custom-metrics-generator_${i_module}.prom" > /dev/null "${CMG_OUT_METRICS_DIR}/custom-metrics-generator_${i_module}.prom" > /dev/null
echo "* * * * * ${CMG_BASE_DIR}/src/delay-jitter-exec.sh ${i_fixed_sleep} ${CMG_MAX_JITTER_DELAY} \ echo "* * * * * ${CMG_BASE_DIR}/src/delay-jitter-exec.sh ${i_fixed_sleep} ${CMG_MAX_JITTER_DELAY} \
${CMG_BASE_DIR}/src/metric-generator-exec.sh ${i_module} ${CMG_OUT_METRICS_DIR}/${i_module}.prom ${CMG_OUT_METRICS_DIR}/custom-metrics-generator_${i_module}.prom" ${CMG_BASE_DIR}/src/metric-generator-exec.sh ${i_module} ${CMG_OUT_METRICS_DIR}/${i_module}.prom ${CMG_OUT_METRICS_DIR}/custom-metrics-generator_${i_module}.prom"
i_fixed_sleep=$(( ${i_fixed_sleep} + 5 )) i_fixed_sleep=$(( ${i_fixed_sleep} + 5 ))
done > /var/spool/cron/root done >> /var/spool/cron/root
echo "custom-metrics-generator configuration:" echo "custom-metrics-generator configuration:"
set | grep -E '^CMG_' | awk '{print "export " $0}' > "${CMG_BASE_DIR}/custom-metrics-generator.conf.env" set | grep -E '^CMG_' | awk '{print "export " $0}' > "${CMG_BASE_DIR}/custom-metrics-generator.conf.env"
......
# cmg_trace_enabled ( <value> )
#   succeeds only when <value> is exactly "1", "true" or "True"
function cmg_trace_enabled() {
    # The alternation is grouped so that both anchors apply to every
    # alternative; ungrouped, ^1|[Tt]rue$ also accepts e.g. "10" or "untrue".
    [[ "${1:-}" =~ ^(1|[Tt]rue)$ ]]
}

# Turn on shell command tracing when CMG_TRACE requests it.
# ${CMG_TRACE:-} keeps an unset variable from aborting the script under `set -u`;
# the if-form leaves a zero exit status when tracing stays disabled.
if cmg_trace_enabled "${CMG_TRACE:-}"; then
    set -x
fi
# get_metric_help ( <metric-name> <metric-type> <metric-desc>) # get_metric_help ( <metric-name> <metric-type> <metric-desc>)
# generates testform metric help # generates testform metric help
function get_metric_help() { function get_metric_help() {
......
...@@ -78,10 +78,10 @@ function at_exit() { ...@@ -78,10 +78,10 @@ function at_exit() {
fi fi
mv -f "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}.tmp" "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}" mv -f "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}.tmp" "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}"
fi fi
local env_dump_file="/tmp/cmg-${METRICS_GENERATOR_NAME}-trace.log"
set > ${env_dump_file} set > "/tmp/cmg-${METRICS_GENERATOR_NAME}-trace.log"
if [ "${STAGE_NAME}" != "success" ]; then if [ "${STAGE_NAME}" != "success" ]; then
log_stderr "ERROR: Job ${METRICS_GENERATOR_NAME} failed at step \"${STAGE_NAME}\" (PPID: $$, duration: ${SECONDS} sec[s], see env. dump at ${env_dump_file})." log_stderr "ERROR: Job ${METRICS_GENERATOR_NAME} failed at step \"${STAGE_NAME}\" (PPID: $$, duration: ${SECONDS} sec[s], see env. dump at /tmp/cmg-*${METRICS_GENERATOR_NAME}-trace.log)."
exit 1 exit 1
fi fi
log_stdout "INFO: Job ${METRICS_GENERATOR_NAME} succeeded. (PPID: $$, duration: ${SECONDS} sec[s])" log_stdout "INFO: Job ${METRICS_GENERATOR_NAME} succeeded. (PPID: $$, duration: ${SECONDS} sec[s])"
...@@ -96,7 +96,9 @@ rm -f "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}.tmp" ...@@ -96,7 +96,9 @@ rm -f "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}.tmp"
get_metric_text "job_last_run_timestamp" "${METRICS_GENERATOR_NAME}" "${GENERATOR_START_TIME}" >> "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}.tmp" get_metric_text "job_last_run_timestamp" "${METRICS_GENERATOR_NAME}" "${GENERATOR_START_TIME}" >> "${OUTPUT_METRIC_GENERATOR_RUNNER_METRIC_FILE}.tmp"
STAGE_NAME="metric-generator successfuly executed" STAGE_NAME="metric-generator successfuly executed"
timeout ${CMG_METRIC_GENERATOR_EXEC_TIMEOUT} ${METRICS_GENERATOR_FILE} ${OUTPUT_METRIC_GENERATOR_METRIC_FILE} > "${OUTPUT_METRIC_GENERATOR_METRIC_FILE}.tmp" timeout ${CMG_METRIC_GENERATOR_EXEC_TIMEOUT} ${METRICS_GENERATOR_FILE} ${OUTPUT_METRIC_GENERATOR_METRIC_FILE} \
> "${OUTPUT_METRIC_GENERATOR_METRIC_FILE}.tmp" \
2> "/tmp/cmg-module-${METRICS_GENERATOR_NAME}-trace.log"
STAGE_NAME="metric-generator generated valid metrics" STAGE_NAME="metric-generator generated valid metrics"
test -s "${OUTPUT_METRIC_GENERATOR_METRIC_FILE}.tmp" test -s "${OUTPUT_METRIC_GENERATOR_METRIC_FILE}.tmp"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment