19 KB

  1. #!/usr/bin/env bash
  2. # SPDX-License-Identifier: GPL-3.0-or-later
  3. # netdata
  4. # real-time performance and health monitoring, done right!
# (C) 2017 Costa Tsaousis
  6. # GPL v3+
  7. #
  8. # charts.d.plugin allows easy development of BASH plugins
  9. #
  10. # if you need to run parallel charts.d processes, link this file to a different name
  11. # in the same directory, with a .plugin suffix and netdata will start both of them,
  12. # each will have a different config file and modules configuration directory.
  13. #
  14. export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"
  15. PROGRAM_FILE="$0"
  16. PROGRAM_NAME="$(basename $0)"
  17. PROGRAM_NAME="${PROGRAM_NAME/.plugin/}"
  18. MODULE_NAME="main"
  19. # -----------------------------------------------------------------------------
  20. # create temp dir
  21. debug=0
  22. TMP_DIR=
  23. chartsd_cleanup() {
  24. trap '' EXIT QUIT HUP INT TERM
  25. if [ ! -z "$TMP_DIR" -a -d "$TMP_DIR" ]; then
  26. [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: cleaning up temporary directory $TMP_DIR ..."
  27. rm -rf "$TMP_DIR"
  28. fi
  29. exit 0
  30. }
  31. trap chartsd_cleanup EXIT QUIT HUP INT TERM
  32. if [ $UID = "0" ]; then
  33. TMP_DIR="$(mktemp -d /var/run/netdata-${PROGRAM_NAME}-XXXXXXXXXX)"
  34. else
  35. TMP_DIR="$(mktemp -d /tmp/.netdata-${PROGRAM_NAME}-XXXXXXXXXX)"
  36. fi
  37. logdate() {
  38. date "+%Y-%m-%d %H:%M:%S"
  39. }
  40. log() {
  41. local status="${1}"
  42. shift
  43. echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${MODULE_NAME}: ${*}"
  44. }
  45. warning() {
  46. log WARNING "${@}"
  47. }
  48. error() {
  49. log ERROR "${@}"
  50. }
  51. info() {
  52. log INFO "${@}"
  53. }
  54. fatal() {
  55. log FATAL "${@}"
  56. echo "DISABLE"
  57. exit 1
  58. }
  59. debug() {
  60. [ $debug -eq 1 ] && log DEBUG "${@}"
  61. }
  62. # -----------------------------------------------------------------------------
  63. # check a few commands
  64. require_cmd() {
  65. local x=$(which "${1}" 2>/dev/null || command -v "${1}" 2>/dev/null)
  66. if [ -z "${x}" -o ! -x "${x}" ]; then
  67. warning "command '${1}' is not found in ${PATH}."
  68. eval "${1^^}_CMD=\"\""
  69. return 1
  70. fi
  71. eval "${1^^}_CMD=\"${x}\""
  72. return 0
  73. }
  74. require_cmd date || exit 1
  75. require_cmd sed || exit 1
  76. require_cmd basename || exit 1
  77. require_cmd dirname || exit 1
  78. require_cmd cat || exit 1
  79. require_cmd grep || exit 1
  80. require_cmd egrep || exit 1
  81. require_cmd mktemp || exit 1
  82. require_cmd awk || exit 1
  83. require_cmd timeout || exit 1
  84. require_cmd curl || exit 1
  85. # -----------------------------------------------------------------------------
  86. [ $((BASH_VERSINFO[0])) -lt 4 ] && fatal "BASH version 4 or later is required, but found version: ${BASH_VERSION}. Please upgrade."
  87. info "started from '$PROGRAM_FILE' with options: $*"
  88. # -----------------------------------------------------------------------------
  89. # internal defaults
  90. # netdata exposes a few environment variables for us
  91. [ -z "${NETDATA_PLUGINS_DIR}" ] && NETDATA_PLUGINS_DIR="$(dirname "${0}")"
  93. [ -z "${NETDATA_STOCK_CONFIG_DIR}" ] && NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@"
  94. pluginsd="${NETDATA_PLUGINS_DIR}"
  97. olduserconfd="${NETDATA_USER_CONFIG_DIR}"
  98. chartsd="$pluginsd/../charts.d"
  99. minimum_update_frequency="${NETDATA_UPDATE_EVERY-1}"
  100. update_every=${minimum_update_frequency} # this will be overwritten by the command line
  101. # work around for non BASH shells
  102. charts_create="_create"
  103. charts_update="_update"
  104. charts_check="_check"
  105. charts_undescore="_"
  106. # when making iterations, charts.d can loop more frequently
  107. # to prevent plugins missing iterations.
  108. # this is a percentage relative to update_every to align its
  109. # iterations.
  110. # The minimum is 10%, the maximum 100%.
  111. # So, if update_every is 1 second and time_divisor is 50,
  112. # charts.d will iterate every 500ms.
  113. # Charts will be called to collect data only if the time
  114. # passed since the last time the collected data is equal or
  115. # above their update_every.
  116. time_divisor=50
  117. # number of seconds to run without restart
  118. # after this time, charts.d.plugin will exit
  119. # netdata will restart it
  120. restart_timeout=$((3600 * 4))
  121. # check if the charts.d plugins are using global variables
  122. # they should not.
  123. # It does not currently support BASH v4 arrays, so it is
  124. # disabled
  125. dryrunner=0
  126. # check for timeout command
  127. check_for_timeout=1
  128. # the default enable/disable value for all charts
  129. enable_all_charts="yes"
  130. # -----------------------------------------------------------------------------
  131. # parse parameters
  132. check=0
  133. chart_only=
  134. while [ ! -z "$1" ]; do
  135. if [ "$1" = "check" ]; then
  136. check=1
  137. shift
  138. continue
  139. fi
  140. if [ "$1" = "debug" -o "$1" = "all" ]; then
  141. debug=1
  142. shift
  143. continue
  144. fi
  145. if [ -f "$chartsd/$" ]; then
  146. debug=1
  147. chart_only="$(echo $ | sed "s/\.chart\.sh$//g")"
  148. shift
  149. continue
  150. fi
  151. if [ -f "$chartsd/$1" ]; then
  152. debug=1
  153. chart_only="$(echo $1 | sed "s/\.chart\.sh$//g")"
  154. shift
  155. continue
  156. fi
  157. # number check
  158. n="$1"
  159. x=$((n))
  160. if [ "$x" = "$n" ]; then
  161. shift
  162. update_every=$x
  163. [ $update_every -lt $minimum_update_frequency ] && update_every=$minimum_update_frequency
  164. continue
  165. fi
  166. fatal "Cannot understand parameter $1. Aborting."
  167. done
  168. # -----------------------------------------------------------------------------
  169. # loop control
  170. # default sleep function
  172. now_ms=
  173. current_time_ms_default() {
  174. now_ms="$(date +'%s')000"
  175. }
  176. current_time_ms="current_time_ms_default"
  177. current_time_ms_accuracy=1
  178. mysleep="sleep"
  179. # if found and included, this file overwrites loopsleepms()
  180. # and current_time_ms() with a high resolution timer function
  181. # for precise looping.
  182. source "$pluginsd/"
  183. [ $? -ne 0 ] && error "Failed to load '$pluginsd/'."
  184. # -----------------------------------------------------------------------------
  185. # load my configuration
  186. for myconfig in "${NETDATA_STOCK_CONFIG_DIR}/${PROGRAM_NAME}.conf" "${NETDATA_USER_CONFIG_DIR}/${PROGRAM_NAME}.conf"; do
  187. if [ -f "$myconfig" ]; then
  188. source "$myconfig"
  189. if [ $? -ne 0 ]; then
  190. error "Config file '$myconfig' loaded with errors."
  191. else
  192. info "Configuration file '$myconfig' loaded."
  193. fi
  194. else
  195. warning "Configuration file '$myconfig' not found."
  196. fi
  197. done
  198. # make sure time_divisor is right
  199. time_divisor=$((time_divisor))
  200. [ $time_divisor -lt 10 ] && time_divisor=10
  201. [ $time_divisor -gt 100 ] && time_divisor=100
  202. # we check for the timeout command, after we load our
  203. # configuration, so that the user may overwrite the
  204. # timeout command we use, providing a function that
  205. # can emulate the timeout command we need:
  206. # > timeout SECONDS command ...
  207. if [ $check_for_timeout -eq 1 ]; then
  208. require_cmd timeout || exit 1
  209. fi
  210. # -----------------------------------------------------------------------------
  211. # internal checks
  212. # netdata passes the requested update frequency as the first argument
  213. update_every=$((update_every + 1 - 1)) # makes sure it is a number
  214. test $update_every -eq 0 && update_every=1 # if it is zero, make it 1
  215. # check the charts.d directory
  216. [ ! -d "$chartsd" ] && fatal "cannot find charts directory '$chartsd'"
  217. # -----------------------------------------------------------------------------
  218. # library functions
  219. fixid() {
  220. echo "$*" |
  221. tr -c "[A-Z][a-z][0-9]" "_" |
  222. sed -e "s|^_\+||g" -e "s|_\+$||g" -e "s|_\+|_|g" |
  223. tr "[A-Z]" "[a-z]"
  224. }
  225. run() {
  226. local ret pid="${BASHPID}" t
  227. if [ "z${1}" = "z-t" -a "${2}" != "0" ]; then
  228. t="${2}"
  229. shift 2
  230. timeout ${t} "${@}" 2>"${TMP_DIR}/run.${pid}"
  231. ret=$?
  232. else
  233. "${@}" 2>"${TMP_DIR}/run.${pid}"
  234. ret=$?
  235. fi
  236. if [ ${ret} -ne 0 ]; then
  237. {
  238. printf "$(logdate): ${PROGRAM_NAME}: ${status}: ${MODULE_NAME}: command '"
  239. printf "%q " "${@}"
  240. printf "' failed with code ${ret}:\n --- BEGIN TRACE ---\n"
  241. cat "${TMP_DIR}/run.${pid}"
  242. printf " --- END TRACE ---\n"
  243. } >&2
  244. fi
  245. rm "${TMP_DIR}/run.${pid}"
  246. return ${ret}
  247. }
  248. # convert any floating point number
  249. # to integer, give a multiplier
  250. # the result is stored in ${FLOAT2INT_RESULT}
  251. # so that no fork is necessary
  252. # the multiplier must be a power of 10
  253. float2int() {
  254. local f m="$2" a b l v=($1)
  255. f=${v[0]}
  256. # the length of the multiplier - 1
  257. l=$((${#m} - 1))
  258. # check if the number is in scientific notation
  259. if [[ ${f} =~ ^[[:space:]]*(-)?[0-9.]+(e|E)(\+|-)[0-9]+ ]]; then
  260. # convert it to decimal
  261. # unfortunately, this fork cannot be avoided
  262. # if you know of a way to avoid it, please let me know
  263. f=$(printf "%0.${l}f" ${f})
  264. fi
  265. # split the floating point number
  266. # in integer (a) and decimal (b)
  267. a=${f/.*/}
  268. b=${f/*./}
  269. # if the integer part is missing
  270. # set it to zero
  271. [ -z "${a}" ] && a="0"
  272. # strip leading zeros from the integer part
  273. # base 10 convertion
  274. a=$((10#$a))
  275. # check the length of the decimal part
  276. # against the length of the multiplier
  277. if [ ${#b} -gt ${l} ]; then
  278. # too many digits - take the most significant
  279. b=${b:0:l}
  280. elif [ ${#b} -lt ${l} ]; then
  281. # too few digits - pad with zero on the right
  282. local z="00000000000000000000000" r=$((l - ${#b}))
  283. b="${b}${z:0:r}"
  284. fi
  285. # strip leading zeros from the decimal part
  286. # base 10 convertion
  287. b=$((10#$b))
  288. # store the result
  289. FLOAT2INT_RESULT=$(((a * m) + b))
  290. }
  291. # -----------------------------------------------------------------------------
  292. # charts check functions
  293. all_charts() {
  294. cd "$chartsd"
  295. [ $? -ne 0 ] && error "cannot cd to $chartsd" && return 1
  296. ls * | sed "s/\.chart\.sh$//g"
  297. }
  298. declare -A charts_enable_keyword=(
  299. ['apache']="force"
  300. ['cpu_apps']="force"
  301. ['cpufreq']="force"
  302. ['example']="force"
  303. ['exim']="force"
  304. ['hddtemp']="force"
  305. ['load_average']="force"
  306. ['mem_apps']="force"
  307. ['mysql']="force"
  308. ['nginx']="force"
  309. ['phpfpm']="force"
  310. ['postfix']="force"
  311. ['sensors']="force"
  312. ['squid']="force"
  313. ['tomcat']="force"
  314. )
  315. all_enabled_charts() {
  316. local charts= enabled= required=
  317. # find all enabled charts
  318. for chart in $(all_charts); do
  319. MODULE_NAME="${chart}"
  320. eval "enabled=\$$chart"
  321. if [ -z "${enabled}" ]; then
  322. enabled="${enable_all_charts}"
  323. fi
  324. required="${charts_enable_keyword[${chart}]}"
  325. [ -z "${required}" ] && required="yes"
  326. if [ ! "${enabled}" = "${required}" ]; then
  327. info "is disabled. Add a line with $chart=$required in '${NETDATA_USER_CONFIG_DIR}/${PROGRAM_NAME}.conf' to enable it (or remove the line that disables it)."
  328. else
  329. debug "is enabled for auto-detection."
  330. local charts="$charts $chart"
  331. fi
  332. done
  333. MODULE_NAME="main"
  334. local charts2=
  335. for chart in $charts; do
  336. MODULE_NAME="${chart}"
  337. # check the enabled charts
  338. local check="$(cat "$chartsd/$" | sed "s/^ \+//g" | grep "^$chart$charts_check()")"
  339. if [ -z "$check" ]; then
  340. error "module '$chart' does not seem to have a $chart$charts_check() function. Disabling it."
  341. continue
  342. fi
  343. local create="$(cat "$chartsd/$" | sed "s/^ \+//g" | grep "^$chart$charts_create()")"
  344. if [ -z "$create" ]; then
  345. error "module '$chart' does not seem to have a $chart$charts_create() function. Disabling it."
  346. continue
  347. fi
  348. local update="$(cat "$chartsd/$" | sed "s/^ \+//g" | grep "^$chart$charts_update()")"
  349. if [ -z "$update" ]; then
  350. error "module '$chart' does not seem to have a $chart$charts_update() function. Disabling it."
  351. continue
  352. fi
  353. # check its config
  354. #if [ -f "$userconfd/$chart.conf" ]
  355. #then
  356. # if [ ! -z "$( cat "$userconfd/$chart.conf" | sed "s/^ \+//g" | grep -v "^$" | grep -v "^#" | grep -v "^$chart$charts_undescore" )" ]
  357. # then
  358. # error "module's $chart config $userconfd/$chart.conf should only have lines starting with $chart$charts_undescore . Disabling it."
  359. # continue
  360. # fi
  361. #fi
  362. #if [ $dryrunner -eq 1 ]
  363. # then
  364. # "$pluginsd/" "$chart" "$chartsd/$" "$userconfd/$chart.conf" >/dev/null
  365. # if [ $? -ne 0 ]
  366. # then
  367. # error "module's $chart did not pass the dry run check. This means it uses global variables not starting with $chart. Disabling it."
  368. # continue
  369. # fi
  370. #fi
  371. local charts2="$charts2 $chart"
  372. done
  373. MODULE_NAME="main"
  374. echo $charts2
  375. debug "enabled charts: $charts2"
  376. }
  377. # -----------------------------------------------------------------------------
  378. # load the charts
  379. suffix_retries="_retries"
  380. suffix_update_every="_update_every"
  381. active_charts=
  382. for chart in $(all_enabled_charts); do
  383. MODULE_NAME="${chart}"
  384. debug "loading module: '$chartsd/$'"
  385. source "$chartsd/$"
  386. [ $? -ne 0 ] && warning "Module '$chartsd/$' loaded with errors."
  387. # first load the stock config
  388. if [ -f "$stockconfd/$chart.conf" ]; then
  389. debug "loading module configuration: '$stockconfd/$chart.conf'"
  390. source "$stockconfd/$chart.conf"
  391. [ $? -ne 0 ] && warning "Config file '$stockconfd/$chart.conf' loaded with errors."
  392. else
  393. debug "not found module configuration: '$stockconfd/$chart.conf'"
  394. fi
  395. # then load the user config (it overwrites the stock)
  396. if [ -f "$userconfd/$chart.conf" ]; then
  397. debug "loading module configuration: '$userconfd/$chart.conf'"
  398. source "$userconfd/$chart.conf"
  399. [ $? -ne 0 ] && warning "Config file '$userconfd/$chart.conf' loaded with errors."
  400. else
  401. debug "not found module configuration: '$userconfd/$chart.conf'"
  402. if [ -f "$olduserconfd/$chart.conf" ]; then
  403. # support for very old netdata that had the charts.d module configs in /etc/netdata
  404. info "loading module configuration from obsolete location: '$olduserconfd/$chart.conf'"
  405. source "$olduserconfd/$chart.conf"
  406. [ $? -ne 0 ] && warning "Config file '$olduserconfd/$chart.conf' loaded with errors."
  407. fi
  408. fi
  409. eval "dt=\$$chart$suffix_update_every"
  410. dt=$((dt + 1 - 1)) # make sure it is a number
  411. if [ $dt -lt $update_every ]; then
  412. eval "$chart$suffix_update_every=$update_every"
  413. fi
  414. $chart$charts_check
  415. if [ $? -eq 0 ]; then
  416. debug "module '$chart' activated"
  417. active_charts="$active_charts $chart"
  418. else
  419. error "module's '$chart' check() function reports failure."
  420. fi
  421. done
  422. MODULE_NAME="main"
  423. debug "activated modules: $active_charts"
  424. # -----------------------------------------------------------------------------
  425. # check overwrites
  426. # enable work time reporting
  427. debug_time=
  428. test $debug -eq 1 && debug_time=tellwork
  429. # if we only need a specific chart, remove all the others
  430. if [ ! -z "${chart_only}" ]; then
  431. debug "requested to run only for: '${chart_only}'"
  432. check_charts=
  433. for chart in $active_charts; do
  434. if [ "$chart" = "$chart_only" ]; then
  435. check_charts="$chart"
  436. break
  437. fi
  438. done
  439. active_charts="$check_charts"
  440. fi
  441. debug "activated charts: $active_charts"
  442. # stop if we just need a pre-check
  443. if [ $check -eq 1 ]; then
  444. info "CHECK RESULT"
  445. info "Will run the charts: $active_charts"
  446. exit 0
  447. fi
  448. # -----------------------------------------------------------------------------
  449. cd "${TMP_DIR}" || exit 1
  450. # -----------------------------------------------------------------------------
  451. # create charts
  452. run_charts=
  453. for chart in $active_charts; do
  454. MODULE_NAME="${chart}"
  455. debug "calling '$chart$charts_create()'..."
  456. $chart$charts_create
  457. if [ $? -eq 0 ]; then
  458. run_charts="$run_charts $chart"
  459. debug "'$chart' initialized."
  460. else
  461. error "module's '$chart' function '$chart$charts_create()' reports failure."
  462. fi
  463. done
  464. MODULE_NAME="main"
  465. debug "run_charts='$run_charts'"
  466. # -----------------------------------------------------------------------------
  467. # update dimensions
  468. [ -z "$run_charts" ] && fatal "No charts to collect data from."
  469. declare -A charts_last_update=() charts_update_every=() charts_retries=() charts_next_update=() charts_run_counter=() charts_serial_failures=()
  470. global_update() {
  471. local exit_at \
  472. c=0 dt ret last_ms exec_start_ms exec_end_ms \
  473. chart now_charts=() next_charts=($run_charts) \
  474. next_ms x seconds millis
  475. # return the current time in ms in $now_ms
  476. ${current_time_ms}
  477. exit_at=$((now_ms + (restart_timeout * 1000)))
  478. for chart in $run_charts; do
  479. eval "charts_update_every[$chart]=\$$chart$suffix_update_every"
  480. test -z "${charts_update_every[$chart]}" && charts_update_every[$chart]=$update_every
  481. eval "charts_retries[$chart]=\$$chart$suffix_retries"
  482. test -z "${charts_retries[$chart]}" && charts_retries[$chart]=10
  483. charts_last_update[$chart]=$((now_ms - (now_ms % (charts_update_every[$chart] * 1000))))
  484. charts_next_update[$chart]=$((charts_last_update[$chart] + (charts_update_every[$chart] * 1000)))
  485. charts_run_counter[$chart]=0
  486. charts_serial_failures[$chart]=0
  487. echo "CHART netdata.plugin_chartsd_$chart '' 'Execution time for $chart plugin' 'milliseconds / run' charts.d netdata.plugin_charts area 145000 ${charts_update_every[$chart]}"
  488. echo "DIMENSION run_time 'run time' absolute 1 1"
  489. done
  490. # the main loop
  491. while [ "${#next_charts[@]}" -gt 0 ]; do
  492. c=$((c + 1))
  493. now_charts=("${next_charts[@]}")
  494. next_charts=()
  495. # return the current time in ms in $now_ms
  496. ${current_time_ms}
  497. for chart in "${now_charts[@]}"; do
  498. MODULE_NAME="${chart}"
  499. if [ ${now_ms} -ge ${charts_next_update[$chart]} ]; then
  500. last_ms=${charts_last_update[$chart]}
  501. dt=$((now_ms - last_ms))
  502. charts_last_update[$chart]=${now_ms}
  503. while [ ${charts_next_update[$chart]} -lt ${now_ms} ]; do
  504. charts_next_update[$chart]=$((charts_next_update[$chart] + (charts_update_every[$chart] * 1000)))
  505. done
  506. # the first call should not give a duration
  507. # so that netdata calibrates to current time
  508. dt=$((dt * 1000))
  509. charts_run_counter[$chart]=$((charts_run_counter[$chart] + 1))
  510. if [ ${charts_run_counter[$chart]} -eq 1 ]; then
  511. dt=
  512. fi
  513. exec_start_ms=$now_ms
  514. $chart$charts_update $dt
  515. ret=$?
  516. # return the current time in ms in $now_ms
  517. ${current_time_ms}
  518. exec_end_ms=$now_ms
  519. echo "BEGIN netdata.plugin_chartsd_$chart $dt"
  520. echo "SET run_time = $((exec_end_ms - exec_start_ms))"
  521. echo "END"
  522. if [ $ret -eq 0 ]; then
  523. charts_serial_failures[$chart]=0
  524. next_charts+=($chart)
  525. else
  526. charts_serial_failures[$chart]=$((charts_serial_failures[$chart] + 1))
  527. if [ ${charts_serial_failures[$chart]} -gt ${charts_retries[$chart]} ]; then
  528. error "module's '$chart' update() function reported failure ${charts_serial_failures[$chart]} times. Disabling it."
  529. else
  530. error "module's '$chart' update() function reports failure. Will keep trying for a while."
  531. next_charts+=($chart)
  532. fi
  533. fi
  534. else
  535. next_charts+=($chart)
  536. fi
  537. done
  538. MODULE_NAME="${chart}"
  539. # wait the time you are required to
  540. next_ms=$((now_ms + (update_every * 1000 * 100)))
  541. for x in "${charts_next_update[@]}"; do [ ${x} -lt ${next_ms} ] && next_ms=${x}; done
  542. next_ms=$((next_ms - now_ms))
  543. if [ ${LOOPSLEEPMS_HIGHRES} -eq 1 -a ${next_ms} -gt 0 ]; then
  544. next_ms=$((next_ms + current_time_ms_accuracy))
  545. seconds=$((next_ms / 1000))
  546. millis=$((next_ms % 1000))
  547. if [ ${millis} -lt 10 ]; then
  548. millis="00${millis}"
  549. elif [ ${millis} -lt 100 ]; then
  550. millis="0${millis}"
  551. fi
  552. debug "sleeping for ${seconds}.${millis} seconds."
  553. ${mysleep} ${seconds}.${millis}
  554. else
  555. debug "sleeping for ${update_every} seconds."
  556. ${mysleep} $update_every
  557. fi
  558. test ${now_ms} -ge ${exit_at} && exit 0
  559. done
  560. fatal "nothing left to do, exiting..."
  561. }
  562. global_update