tc-qos-helper.sh.in 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. #!/usr/bin/env bash
  2. # netdata
  3. # real-time performance and health monitoring, done right!
  4. # (C) 2023 Netdata Inc.
  5. # SPDX-License-Identifier: GPL-3.0-or-later
  6. #
  7. # This script is a helper to allow netdata collect tc data.
  8. # tc output parsing has been implemented in C, inside netdata
  9. # This script allows setting names to dimensions.
  10. export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin:@sbindir_POST@"
  11. export LC_ALL=C
  12. cmd_line="'${0}' $(printf "'%s' " "${@}")"
  13. # -----------------------------------------------------------------------------
  14. # logging
  15. PROGRAM_NAME="$(basename "${0}")"
  16. PROGRAM_NAME="${PROGRAM_NAME/.plugin/}"
  17. # these should be the same with syslog() priorities
  18. NDLP_EMERG=0 # system is unusable
  19. NDLP_ALERT=1 # action must be taken immediately
  20. NDLP_CRIT=2 # critical conditions
  21. NDLP_ERR=3 # error conditions
  22. NDLP_WARN=4 # warning conditions
  23. NDLP_NOTICE=5 # normal but significant condition
  24. NDLP_INFO=6 # informational
  25. NDLP_DEBUG=7 # debug-level messages
  26. # the max (numerically) log level we will log
  27. LOG_LEVEL=$NDLP_INFO
  28. set_log_min_priority() {
  29. case "${NETDATA_LOG_LEVEL,,}" in
  30. "emerg" | "emergency")
  31. LOG_LEVEL=$NDLP_EMERG
  32. ;;
  33. "alert")
  34. LOG_LEVEL=$NDLP_ALERT
  35. ;;
  36. "crit" | "critical")
  37. LOG_LEVEL=$NDLP_CRIT
  38. ;;
  39. "err" | "error")
  40. LOG_LEVEL=$NDLP_ERR
  41. ;;
  42. "warn" | "warning")
  43. LOG_LEVEL=$NDLP_WARN
  44. ;;
  45. "notice")
  46. LOG_LEVEL=$NDLP_NOTICE
  47. ;;
  48. "info")
  49. LOG_LEVEL=$NDLP_INFO
  50. ;;
  51. "debug")
  52. LOG_LEVEL=$NDLP_DEBUG
  53. ;;
  54. esac
  55. }
  56. set_log_min_priority
  57. log() {
  58. local level="${1}"
  59. shift 1
  60. [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return
  61. systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <<EOFLOG
  62. INVOCATION_ID=${NETDATA_INVOCATION_ID}
  63. SYSLOG_IDENTIFIER=${PROGRAM_NAME}
  64. PRIORITY=${level}
  65. THREAD_TAG=tc-qos-helper
  66. ND_LOG_SOURCE=collector
  67. ND_REQUEST=${cmd_line}
  68. MESSAGE=${*//\\n/--NEWLINE--}
  69. EOFLOG
  70. # AN EMPTY LINE IS NEEDED ABOVE
  71. }
  72. info() {
  73. log "$NDLP_INFO" "${@}"
  74. }
  75. warning() {
  76. log "$NDLP_WARN" "${@}"
  77. }
  78. error() {
  79. log "$NDLP_ERR" "${@}"
  80. }
  81. fatal() {
  82. log "$NDLP_ALERT" "${@}"
  83. exit 1
  84. }
  85. debug() {
  86. log "$NDLP_DEBUG" "${@}"
  87. }
  88. # -----------------------------------------------------------------------------
  89. # find /var/run/fireqos
  90. # the default
  91. fireqos_run_dir="/var/run/fireqos"
  92. function realdir() {
  93. local r
  94. local t
  95. r="$1"
  96. t="$(readlink "$r")"
  97. while [ "$t" ]; do
  98. r=$(cd "$(dirname "$r")" && cd "$(dirname "$t")" && pwd -P)/$(basename "$t")
  99. t=$(readlink "$r")
  100. done
  101. dirname "$r"
  102. }
  103. if [ ! -d "${fireqos_run_dir}" ]; then
  104. # the fireqos executable - we will use it to find its config
  105. fireqos="$(command -v fireqos 2>/dev/null)"
  106. if [ -n "${fireqos}" ]; then
  107. fireqos_exec_dir="$(realdir "${fireqos}")"
  108. if [ -n "${fireqos_exec_dir}" ] && [ "${fireqos_exec_dir}" != "." ] && [ -f "${fireqos_exec_dir}/install.config" ]; then
  109. LOCALSTATEDIR=
  110. #shellcheck source=/dev/null
  111. source "${fireqos_exec_dir}/install.config"
  112. if [ -d "${LOCALSTATEDIR}/run/fireqos" ]; then
  113. fireqos_run_dir="${LOCALSTATEDIR}/run/fireqos"
  114. else
  115. warning "FireQOS is installed as '${fireqos}', its installation config at '${fireqos_exec_dir}/install.config' specifies local state data at '${LOCALSTATEDIR}/run/fireqos', but this directory is not found or is not readable (check the permissions of its parents)."
  116. fi
  117. else
  118. warning "Although FireQOS is installed on this system as '${fireqos}', I cannot find/read its installation configuration at '${fireqos_exec_dir}/install.config'."
  119. fi
  120. else
  121. warning "FireQOS is not installed on this system. Use FireQOS to apply traffic QoS and expose the class names to netdata. Check https://github.com/netdata/netdata/tree/master/collectors/tc.plugin#tcplugin"
  122. fi
  123. fi
  124. # -----------------------------------------------------------------------------
  125. [ -z "${NETDATA_PLUGINS_DIR}" ] && NETDATA_PLUGINS_DIR="$(dirname "${0}")"
  126. [ -z "${NETDATA_USER_CONFIG_DIR}" ] && NETDATA_USER_CONFIG_DIR="@configdir_POST@"
  127. [ -z "${NETDATA_STOCK_CONFIG_DIR}" ] && NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@"
  128. plugins_dir="${NETDATA_PLUGINS_DIR}"
  129. tc="$(command -v tc 2>/dev/null)"
  130. # -----------------------------------------------------------------------------
  131. # user configuration
  132. # time in seconds to refresh QoS class/qdisc names
  133. qos_get_class_names_every=120
  134. # time in seconds to exit - netdata will restart the script
  135. qos_exit_every=3600
  136. # what to use? classes or qdiscs?
  137. tc_show="qdisc" # can also be "class"
  138. # -----------------------------------------------------------------------------
  139. # check if we have a valid number for interval
  140. t=${1}
  141. update_every=$((t))
  142. [ $((update_every)) -lt 1 ] && update_every=${NETDATA_UPDATE_EVERY}
  143. [ $((update_every)) -lt 1 ] && update_every=1
  144. # -----------------------------------------------------------------------------
  145. # allow the user to override our defaults
  146. for CONFIG in "${NETDATA_STOCK_CONFIG_DIR}/tc-qos-helper.conf" "${NETDATA_USER_CONFIG_DIR}/tc-qos-helper.conf"; do
  147. if [ -f "${CONFIG}" ]; then
  148. info "Loading config file '${CONFIG}'..."
  149. #shellcheck source=/dev/null
  150. source "${CONFIG}" || error "Failed to load config file '${CONFIG}'."
  151. else
  152. warning "Cannot find file '${CONFIG}'."
  153. fi
  154. done
  155. case "${tc_show}" in
  156. qdisc | class) ;;
  157. *)
  158. error "tc_show variable can be either 'qdisc' or 'class' but is set to '${tc_show}'. Assuming it is 'qdisc'."
  159. tc_show="qdisc"
  160. ;;
  161. esac
  162. # -----------------------------------------------------------------------------
  163. # default sleep function
  164. LOOPSLEEPMS_LASTWORK=0
  165. loopsleepms() {
  166. sleep "$1"
  167. }
  168. # if found and included, this file overwrites loopsleepms()
  169. # with a high resolution timer function for precise looping.
  170. #shellcheck source=/dev/null
  171. . "${plugins_dir}/loopsleepms.sh.inc"
  172. # -----------------------------------------------------------------------------
  173. # final checks we can run
  174. if [ -z "${tc}" ] || [ ! -x "${tc}" ]; then
  175. fatal "cannot find command 'tc' in this system."
  176. fi
  177. tc_devices=
  178. fix_names=
  179. # -----------------------------------------------------------------------------
  180. setclassname() {
  181. if [ "${tc_show}" = "qdisc" ]; then
  182. echo "SETCLASSNAME $4 $2"
  183. else
  184. echo "SETCLASSNAME $3 $2"
  185. fi
  186. }
  187. show_tc_cls() {
  188. [ "${tc_show}" = "qdisc" ] && return 1
  189. local x="${1}"
  190. if [ -f /etc/iproute2/tc_cls ]; then
  191. local classid name rest
  192. while read -r classid name rest; do
  193. if [ -z "${classid}" ] ||
  194. [ -z "${name}" ] ||
  195. [ "${classid}" = "#" ] ||
  196. [ "${name}" = "#" ] ||
  197. [ "${classid:0:1}" = "#" ] ||
  198. [ "${name:0:1}" = "#" ]; then
  199. continue
  200. fi
  201. setclassname "" "${name}" "${classid}"
  202. done </etc/iproute2/tc_cls
  203. return 0
  204. fi
  205. return 1
  206. }
  207. show_fireqos_names() {
  208. local x="${1}" name n interface_dev interface_classes_monitor
  209. if [ -f "${fireqos_run_dir}/ifaces/${x}" ]; then
  210. name="$(<"${fireqos_run_dir}/ifaces/${x}")"
  211. echo "SETDEVICENAME ${name}" || exit
  212. #shellcheck source=/dev/null
  213. source "${fireqos_run_dir}/${name}.conf"
  214. for n in ${interface_classes_monitor}; do
  215. # shellcheck disable=SC2086
  216. setclassname ${n//|/ }
  217. done
  218. [ -n "${interface_dev}" ] && echo "SETDEVICEGROUP ${interface_dev}" || exit
  219. return 0
  220. fi
  221. return 1
  222. }
  223. show_tc() {
  224. local x="${1}"
  225. echo "BEGIN ${x}" || exit
  226. # netdata can parse the output of tc
  227. ${tc} -s ${tc_show} show dev "${x}"
  228. # check FireQOS names for classes
  229. if [ -n "${fix_names}" ]; then
  230. show_fireqos_names "${x}" || show_tc_cls "${x}"
  231. fi
  232. echo "END ${x}" || exit
  233. }
  234. find_tc_devices() {
  235. local count=0 devs dev rest l
  236. # find all the devices in the system
  237. # without forking
  238. while IFS=":| " read -r dev rest; do
  239. count=$((count + 1))
  240. [ ${count} -le 2 ] && continue
  241. devs="${devs} ${dev}"
  242. done </proc/net/dev
  243. # from all the devices find the ones
  244. # that have QoS defined
  245. # unfortunately, one fork per device cannot be avoided
  246. tc_devices=
  247. for dev in ${devs}; do
  248. l="$(${tc} class show dev "${dev}" 2>/dev/null)"
  249. [ -n "${l}" ] && tc_devices="${tc_devices} ${dev}"
  250. done
  251. }
  252. # update devices and class names
  253. # once every 2 minutes
  254. names_every=$((qos_get_class_names_every / update_every))
  255. # exit this script every hour
  256. # it will be restarted automatically
  257. exit_after=$((qos_exit_every / update_every))
  258. c=0
  259. gc=0
  260. while true; do
  261. fix_names=
  262. c=$((c + 1))
  263. gc=$((gc + 1))
  264. if [ ${c} -le 1 ] || [ ${c} -ge ${names_every} ]; then
  265. c=1
  266. fix_names="YES"
  267. find_tc_devices
  268. fi
  269. for d in ${tc_devices}; do
  270. show_tc "${d}"
  271. done
  272. echo "WORKTIME ${LOOPSLEEPMS_LASTWORK}" || exit
  273. loopsleepms "${update_every}"
  274. [ ${gc} -gt ${exit_after} ] && exit 0
  275. done