tc-qos-helper.sh.in 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. #!/usr/bin/env bash
  2. # netdata
  3. # real-time performance and health monitoring, done right!
  4. # (C) 2017 Costa Tsaousis <costa@tsaousis.gr>
  5. # SPDX-License-Identifier: GPL-3.0-or-later
  6. #
  7. # This script is a helper to allow netdata collect tc data.
  8. # tc output parsing has been implemented in C, inside netdata
  9. # This script allows setting names to dimensions.
  # Extend the search path with the sbin directories and force the C locale,
  # then export both so that child processes inherit them.
  PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin"
  LC_ALL=C
  export PATH LC_ALL

  # -----------------------------------------------------------------------------
  # logging functions

  # Prefix used in every log line: the basename of this script with the first
  # occurrence of ".plugin" removed.
  PROGRAM_NAME="$(basename "$0")"
  PROGRAM_NAME="${PROGRAM_NAME/.plugin/}"
  # Print the current date and time on stdout as "YYYY-MM-DD HH:MM:SS".
  logdate() {
    date +'%Y-%m-%d %H:%M:%S'
  }
  # Write one log line to stderr.
  # Arguments: $1 - severity label; remaining arguments - the message words,
  #            joined into a single string.
  # Output format: "<logdate>: <PROGRAM_NAME>: <severity>: <message>"
  log() {
    local level="${1}"
    shift
    printf '%s\n' "$(logdate): ${PROGRAM_NAME}: ${level}: ${*}" >&2
  }
  # Log all arguments with the WARNING severity label.
  warning() {
    log WARNING "$@"
  }
  # Log all arguments with the ERROR severity label.
  error() {
    log ERROR "$@"
  }
  # Log all arguments with the INFO severity label.
  info() {
    log INFO "$@"
  }
  # Log all arguments with the FATAL severity label, then terminate the
  # script with exit status 1.
  fatal() {
    log FATAL "$@"
    exit 1
  }
  # Debug switch: debug() only produces output while this is 1.
  debug=0

  # Log all arguments with the DEBUG severity label when debugging is enabled.
  # Returns 0 when debugging is disabled. The previous `[ ... ] && log` form
  # returned 1 in that case, so a disabled debug call looked like a failure
  # to the caller.
  debug() {
    if [ "${debug:-0}" -eq 1 ]; then
      log DEBUG "${@}"
    fi
  }
  41. # -----------------------------------------------------------------------------
  # Locate the FireQOS run directory.
  # Start from the default location; it is re-resolved below if it is missing.
  fireqos_run_dir='/var/run/fireqos'
  # Print the directory that contains the final target of a path, following
  # a chain of symbolic links one hop at a time.
  # Arguments: $1 - path to resolve (may itself be a symlink)
  # Outputs:   the directory of the resolved path on stdout
  # Returns:   0 on success; 1 (with no output) when a directory along the
  #            chain cannot be entered or when the chain exceeds 40 links,
  #            which protects against symlink cycles that would otherwise
  #            loop forever.
  realdir() {
    local path="$1"
    local target parent
    local hops=0

    target="$(readlink "${path}")"
    while [ -n "${target}" ]; do
      hops=$((hops + 1))
      if [ "${hops}" -gt 40 ]; then
        return 1
      fi

      # A relative link target is interpreted relative to the directory of
      # the link itself, hence the two chained cd calls.
      parent="$(cd "$(dirname "${path}")" && cd "$(dirname "${target}")" && pwd -P)" || return 1
      path="${parent}/$(basename "${target}")"
      target="$(readlink "${path}")"
    done

    dirname "${path}"
  }
  # When the default run directory does not exist, try to derive it from the
  # install.config file that sits next to the fireqos executable.
  if [ ! -d "${fireqos_run_dir}" ]; then
    # the fireqos executable - we will use it to find its config
    fireqos="$(command -v fireqos 2>/dev/null)"

    if [ -z "${fireqos}" ]; then
      warning "FireQOS is not installed on this system. Use FireQOS to apply traffic QoS and expose the class names to netdata. Check https://github.com/netdata/netdata/tree/master/collectors/tc.plugin#tcplugin"
    else
      fireqos_exec_dir="$(realdir "${fireqos}")"

      if [ -z "${fireqos_exec_dir}" ] || [ "${fireqos_exec_dir}" = "." ] || [ ! -f "${fireqos_exec_dir}/install.config" ]; then
        warning "Although FireQOS is installed on this system as '${fireqos}', I cannot find/read its installation configuration at '${fireqos_exec_dir}/install.config'."
      else
        # Reset first, so that only a value set by install.config is used.
        LOCALSTATEDIR=
        #shellcheck source=/dev/null
        source "${fireqos_exec_dir}/install.config"

        if [ -d "${LOCALSTATEDIR}/run/fireqos" ]; then
          fireqos_run_dir="${LOCALSTATEDIR}/run/fireqos"
        else
          warning "FireQOS is installed as '${fireqos}', its installation config at '${fireqos_exec_dir}/install.config' specifies local state data at '${LOCALSTATEDIR}/run/fireqos', but this directory is not found or is not readable (check the permissions of its parents)."
        fi
      fi
    fi
  fi
  77. # -----------------------------------------------------------------------------
  # Fill in the netdata directories when they are unset or empty in the
  # environment. The @...@ tokens are placeholders kept verbatim from the
  # template (presumably substituted at build time - TODO confirm).
  : "${NETDATA_PLUGINS_DIR:=$(dirname "${0}")}"
  : "${NETDATA_USER_CONFIG_DIR:=@configdir_POST@}"
  : "${NETDATA_STOCK_CONFIG_DIR:=@libconfigdir_POST@}"

  plugins_dir="${NETDATA_PLUGINS_DIR}"

  # Path of the tc command; stays empty when it cannot be found.
  tc="$(command -v tc 2>/dev/null)"
  83. # -----------------------------------------------------------------------------
  # user configuration (defaults; the config files loaded below may override)

  # seconds between refreshes of the QoS class/qdisc names
  qos_get_class_names_every=120

  # seconds after which this script exits - netdata will restart the script
  qos_exit_every=3600

  # which tc object type to show: "qdisc" or "class"
  tc_show='qdisc'
  91. # -----------------------------------------------------------------------------
  # Derive the collection interval (seconds) from the first command line
  # argument. Anything that evaluates to less than 1 falls back to
  # NETDATA_UPDATE_EVERY, and finally to 1.
  t="${1}"
  update_every=$((t))
  if [ $((update_every)) -lt 1 ]; then
    update_every="${NETDATA_UPDATE_EVERY}"
  fi
  if [ $((update_every)) -lt 1 ]; then
    update_every=1
  fi
  97. # -----------------------------------------------------------------------------
  98. # allow the user to override our defaults
  # Source the stock configuration first and the user configuration second,
  # so that settings in the user file take precedence. A missing file only
  # produces a warning.
  for CONFIG in "${NETDATA_STOCK_CONFIG_DIR}/tc-qos-helper.conf" "${NETDATA_USER_CONFIG_DIR}/tc-qos-helper.conf"; do
    if [ ! -f "${CONFIG}" ]; then
      warning "Cannot find file '${CONFIG}'."
      continue
    fi

    info "Loading config file '${CONFIG}'..."
    #shellcheck source=/dev/null
    source "${CONFIG}" || error "Failed to load config file '${CONFIG}'."
  done
  # Only "qdisc" and "class" are accepted for tc_show; any other value is
  # reported and replaced with "qdisc".
  if [ "${tc_show}" != "qdisc" ] && [ "${tc_show}" != "class" ]; then
    error "tc_show variable can be either 'qdisc' or 'class' but is set to '${tc_show}'. Assuming it is 'qdisc'."
    tc_show="qdisc"
  fi
  115. # -----------------------------------------------------------------------------
  # Fallback loop timer, used unless the include below replaces it.
  # LOOPSLEEPMS_LASTWORK is reported by the main loop as the WORKTIME value.
  LOOPSLEEPMS_LASTWORK=0

  # Sleep for the number of seconds given as $1.
  loopsleepms() {
    sleep "${1}"
  }

  # When this file is found and included, it overwrites loopsleepms()
  # with a high resolution timer function for precise looping.
  #shellcheck source=/dev/null
  source "${plugins_dir}/loopsleepms.sh.inc"
  125. # -----------------------------------------------------------------------------
  # Final sanity check: without an executable tc there is nothing to collect.
  if ! { [ -n "${tc}" ] && [ -x "${tc}" ]; }; then
    fatal "cannot find command 'tc' in this system."
  fi

  # Devices that have QoS configured; filled in by find_tc_devices.
  tc_devices=
  # Non-empty while class names have to be (re)sent in the current iteration.
  fix_names=
  132. # -----------------------------------------------------------------------------
  # Emit a "SETCLASSNAME <id> <name>" line on stdout.
  # Arguments: $2 - the name to report
  #            $3 - the id used when tc_show is not "qdisc"
  #            $4 - the id used when tc_show is "qdisc"
  # ($1 is accepted but not used.)
  setclassname() {
    local id="$3"
    if [ "${tc_show}" = "qdisc" ]; then
      id="$4"
    fi
    echo "SETCLASSNAME ${id} $2"
  }
  # Report class names taken from an iproute2 tc_cls file.
  # Each usable line has the form "<classid> <name> [ignored...]"; lines whose
  # classid or name is empty or starts with "#" are skipped. Every remaining
  # line is reported through setclassname.
  # Globals:   tc_show (read)
  # Arguments: $1 - device name (accepted for symmetry with the other show_*
  #                 functions, not used here)
  #            $2 - optional path of the names file,
  #                 default /etc/iproute2/tc_cls
  # Returns:   0 when the file exists and was processed;
  #            1 when tc_show is "qdisc" or the file does not exist.
  show_tc_cls() {
    [ "${tc_show}" = "qdisc" ] && return 1

    local cls_file="${2:-/etc/iproute2/tc_cls}"
    local classid name rest

    [ -f "${cls_file}" ] || return 1

    # The `|| [ -n ... ]` part keeps a final line that has no trailing
    # newline; read returns non-zero for it although it filled the variables.
    # "rest" only absorbs extra columns.
    while read -r classid name rest || [ -n "${classid}" ]; do
      case "${classid}" in
      '' | '#'*) continue ;;
      esac
      case "${name}" in
      '' | '#'*) continue ;;
      esac
      setclassname "" "${name}" "${classid}"
    done <"${cls_file}"

    return 0
  }
  # Report the FireQOS names recorded for a device.
  # Reads the interface name from "${fireqos_run_dir}/ifaces/<device>", emits
  # SETDEVICENAME, sources "${fireqos_run_dir}/<name>.conf", reports every
  # entry of interface_classes_monitor through setclassname and, when the
  # conf file set interface_dev, emits SETDEVICEGROUP.
  # Globals:   fireqos_run_dir (read)
  # Arguments: $1 - device name
  # Returns:   0 when the device has a FireQOS ifaces file, 1 otherwise.
  #            The script exits when writing to stdout fails.
  show_fireqos_names() {
    # interface_dev and interface_classes_monitor are declared local so the
    # values assigned by the sourced conf file do not leak between devices.
    local x="${1}" name n interface_dev interface_classes_monitor

    [ -f "${fireqos_run_dir}/ifaces/${x}" ] || return 1

    name="$(<"${fireqos_run_dir}/ifaces/${x}")"
    echo "SETDEVICENAME ${name}" || exit

    #shellcheck source=/dev/null
    source "${fireqos_run_dir}/${name}.conf"

    for n in ${interface_classes_monitor}; do
      # Each entry is a "|"-separated list; it is deliberately split into
      # separate arguments for setclassname.
      # shellcheck disable=SC2086
      setclassname ${n//|/ }
    done

    # Exit only when the echo itself fails. The former
    # `[ -n ... ] && echo ... || exit` also exited the whole script whenever
    # interface_dev was empty.
    if [ -n "${interface_dev}" ]; then
      echo "SETDEVICEGROUP ${interface_dev}" || exit
    fi

    return 0
  }
  # Emit one complete report for a device: a "BEGIN <device>" line, the raw
  # output of `tc -s <tc_show> show dev <device>`, optionally the class
  # names, and an "END <device>" line.
  # Globals:   tc, tc_show, fix_names (read)
  # Arguments: $1 - device name
  # The script exits when writing the BEGIN or END line fails.
  show_tc() {
    local x="${1}"

    echo "BEGIN ${x}" || exit

    # netdata can parse the output of tc
    # (quoted, so a tc path containing whitespace is run as one command)
    "${tc}" -s "${tc_show}" show dev "${x}"

    # Names are sent only while fix_names is set; FireQOS names are tried
    # first, the tc_cls file is the fallback.
    if [ -n "${fix_names}" ]; then
      show_fireqos_names "${x}" || show_tc_cls "${x}"
    fi

    echo "END ${x}" || exit
  }
  # Rebuild the list of network devices that have tc classes defined.
  # Globals:   tc (read), tc_devices (written: space separated device names,
  #            each preceded by a space)
  # Arguments: $1 - optional path of the device table to read,
  #                 default /proc/net/dev
  # Returns:   0 always.
  find_tc_devices() {
    local netdev_file="${1:-/proc/net/dev}"
    local count=0 devs="" dev rest l

    # Collect all device names without forking. The first two lines of the
    # table are skipped; the device name is the first field when splitting
    # on ":", "|" and spaces.
    while IFS=":| " read -r dev rest; do
      count=$((count + 1))
      [ "${count}" -le 2 ] && continue
      devs="${devs} ${dev}"
    done <"${netdev_file}"

    # Keep only the devices for which tc lists at least one class.
    # Unfortunately, one fork per device cannot be avoided.
    tc_devices=
    for dev in ${devs}; do
      l="$("${tc}" class show dev "${dev}" 2>/dev/null)"
      if [ -n "${l}" ]; then
        tc_devices="${tc_devices} ${dev}"
      fi
    done

    return 0
  }
  # Iterations between two refreshes of the device list and class names
  # (qos_get_class_names_every seconds expressed in update_every steps).
  names_every=$((qos_get_class_names_every / update_every))

  # Iterations after which the script exits (qos_exit_every seconds expressed
  # in update_every steps); it will be restarted automatically.
  exit_after=$((qos_exit_every / update_every))

  c=0  # iterations since the last names refresh
  gc=0 # iterations since the script started

  while :; do
    fix_names=
    c=$((c + 1))
    gc=$((gc + 1))

    # Refresh on the very first iteration and whenever the counter reaches
    # names_every; fix_names makes show_tc send the names in this pass.
    if [ "${c}" -le 1 ] || [ "${c}" -ge "${names_every}" ]; then
      c=1
      fix_names="YES"
      find_tc_devices
    fi

    for d in ${tc_devices}; do
      show_tc "${d}"
    done

    # Exit when stdout can no longer be written to.
    echo "WORKTIME ${LOOPSLEEPMS_LASTWORK}" || exit

    loopsleepms "${update_every}"

    if [ "${gc}" -gt "${exit_after}" ]; then
      exit 0
    fi
  done