cgroup-network-helper.sh.in 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. #!/usr/bin/env bash
  2. # shellcheck disable=SC1117
  3. # cgroup-network-helper.sh
  4. # detect container and virtual machine interfaces
  5. #
  6. # (C) 2023 Netdata Inc.
  7. # SPDX-License-Identifier: GPL-3.0-or-later
  8. #
  9. # This script is called as root (by cgroup-network), with either a pid, or a cgroup path.
  10. # It tries to find all the network interfaces that belong to the same cgroup.
  11. #
  12. # It supports several methods for this detection:
  13. #
  14. # 1. cgroup-network (the binary father of this script) detects veth network interfaces,
  15. # by examining iflink and ifindex IDs and switching namespaces
  16. # (it also detects the interface name as it is used by the container).
  17. #
  18. # 2. this script, uses /proc/PID/fdinfo to find tun/tap network interfaces.
  19. #
  20. # 3. this script, calls virsh to find libvirt network interfaces.
  21. #
  22. # -----------------------------------------------------------------------------
  23. # the system path is cleared by cgroup-network
  24. # shellcheck source=/dev/null
  25. [ -f /etc/profile ] && source /etc/profile
  26. export PATH="${PATH}:@sbindir_POST@"
  27. export LC_ALL=C
  28. cmd_line="'${0}' $(printf "'%s' " "${@}")"
  29. # -----------------------------------------------------------------------------
  30. # logging
  31. PROGRAM_NAME="$(basename "${0}")"
  32. # these should be the same with syslog() priorities
  33. NDLP_EMERG=0 # system is unusable
  34. NDLP_ALERT=1 # action must be taken immediately
  35. NDLP_CRIT=2 # critical conditions
  36. NDLP_ERR=3 # error conditions
  37. NDLP_WARN=4 # warning conditions
  38. NDLP_NOTICE=5 # normal but significant condition
  39. NDLP_INFO=6 # informational
  40. NDLP_DEBUG=7 # debug-level messages
  41. # the max (numerically) log level we will log
  42. LOG_LEVEL=$NDLP_INFO
  43. set_log_min_priority() {
  44. case "${NETDATA_LOG_LEVEL,,}" in
  45. "emerg" | "emergency")
  46. LOG_LEVEL=$NDLP_EMERG
  47. ;;
  48. "alert")
  49. LOG_LEVEL=$NDLP_ALERT
  50. ;;
  51. "crit" | "critical")
  52. LOG_LEVEL=$NDLP_CRIT
  53. ;;
  54. "err" | "error")
  55. LOG_LEVEL=$NDLP_ERR
  56. ;;
  57. "warn" | "warning")
  58. LOG_LEVEL=$NDLP_WARN
  59. ;;
  60. "notice")
  61. LOG_LEVEL=$NDLP_NOTICE
  62. ;;
  63. "info")
  64. LOG_LEVEL=$NDLP_INFO
  65. ;;
  66. "debug")
  67. LOG_LEVEL=$NDLP_DEBUG
  68. ;;
  69. esac
  70. }
  71. set_log_min_priority
  72. log() {
  73. local level="${1}"
  74. shift 1
  75. [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return
  76. systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <<EOFLOG
  77. INVOCATION_ID=${NETDATA_INVOCATION_ID}
  78. SYSLOG_IDENTIFIER=${PROGRAM_NAME}
  79. PRIORITY=${level}
  80. THREAD_TAG=cgroup-network-helper
  81. ND_LOG_SOURCE=collector
  82. ND_REQUEST=${cmd_line}
  83. MESSAGE=${*//\\n/--NEWLINE--}
  84. EOFLOG
  85. # AN EMPTY LINE IS NEEDED ABOVE
  86. }
  87. info() {
  88. log "$NDLP_INFO" "${@}"
  89. }
  90. warning() {
  91. log "$NDLP_WARN" "${@}"
  92. }
  93. error() {
  94. log "$NDLP_ERR" "${@}"
  95. }
  96. fatal() {
  97. log "$NDLP_ALERT" "${@}"
  98. exit 1
  99. }
  100. debug() {
  101. log "$NDLP_DEBUG" "${@}"
  102. }
  103. debug=0
  104. if [ "${NETDATA_CGROUP_NETWORK_HELPER_DEBUG-0}" = "1" ]; then
  105. debug=1
  106. LOG_LEVEL=$NDLP_DEBUG
  107. fi
  108. # -----------------------------------------------------------------------------
  109. # check for BASH v4+ (required for associative arrays)
  110. if [ ${BASH_VERSINFO[0]} -lt 4 ]; then
  111. echo >&2 "BASH version 4 or later is required (this is ${BASH_VERSION})."
  112. exit 1
  113. fi
  114. # -----------------------------------------------------------------------------
  115. # parse the arguments
  116. pid=
  117. cgroup=
  118. while [ -n "${1}" ]
  119. do
  120. case "${1}" in
  121. --cgroup) cgroup="${2}"; shift 1;;
  122. --pid|-p) pid="${2}"; shift 1;;
  123. --debug|debug)
  124. debug=1
  125. LOG_LEVEL=$NDLP_DEBUG
  126. ;;
  127. *) fatal "Cannot understand argument '${1}'";;
  128. esac
  129. shift
  130. done
  131. if [ -z "${pid}" ] && [ -z "${cgroup}" ]
  132. then
  133. fatal "Either --pid or --cgroup is required"
  134. fi
  135. # -----------------------------------------------------------------------------
  136. set_source() {
  137. [ ${debug} -eq 1 ] && echo "SRC ${*}"
  138. }
  139. # -----------------------------------------------------------------------------
  140. # veth interfaces via cgroup
  141. # cgroup-network can detect veth interfaces by itself (written in C).
  142. # If you are looking for a shell version of what it does, check this:
  143. # https://github.com/netdata/netdata/issues/474#issuecomment-317866709
  144. # -----------------------------------------------------------------------------
  145. # tun/tap interfaces via /proc/PID/fdinfo
  146. # find any tun/tap devices linked to a pid
  147. proc_pid_fdinfo_iff() {
  148. local p="${1}" # the pid
  149. debug "Searching for tun/tap interfaces for pid ${p}..."
  150. set_source "fdinfo"
  151. grep "^iff:.*" "${NETDATA_HOST_PREFIX}/proc/${p}/fdinfo"/* 2>/dev/null | cut -f 2
  152. }
  153. find_tun_tap_interfaces_for_cgroup() {
  154. local c="${1}" # the cgroup path
  155. [ -d "${c}/emulator" ] && c="${c}/emulator" # check for 'emulator' subdirectory
  156. c="${c}/cgroup.procs" # make full path
  157. # for each pid of the cgroup
  158. # find any tun/tap devices linked to the pid
  159. if [ -f "${c}" ]
  160. then
  161. local p
  162. for p in $(< "${c}" )
  163. do
  164. proc_pid_fdinfo_iff "${p}"
  165. done
  166. else
  167. debug "Cannot find file '${c}', not searching for tun/tap interfaces."
  168. fi
  169. }
  170. # -----------------------------------------------------------------------------
  171. # virsh domain network interfaces
  172. virsh_cgroup_to_domain_name() {
  173. local c="${1}" # the cgroup path
  174. debug "extracting a possible virsh domain from cgroup ${c}..."
  175. # extract for the cgroup path
  176. sed -n -e "s|.*/machine-qemu\\\\x2d[0-9]\+\\\\x2d\(.*\)\.scope$|\1|p" \
  177. -e "s|.*/machine/qemu-[0-9]\+-\(.*\)\.libvirt-qemu$|\1|p" \
  178. -e "s|.*/machine/\(.*\)\.libvirt-qemu$|\1|p" \
  179. <<EOF
  180. ${c}
  181. EOF
  182. }
  183. virsh_find_all_interfaces_for_cgroup() {
  184. local c="${1}" # the cgroup path
  185. # the virsh command
  186. local virsh
  187. # shellcheck disable=SC2230
  188. virsh="$(which virsh 2>/dev/null || command -v virsh 2>/dev/null)"
  189. if [ -n "${virsh}" ]
  190. then
  191. local d
  192. d="$(virsh_cgroup_to_domain_name "${c}")"
  193. # convert hex to character
  194. # e.g.: vm01\x2dweb => vm01-web (https://github.com/netdata/netdata/issues/11088#issuecomment-832618149)
  195. d="$(printf '%b' "${d}")"
  196. if [ -n "${d}" ]
  197. then
  198. debug "running: virsh domiflist ${d}; to find the network interfaces"
  199. # 'virsh -r domiflist <domain>' example output
  200. # Interface Type Source Model MAC
  201. #--------------------------------------------------------------
  202. # vnet3 bridge br0 virtio 52:54:00:xx:xx:xx
  203. # vnet4 network default virtio 52:54:00:yy:yy:yy
  204. # match only 'network' interfaces from virsh output
  205. set_source "virsh"
  206. "${virsh}" -r domiflist "${d}" |\
  207. sed -n \
  208. -e "s|^[[:space:]]\?\([^[:space:]]\+\)[[:space:]]\+network[[:space:]]\+\([^[:space:]]\+\)[[:space:]]\+[^[:space:]]\+[[:space:]]\+[^[:space:]]\+$|\1 \1_\2|p" \
  209. -e "s|^[[:space:]]\?\([^[:space:]]\+\)[[:space:]]\+bridge[[:space:]]\+\([^[:space:]]\+\)[[:space:]]\+[^[:space:]]\+[[:space:]]\+[^[:space:]]\+$|\1 \1_\2|p"
  210. else
  211. debug "no virsh domain extracted from cgroup ${c}"
  212. fi
  213. else
  214. debug "virsh command is not available"
  215. fi
  216. }
  217. # -----------------------------------------------------------------------------
  218. # netnsid detected interfaces
  219. netnsid_find_all_interfaces_for_pid() {
  220. local pid="${1}"
  221. [ -z "${pid}" ] && return 1
  222. local nsid
  223. nsid=$(lsns -t net -p "${pid}" -o NETNSID -nr 2>/dev/null)
  224. if [ -z "${nsid}" ] || [ "${nsid}" = "unassigned" ]; then
  225. return 1
  226. fi
  227. set_source "netnsid"
  228. ip link show |\
  229. grep -B 1 -E " link-netnsid ${nsid}($| )" |\
  230. sed -n -e "s|^[[:space:]]*[0-9]\+:[[:space:]]\+\([A-Za-z0-9_]\+\)\(@[A-Za-z0-9_]\+\)*:[[:space:]].*$|\1|p"
  231. }
  232. netnsid_find_all_interfaces_for_cgroup() {
  233. local c="${1}" # the cgroup path
  234. if [ -f "${c}/cgroup.procs" ]; then
  235. netnsid_find_all_interfaces_for_pid "$(head -n 1 "${c}/cgroup.procs" 2>/dev/null)"
  236. else
  237. debug "Cannot find file '${c}/cgroup.procs', not searching for netnsid interfaces."
  238. fi
  239. }
  240. # -----------------------------------------------------------------------------
  241. find_all_interfaces_of_pid_or_cgroup() {
  242. local p="${1}" c="${2}" # the pid and the cgroup path
  243. if [ -n "${pid}" ]
  244. then
  245. # we have been called with a pid
  246. proc_pid_fdinfo_iff "${p}"
  247. netnsid_find_all_interfaces_for_pid "${p}"
  248. elif [ -n "${c}" ]
  249. then
  250. # we have been called with a cgroup
  251. info "searching for network interfaces of cgroup '${c}'"
  252. find_tun_tap_interfaces_for_cgroup "${c}"
  253. virsh_find_all_interfaces_for_cgroup "${c}"
  254. netnsid_find_all_interfaces_for_cgroup "${c}"
  255. else
  256. error "Either a pid or a cgroup path is needed"
  257. return 1
  258. fi
  259. return 0
  260. }
  261. # -----------------------------------------------------------------------------
  262. # an associative array to store the interfaces
  263. # the index is the interface name as seen by the host
  264. # the value is the interface name as seen by the guest / container
  265. declare -A devs=()
  266. # store all interfaces found in the associative array
  267. # this will also give the unique devices, as seen by the host
  268. last_src=
  269. # shellcheck disable=SC2162
  270. while read host_device guest_device
  271. do
  272. [ -z "${host_device}" ] && continue
  273. [ "${host_device}" = "SRC" ] && last_src="${guest_device}" && continue
  274. # the default guest_device is the host_device
  275. [ -z "${guest_device}" ] && guest_device="${host_device}"
  276. # when we run in debug, show the source
  277. debug "Found host device '${host_device}', guest device '${guest_device}', detected via '${last_src}'"
  278. if [ -z "${devs[${host_device}]}" ] || [ "${devs[${host_device}]}" = "${host_device}" ]; then
  279. devs[${host_device}]="${guest_device}"
  280. fi
  281. done < <( find_all_interfaces_of_pid_or_cgroup "${pid}" "${cgroup}" )
  282. # print the interfaces found, in the format netdata expects them
  283. found=0
  284. for x in "${!devs[@]}"
  285. do
  286. found=$((found + 1))
  287. echo "${x} ${devs[${x}]}"
  288. done
  289. debug "found ${found} network interfaces for pid '${pid}', cgroup '${cgroup}', run as ${USER}, ${UID}"
  290. # let netdata know if we found any
  291. [ ${found} -eq 0 ] && exit 1
  292. exit 0