cgroup-name.sh 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. #!/usr/bin/env bash
  2. #shellcheck disable=SC2001
  3. # netdata
  4. # real-time performance and health monitoring, done right!
  5. # (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
  6. # SPDX-License-Identifier: GPL-3.0-or-later
  7. #
  8. # Script to find a better name for cgroups
  9. #
  10. export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin"
  11. export LC_ALL=C
  12. # -----------------------------------------------------------------------------
  13. PROGRAM_NAME="$(basename "${0}")"
  14. logdate() {
  15. date "+%Y-%m-%d %H:%M:%S"
  16. }
  17. log() {
  18. local status="${1}"
  19. shift
  20. echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}"
  21. }
  22. warning() {
  23. log WARNING "${@}"
  24. }
  25. error() {
  26. log ERROR "${@}"
  27. }
  28. info() {
  29. log INFO "${@}"
  30. }
  31. fatal() {
  32. log FATAL "${@}"
  33. exit 1
  34. }
  35. function docker_like_get_name_command() {
  36. local command="${1}"
  37. local id="${2}"
  38. info "Running command: ${command} ps --filter=id=\"${id}\" --format=\"{{.Names}}\""
  39. NAME="$(${command} ps --filter=id="${id}" --format="{{.Names}}")"
  40. return 0
  41. }
  42. function docker_like_get_name_api() {
  43. local host_var="${1}"
  44. local host="${!host_var}"
  45. local path="/containers/${2}/json"
  46. if [ -z "${host}" ]; then
  47. warning "No ${host_var} is set"
  48. return 1
  49. fi
  50. if ! command -v jq > /dev/null 2>&1; then
  51. warning "Can't find jq command line tool. jq is required for netdata to retrieve container name using ${host} API, falling back to docker ps"
  52. return 1
  53. fi
  54. if [ -S "${host}" ]; then
  55. info "Running API command: curl --unix-socket \"${host}\" http://localhost${path}"
  56. JSON=$(curl -sS --unix-socket "${host}" "http://localhost${path}")
  57. else
  58. info "Running API command: curl \"${host}${path}\""
  59. JSON=$(curl -sS "${host}${path}")
  60. fi
  61. NAME=$(echo "${JSON}" | jq -r .Name,.Config.Hostname | grep -v null | head -n1 | sed 's|^/||')
  62. return 0
  63. }
  64. # get_lbl_val returns the value for the label with the given name.
  65. # Returns "null" string if the label doesn't exist.
  66. # Expected labels format: 'name="value",...'.
  67. function get_lbl_val() {
  68. local labels want_name
  69. labels="${1}"
  70. want_name="${2}"
  71. IFS=, read -ra labels <<< "$labels"
  72. local lname lval
  73. for l in "${labels[@]}"; do
  74. IFS="=" read -r lname lval <<< "$l"
  75. if [ "$want_name" = "$lname" ] && [ -n "$lval" ]; then
  76. echo "${lval:1:-1}" # trim "
  77. return 0
  78. fi
  79. done
  80. echo "null"
  81. return 1
  82. }
  83. function add_lbl_prefix() {
  84. local orig_labels prefix
  85. orig_labels="${1}"
  86. prefix="${2}"
  87. IFS=, read -ra labels <<< "$orig_labels"
  88. local new_labels
  89. for l in "${labels[@]}"; do
  90. new_labels+="${prefix}${l},"
  91. done
  92. echo "${new_labels:0:-1}" # trim last ','
  93. }
  94. # k8s_get_kubepod_name resolves */kubepods/* cgroup name.
  95. # pod level cgroup name format: 'pod_<namespace>_<pod_name>'
  96. # container level cgroup name format: 'cntr_<namespace>_<pod_name>_<container_name>'
  97. function k8s_get_kubepod_name() {
  98. # GKE /sys/fs/cgroup/*/ (cri=docker, cgroups=v1):
  99. # |-- kubepods
  100. # | |-- burstable
  101. # | | |-- pod98cee708-023b-11eb-933d-42010a800193
  102. # | | | |-- 922161c98e6ea450bf665226cdc64ca2aa3e889934c2cff0aec4325f8f78ac03
  103. # | `-- pode314bbac-d577-11ea-a171-42010a80013b
  104. # | |-- 7d505356b04507de7b710016d540b2759483ed5f9136bb01a80872b08f771930
  105. #
  106. # GKE /sys/fs/cgroup/*/ (cri=containerd, cgroups=v1):
  107. # |-- kubepods.slice
  108. # | |-- kubepods-besteffort.slice
  109. # | | |-- kubepods-besteffort-pode1465238_4518_4c21_832f_fd9f87033dad.slice
  110. # | | | |-- cri-containerd-66be9b2efdf4d85288c319b8c1a2f50d2439b5617e36f45d9d0d0be1381113be.scope
  111. # | `-- kubepods-pod91f5b561_369f_4103_8015_66391059996a.slice
  112. # | |-- cri-containerd-24c53b774a586f06abc058619b47f71d9d869ac50c92898adbd199106fd0aaeb.scope
  113. #
  114. # GKE /sys/fs/cgroup/*/ (cri=crio, cgroups=v1):
  115. # |-- kubepods.slice
  116. # | |-- kubepods-besteffort.slice
  117. # | | |-- kubepods-besteffort-podad412dfe_3589_4056_965a_592356172968.slice
  118. # | | | |-- crio-77b019312fd9825828b70214b2c94da69c30621af2a7ee06f8beace4bc9439e5.scope
  119. #
  120. # Minikube (v1.8.2) /sys/fs/cgroup/*/ (cri=docker, cgroups=v1):
  121. # |-- kubepods.slice
  122. # | |-- kubepods-besteffort.slice
  123. # | | |-- kubepods-besteffort-pod10fb5647_c724_400c_b9cc_0e6eae3110e7.slice
  124. # | | | |-- docker-36e5eb5056dfdf6dbb75c0c44a1ecf23217fe2c50d606209d8130fcbb19fb5a7.scope
  125. #
  126. # NOTE: cgroups plugin
  127. # - uses '_' to join dir names (so it is <parent>_<child>_<child>_...)
  128. # - replaces '.' with '-'
  129. local fn="${FUNCNAME[0]}"
  130. local id="${1}"
  131. if [[ ! $id =~ ^kubepods ]]; then
  132. warning "${fn}: '${id}' is not kubepod cgroup."
  133. return 1
  134. fi
  135. local clean_id="$id"
  136. clean_id=${clean_id//.slice/}
  137. clean_id=${clean_id//.scope/}
  138. local name pod_uid cntr_id
  139. if [[ $clean_id == "kubepods" ]]; then
  140. name="$clean_id"
  141. elif [[ $clean_id =~ .+(besteffort|burstable|guaranteed)$ ]]; then
  142. # kubepods_<QOS_CLASS>
  143. # kubepods_kubepods-<QOS_CLASS>
  144. name=${clean_id//-/_}
  145. name=${name/#kubepods_kubepods/kubepods}
  146. elif [[ $clean_id =~ .+pod[a-f0-9_-]+_(docker|crio|cri-containerd)-([a-f0-9]+)$ ]]; then
  147. # ...pod<POD_UID>_(docker|crio|cri-containerd)-<CONTAINER_ID> (POD_UID w/ "_")
  148. cntr_id=${BASH_REMATCH[2]}
  149. elif [[ $clean_id =~ .+pod[a-f0-9-]+_([a-f0-9]+)$ ]]; then
  150. # ...pod<POD_UID>_<CONTAINER_ID>
  151. cntr_id=${BASH_REMATCH[1]}
  152. elif [[ $clean_id =~ .+pod([a-f0-9_-]+)$ ]]; then
  153. # ...pod<POD_UID> (POD_UID w/ and w/o "_")
  154. pod_uid=${BASH_REMATCH[1]}
  155. pod_uid=${pod_uid//_/-}
  156. fi
  157. if [ -n "$name" ]; then
  158. echo "$name"
  159. return 0
  160. fi
  161. if [ -z "$pod_uid" ] && [ -z "$cntr_id" ]; then
  162. warning "${fn}: can't extract pod_uid or container_id from the cgroup '$id'."
  163. return 1
  164. fi
  165. [ -n "$pod_uid" ] && info "${fn}: cgroup '$id' is a pod(uid:$pod_uid)"
  166. [ -n "$cntr_id" ] && info "${fn}: cgroup '$id' is a container(id:$cntr_id)"
  167. if ! command -v jq > /dev/null 2>&1; then
  168. warning "${fn}: 'jq' command not available."
  169. return 1
  170. fi
  171. local kube_system_ns
  172. local tmp_kube_system_ns_file="${TMPDIR:-"/tmp/"}netdata-cgroups-kube-system-ns"
  173. [ -f "$tmp_kube_system_ns_file" ] && kube_system_ns=$(cat "$tmp_kube_system_ns_file" 2> /dev/null)
  174. local pods
  175. if [ -n "${KUBERNETES_SERVICE_HOST}" ] && [ -n "${KUBERNETES_PORT_443_TCP_PORT}" ]; then
  176. local token header host url
  177. token="$(< /var/run/secrets/kubernetes.io/serviceaccount/token)"
  178. header="Authorization: Bearer $token"
  179. host="$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT"
  180. if [ -z "$kube_system_ns" ]; then
  181. url="https://$host/api/v1/namespaces/kube-system"
  182. # FIX: check HTTP response code
  183. if ! kube_system_ns=$(curl -sSk -H "$header" "$url" 2>&1); then
  184. warning "${fn}: error on curl '${url}': ${kube_system_ns}."
  185. else
  186. echo "$kube_system_ns" > "$tmp_kube_system_ns_file" 2> /dev/null
  187. fi
  188. fi
  189. url="https://$host/api/v1/pods"
  190. [ -n "$MY_NODE_NAME" ] && url+="?fieldSelector=spec.nodeName==$MY_NODE_NAME"
  191. # FIX: check HTTP response code
  192. if ! pods=$(curl -sSk -H "$header" "$url" 2>&1); then
  193. warning "${fn}: error on curl '${url}': ${pods}."
  194. return 1
  195. fi
  196. elif ps -C kubelet > /dev/null 2>&1 && command -v kubectl > /dev/null 2>&1; then
  197. if [ -z "$kube_system_ns" ]; then
  198. if ! kube_system_ns=$(kubectl get namespaces kube-system -o json 2>&1); then
  199. warning "${fn}: error on 'kubectl': ${kube_system_ns}."
  200. else
  201. echo "$kube_system_ns" > "$tmp_kube_system_ns_file" 2> /dev/null
  202. fi
  203. fi
  204. [[ -z ${KUBE_CONFIG+x} ]] && KUBE_CONFIG="/etc/kubernetes/admin.conf"
  205. if ! pods=$(kubectl --kubeconfig="$KUBE_CONFIG" get pods --all-namespaces -o json 2>&1); then
  206. warning "${fn}: error on 'kubectl': ${pods}."
  207. return 1
  208. fi
  209. else
  210. warning "${fn}: not inside the k8s cluster and 'kubectl' command not available."
  211. return 1
  212. fi
  213. local kube_system_uid
  214. if [ -n "$kube_system_ns" ] && ! kube_system_uid=$(jq -r '.metadata.uid' <<< "$kube_system_ns" 2>&1); then
  215. warning "${fn}: error on 'jq' parse kube_system_ns: ${kube_system_uid}."
  216. fi
  217. local jq_filter
  218. jq_filter+='.items[] | "'
  219. jq_filter+='namespace=\"\(.metadata.namespace)\",'
  220. jq_filter+='pod_name=\"\(.metadata.name)\",'
  221. jq_filter+='pod_uid=\"\(.metadata.uid)\",'
  222. #jq_filter+='\(.metadata.labels | to_entries | map("pod_label_"+.key+"=\""+.value+"\"") | join(",") | if length > 0 then .+"," else . end)'
  223. jq_filter+='\((.metadata.ownerReferences[]? | select(.controller==true) | "controller_kind=\""+.kind+"\",controller_name=\""+.name+"\",") // "")'
  224. jq_filter+='node_name=\"\(.spec.nodeName)\",'
  225. jq_filter+='" + '
  226. jq_filter+='(.status.containerStatuses[]? | "'
  227. jq_filter+='container_name=\"\(.name)\",'
  228. jq_filter+='container_id=\"\(.containerID)\"'
  229. jq_filter+='") | '
  230. jq_filter+='sub("(docker|cri-o|containerd)://";"")' # containerID: docker://a346da9bc0e3eaba6b295f64ac16e02f2190db2cef570835706a9e7a36e2c722
  231. local containers
  232. if ! containers=$(jq -r "${jq_filter}" <<< "$pods" 2>&1); then
  233. warning "${fn}: error on 'jq' parse pods: ${containers}."
  234. return 1
  235. fi
  236. local qos_class
  237. if [[ $clean_id =~ .+(besteffort|burstable) ]]; then
  238. qos_class="${BASH_REMATCH[1]}"
  239. else
  240. qos_class="guaranteed"
  241. fi
  242. # available labels:
  243. # namespace, pod_name, pod_uid, container_name, container_id, node_name
  244. local labels
  245. if [ -n "$cntr_id" ]; then
  246. if labels=$(grep "$cntr_id" <<< "$containers" 2> /dev/null); then
  247. labels+=',kind="container"'
  248. labels+=",qos_class=\"$qos_class\""
  249. [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\""
  250. name="cntr"
  251. name+="_$(get_lbl_val "$labels" namespace)"
  252. name+="_$(get_lbl_val "$labels" pod_name)"
  253. name+="_$(get_lbl_val "$labels" container_name)"
  254. labels=$(add_lbl_prefix "$labels" "k8s_")
  255. name+=" $labels"
  256. fi
  257. elif [ -n "$pod_uid" ]; then
  258. if labels=$(grep "$pod_uid" -m 1 <<< "$containers" 2> /dev/null); then
  259. labels="${labels%%,container_*}"
  260. labels+=',kind="pod"'
  261. labels+=",qos_class=\"$qos_class\""
  262. [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\""
  263. name="pod"
  264. name+="_$(get_lbl_val "$labels" namespace)"
  265. name+="_$(get_lbl_val "$labels" pod_name)"
  266. labels=$(add_lbl_prefix "$labels" "k8s_")
  267. name+=" $labels"
  268. fi
  269. fi
  270. # jq filter nonexistent field and nonexistent label value is 'null'
  271. if [[ $name =~ _null(_|$) ]]; then
  272. warning "${fn}: invalid name: $name (cgroup '$id')"
  273. name=""
  274. fi
  275. echo "$name"
  276. [ -n "$name" ]
  277. return
  278. }
  279. function k8s_get_name() {
  280. local fn="${FUNCNAME[0]}"
  281. local id="${1}"
  282. NAME=$(k8s_get_kubepod_name "$id")
  283. if [ -z "${NAME}" ]; then
  284. warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${id} and disabling it."
  285. NAME="${id}"
  286. NAME_NOT_FOUND=3
  287. else
  288. NAME="k8s_${NAME}"
  289. local name labels
  290. name=${NAME%% *}
  291. labels=${NAME#* }
  292. if [ "$name" != "$labels" ]; then
  293. info "${fn}: cgroup '${id}' has chart name '${name}', labels '${labels}"
  294. else
  295. info "${fn}: cgroup '${id}' has chart name '${NAME}'"
  296. fi
  297. fi
  298. }
  299. function docker_get_name() {
  300. local id="${1}"
  301. if hash docker 2> /dev/null; then
  302. docker_like_get_name_command docker "${id}"
  303. else
  304. docker_like_get_name_api DOCKER_HOST "${id}" || docker_like_get_name_command podman "${id}"
  305. fi
  306. if [ -z "${NAME}" ]; then
  307. warning "cannot find the name of docker container '${id}'"
  308. NAME_NOT_FOUND=2
  309. NAME="${id:0:12}"
  310. else
  311. info "docker container '${id}' is named '${NAME}'"
  312. fi
  313. }
  314. function docker_validate_id() {
  315. local id="${1}"
  316. if [ -n "${id}" ] && { [ ${#id} -eq 64 ] || [ ${#id} -eq 12 ]; }; then
  317. docker_get_name "${id}"
  318. else
  319. error "a docker id cannot be extracted from docker cgroup '${CGROUP}'."
  320. fi
  321. }
  322. function podman_get_name() {
  323. local id="${1}"
  324. # for Podman, prefer using the API if we can, as netdata will not normally have access
  325. # to other users' containers, so they will not be visible when running `podman ps`
  326. docker_like_get_name_api PODMAN_HOST "${id}" || docker_like_get_name_command podman "${id}"
  327. if [ -z "${NAME}" ]; then
  328. warning "cannot find the name of podman container '${id}'"
  329. NAME_NOT_FOUND=2
  330. NAME="${id:0:12}"
  331. else
  332. info "podman container '${id}' is named '${NAME}'"
  333. fi
  334. }
  335. function podman_validate_id() {
  336. local id="${1}"
  337. if [ -n "${id}" ] && [ ${#id} -eq 64 ]; then
  338. podman_get_name "${id}"
  339. else
  340. error "a podman id cannot be extracted from docker cgroup '${CGROUP}'."
  341. fi
  342. }
  343. # -----------------------------------------------------------------------------
  344. DOCKER_HOST="${DOCKER_HOST:=/var/run/docker.sock}"
  345. PODMAN_HOST="${PODMAN_HOST:=/run/podman/podman.sock}"
  346. CGROUP="${1}"
  347. NAME_NOT_FOUND=0
  348. NAME=
  349. # -----------------------------------------------------------------------------
  350. if [ -z "${CGROUP}" ]; then
  351. fatal "called without a cgroup name. Nothing to do."
  352. fi
  353. if [ -z "${NAME}" ]; then
  354. if [[ ${CGROUP} =~ ^.*kubepods.* ]]; then
  355. k8s_get_name "${CGROUP}"
  356. fi
  357. fi
  358. if [ -z "${NAME}" ]; then
  359. if [[ ${CGROUP} =~ ^.*docker[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]]; then
  360. # docker containers
  361. #shellcheck disable=SC1117
  362. DOCKERID="$(echo "${CGROUP}" | sed "s|^.*docker[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")"
  363. docker_validate_id "${DOCKERID}"
  364. elif [[ ${CGROUP} =~ ^.*ecs[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]]; then
  365. # ECS
  366. #shellcheck disable=SC1117
  367. DOCKERID="$(echo "${CGROUP}" | sed "s|^.*ecs[-_/].*[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")"
  368. docker_validate_id "${DOCKERID}"
  369. elif [[ ${CGROUP} =~ ^.*libpod-[a-fA-F0-9]+.*$ ]]; then
  370. # Podman
  371. PODMANID="$(echo "${CGROUP}" | sed "s|^.*libpod-\([a-fA-F0-9]\+\).*$|\1|")"
  372. podman_validate_id "${PODMANID}"
  373. elif [[ ${CGROUP} =~ machine.slice[_/].*\.service ]]; then
  374. # systemd-nspawn
  375. NAME="$(echo "${CGROUP}" | sed 's/.*machine.slice[_\/]\(.*\)\.service/\1/g')"
  376. elif [[ ${CGROUP} =~ machine.slice_machine.*-lxc ]]; then
  377. # libvirtd / lxc containers
  378. # examples:
  379. # before: machine.slice machine-lxc/x2d969/x2dhubud0xians01.scope
  380. # after: lxc/hubud0xians01
  381. # before: machine.slice_machine-lxc/x2d969/x2dhubud0xians01.scope/libvirt_init.scope
  382. # after: lxc/hubud0xians01/libvirt_init
  383. NAME="lxc/$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-lxc//; s/\/x2d[[:digit:]]*//; s/\/x2d//g; s/\.scope//g')"
  384. elif [[ ${CGROUP} =~ machine.slice_machine.*-qemu ]]; then
  385. # libvirtd / qemu virtual machines
  386. # NAME="$(echo ${CGROUP} | sed 's/machine.slice_machine.*-qemu//; s/\/x2d//; s/\/x2d/\-/g; s/\.scope//g')"
  387. NAME="qemu_$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-qemu//; s/\/x2d[[:digit:]]*//; s/\/x2d//g; s/\.scope//g')"
  388. elif [[ ${CGROUP} =~ machine_.*\.libvirt-qemu ]]; then
  389. # libvirtd / qemu virtual machines
  390. NAME="qemu_$(echo "${CGROUP}" | sed 's/^machine_//; s/\.libvirt-qemu$//; s/-/_/;')"
  391. elif [[ ${CGROUP} =~ qemu.slice_([0-9]+).scope && -d /etc/pve ]]; then
  392. # Proxmox VMs
  393. FILENAME="/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf"
  394. if [[ -f $FILENAME && -r $FILENAME ]]; then
  395. NAME="qemu_$(grep -e '^name: ' "/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf" | head -1 | sed -rn 's|\s*name\s*:\s*(.*)?$|\1|p')"
  396. else
  397. error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group."
  398. fi
  399. elif [[ ${CGROUP} =~ lxc_([0-9]+) && -d /etc/pve ]]; then
  400. # Proxmox Containers (LXC)
  401. FILENAME="/etc/pve/lxc/${BASH_REMATCH[1]}.conf"
  402. if [[ -f ${FILENAME} && -r ${FILENAME} ]]; then
  403. NAME=$(grep -e '^hostname: ' "/etc/pve/lxc/${BASH_REMATCH[1]}.conf" | head -1 | sed -rn 's|\s*hostname\s*:\s*(.*)?$|\1|p')
  404. else
  405. error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group."
  406. fi
  407. elif [[ ${CGROUP} =~ lxc.payload.* ]]; then
  408. # LXC 4.0
  409. NAME="$(echo "${CGROUP}" | sed 's/lxc\.payload\.\(.*\)/\1/g')"
  410. fi
  411. [ -z "${NAME}" ] && NAME="${CGROUP}"
  412. [ ${#NAME} -gt 100 ] && NAME="${NAME:0:100}"
  413. fi
  414. info "cgroup '${CGROUP}' is called '${NAME}'"
  415. echo "${NAME}"
  416. exit ${NAME_NOT_FOUND}