cgroup-name.sh.in 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706
  1. #!/usr/bin/env bash
  2. #shellcheck disable=SC2001
  3. # netdata
  4. # real-time performance and health monitoring, done right!
  5. # (C) 2023 Netdata Inc.
  6. # SPDX-License-Identifier: GPL-3.0-or-later
  7. #
  8. # Script to find a better name for cgroups
  9. #
  10. export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin:@sbindir_POST@"
  11. export LC_ALL=C
  12. cmd_line="'${0}' $(printf "'%s' " "${@}")"
  13. # -----------------------------------------------------------------------------
  14. # logging
  15. PROGRAM_NAME="$(basename "${0}")"
  16. # these should be the same with syslog() priorities
  17. NDLP_EMERG=0 # system is unusable
  18. NDLP_ALERT=1 # action must be taken immediately
  19. NDLP_CRIT=2 # critical conditions
  20. NDLP_ERR=3 # error conditions
  21. NDLP_WARN=4 # warning conditions
  22. NDLP_NOTICE=5 # normal but significant condition
  23. NDLP_INFO=6 # informational
  24. NDLP_DEBUG=7 # debug-level messages
  25. # the max (numerically) log level we will log
  26. LOG_LEVEL=$NDLP_INFO
  27. set_log_min_priority() {
  28. case "${NETDATA_LOG_LEVEL,,}" in
  29. "emerg" | "emergency")
  30. LOG_LEVEL=$NDLP_EMERG
  31. ;;
  32. "alert")
  33. LOG_LEVEL=$NDLP_ALERT
  34. ;;
  35. "crit" | "critical")
  36. LOG_LEVEL=$NDLP_CRIT
  37. ;;
  38. "err" | "error")
  39. LOG_LEVEL=$NDLP_ERR
  40. ;;
  41. "warn" | "warning")
  42. LOG_LEVEL=$NDLP_WARN
  43. ;;
  44. "notice")
  45. LOG_LEVEL=$NDLP_NOTICE
  46. ;;
  47. "info")
  48. LOG_LEVEL=$NDLP_INFO
  49. ;;
  50. "debug")
  51. LOG_LEVEL=$NDLP_DEBUG
  52. ;;
  53. esac
  54. }
  55. set_log_min_priority
  56. log() {
  57. local level="${1}"
  58. shift 1
  59. [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return
  60. systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <<EOFLOG
  61. INVOCATION_ID=${NETDATA_INVOCATION_ID}
  62. SYSLOG_IDENTIFIER=${PROGRAM_NAME}
  63. PRIORITY=${level}
  64. THREAD_TAG=cgroup-name
  65. ND_LOG_SOURCE=collector
  66. ND_REQUEST=${cmd_line}
  67. MESSAGE=${*//\\n/--NEWLINE--}
  68. EOFLOG
  69. # AN EMPTY LINE IS NEEDED ABOVE
  70. }
  71. info() {
  72. log "$NDLP_INFO" "${@}"
  73. }
  74. warning() {
  75. log "$NDLP_WARN" "${@}"
  76. }
  77. error() {
  78. log "$NDLP_ERR" "${@}"
  79. }
  80. fatal() {
  81. log "$NDLP_ALERT" "${@}"
  82. exit 1
  83. }
  84. debug() {
  85. log "$NDLP_DEBUG" "${@}"
  86. }
  87. # -----------------------------------------------------------------------------
  88. function parse_docker_like_inspect_output() {
  89. local output="${1}"
  90. eval "$(grep -E "^(NOMAD_NAMESPACE|NOMAD_JOB_NAME|NOMAD_TASK_NAME|NOMAD_SHORT_ALLOC_ID|CONT_NAME|IMAGE_NAME)=" <<<"$output")"
  91. if [ -n "$NOMAD_NAMESPACE" ] && [ -n "$NOMAD_JOB_NAME" ] && [ -n "$NOMAD_TASK_NAME" ] && [ -n "$NOMAD_SHORT_ALLOC_ID" ]; then
  92. NAME="${NOMAD_NAMESPACE}-${NOMAD_JOB_NAME}-${NOMAD_TASK_NAME}-${NOMAD_SHORT_ALLOC_ID}"
  93. else
  94. NAME=$(echo "${CONT_NAME}" | sed 's|^/||')
  95. fi
  96. if [ -n "${IMAGE_NAME}" ]; then
  97. LABELS="image=\"${IMAGE_NAME}\""
  98. fi
  99. }
  100. function docker_like_get_name_command() {
  101. local command="${1}"
  102. local id="${2}"
  103. info "Running command: ${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}' \"${id}\""
  104. if OUTPUT="$(${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}{{println}}IMAGE_NAME={{ .Config.Image}}' "${id}")" &&
  105. [ -n "$OUTPUT" ]; then
  106. parse_docker_like_inspect_output "$OUTPUT"
  107. fi
  108. return 0
  109. }
  110. function docker_like_get_name_api() {
  111. local host_var="${1}"
  112. local host="${!host_var}"
  113. local path="/containers/${2}/json"
  114. if [ -z "${host}" ]; then
  115. warning "No ${host_var} is set"
  116. return 1
  117. fi
  118. if ! command -v jq >/dev/null 2>&1; then
  119. warning "Can't find jq command line tool. jq is required for netdata to retrieve container name using ${host} API, falling back to docker ps"
  120. return 1
  121. fi
  122. if [ -S "${host}" ]; then
  123. info "Running API command: curl --unix-socket \"${host}\" http://localhost${path}"
  124. JSON=$(curl -sS --unix-socket "${host}" "http://localhost${path}")
  125. else
  126. info "Running API command: curl \"${host}${path}\""
  127. JSON=$(curl -sS "${host}${path}")
  128. fi
  129. if OUTPUT=$(echo "${JSON}" | jq -r '.Config.Env[],"CONT_NAME=\(.Name)","IMAGE_NAME=\(.Config.Image)"') && [ -n "$OUTPUT" ]; then
  130. parse_docker_like_inspect_output "$OUTPUT"
  131. fi
  132. return 0
  133. }
  134. # get_lbl_val returns the value for the label with the given name.
  135. # Returns "null" string if the label doesn't exist.
  136. # Expected labels format: 'name="value",...'.
  137. function get_lbl_val() {
  138. local labels want_name
  139. labels="${1}"
  140. want_name="${2}"
  141. IFS=, read -ra labels <<< "$labels"
  142. local lname lval
  143. for l in "${labels[@]}"; do
  144. IFS="=" read -r lname lval <<< "$l"
  145. if [ "$want_name" = "$lname" ] && [ -n "$lval" ]; then
  146. echo "${lval:1:-1}" # trim "
  147. return 0
  148. fi
  149. done
  150. echo "null"
  151. return 1
  152. }
  153. function add_lbl_prefix() {
  154. local orig_labels prefix
  155. orig_labels="${1}"
  156. prefix="${2}"
  157. IFS=, read -ra labels <<< "$orig_labels"
  158. local new_labels
  159. for l in "${labels[@]}"; do
  160. new_labels+="${prefix}${l},"
  161. done
  162. echo "${new_labels:0:-1}" # trim last ','
  163. }
  164. function remove_lbl() {
  165. local orig_labels lbl_name
  166. orig_labels="${1}"
  167. lbl_name="${2}"
  168. IFS=, read -ra labels <<< "$orig_labels"
  169. local new_labels
  170. for l in "${labels[@]}"; do
  171. IFS="=" read -r lname lval <<< "$l"
  172. [ "$lbl_name" != "$lname" ] && new_labels+="${l},"
  173. done
  174. echo "${new_labels:0:-1}" # trim last ','
  175. }
  176. function k8s_is_pause_container() {
  177. local cgroup_path="${1}"
  178. local file
  179. if [ -d "${NETDATA_HOST_PREFIX}/sys/fs/cgroup/cpuacct" ]; then
  180. file="${NETDATA_HOST_PREFIX}/sys/fs/cgroup/cpuacct/$cgroup_path/cgroup.procs"
  181. else
  182. file="${NETDATA_HOST_PREFIX}/sys/fs/cgroup/$cgroup_path/cgroup.procs"
  183. fi
  184. [ ! -f "$file" ] && return 1
  185. local procs
  186. IFS= read -rd' ' procs 2>/dev/null <"$file"
  187. #shellcheck disable=SC2206
  188. procs=($procs)
  189. [ "${#procs[@]}" -ne 1 ] && return 1
  190. IFS= read -r comm 2>/dev/null <"/proc/${procs[0]}/comm"
  191. [ "$comm" == "pause" ]
  192. return
  193. }
  194. function k8s_gcp_get_cluster_name() {
  195. local header url id loc name
  196. header="Metadata-Flavor: Google"
  197. url="http://metadata/computeMetadata/v1"
  198. if id=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/project/project-id") &&
  199. loc=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/instance/attributes/cluster-location") &&
  200. name=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/instance/attributes/cluster-name") &&
  201. [ -n "$id" ] && [ -n "$loc" ] && [ -n "$name" ]; then
  202. echo "gke_${id}_${loc}_${name}"
  203. return 0
  204. fi
  205. return 1
  206. }
  207. # k8s_get_kubepod_name resolves */kubepods/* cgroup name.
  208. # pod level cgroup name format: 'pod_<namespace>_<pod_name>'
  209. # container level cgroup name format: 'cntr_<namespace>_<pod_name>_<container_name>'
  210. function k8s_get_kubepod_name() {
  211. # GKE /sys/fs/cgroup/*/ (cri=docker, cgroups=v1):
  212. # |-- kubepods
  213. # | |-- burstable
  214. # | | |-- pod98cee708-023b-11eb-933d-42010a800193
  215. # | | | |-- 922161c98e6ea450bf665226cdc64ca2aa3e889934c2cff0aec4325f8f78ac03
  216. # | `-- pode314bbac-d577-11ea-a171-42010a80013b
  217. # | |-- 7d505356b04507de7b710016d540b2759483ed5f9136bb01a80872b08f771930
  218. #
  219. # GKE /sys/fs/cgroup/*/ (cri=containerd, cgroups=v1):
  220. # |-- kubepods.slice
  221. # | |-- kubepods-besteffort.slice
  222. # | | |-- kubepods-besteffort-pode1465238_4518_4c21_832f_fd9f87033dad.slice
  223. # | | | |-- cri-containerd-66be9b2efdf4d85288c319b8c1a2f50d2439b5617e36f45d9d0d0be1381113be.scope
  224. # | `-- kubepods-pod91f5b561_369f_4103_8015_66391059996a.slice
  225. # | |-- cri-containerd-24c53b774a586f06abc058619b47f71d9d869ac50c92898adbd199106fd0aaeb.scope
  226. #
  227. # GKE /sys/fs/cgroup/*/ (cri=crio, cgroups=v1):
  228. # |-- kubepods.slice
  229. # | |-- kubepods-besteffort.slice
  230. # | | |-- kubepods-besteffort-podad412dfe_3589_4056_965a_592356172968.slice
  231. # | | | |-- crio-77b019312fd9825828b70214b2c94da69c30621af2a7ee06f8beace4bc9439e5.scope
  232. #
  233. # Minikube (v1.8.2) /sys/fs/cgroup/*/ (cri=docker, cgroups=v1):
  234. # |-- kubepods.slice
  235. # | |-- kubepods-besteffort.slice
  236. # | | |-- kubepods-besteffort-pod10fb5647_c724_400c_b9cc_0e6eae3110e7.slice
  237. # | | | |-- docker-36e5eb5056dfdf6dbb75c0c44a1ecf23217fe2c50d606209d8130fcbb19fb5a7.scope
  238. #
  239. # kind v0.14.0
  240. # |-- kubelet.slice
  241. # | |-- kubelet-kubepods.slice
  242. # | | |-- kubelet-kubepods-besteffort.slice
  243. # | | | |-- kubelet-kubepods-besteffort-pod7881ed9e_c63e_4425_b5e0_ac55a08ae939.slice
  244. # | | | | |-- cri-containerd-00c7939458bffc416bb03451526e9fde13301d6654cfeadf5b4964a7fb5be1a9.scope
  245. #
  246. # NOTE: cgroups plugin
  247. # - uses '_' to join dir names (so it is <parent>_<child>_<child>_...)
  248. # - replaces '.' with '-'
  249. local fn="${FUNCNAME[0]}"
  250. local cgroup_path="${1}"
  251. local id="${2}"
  252. if [[ ! $id =~ ^.*kubepods.* ]]; then
  253. warning "${fn}: '${id}' is not kubepod cgroup."
  254. return 1
  255. fi
  256. local clean_id="$id"
  257. clean_id=${clean_id//.slice/}
  258. clean_id=${clean_id//.scope/}
  259. local name pod_uid cntr_id
  260. if [[ $clean_id == "kubepods" ]]; then
  261. name="$clean_id"
  262. elif [[ $clean_id =~ .+(besteffort|burstable|guaranteed)$ ]]; then
  263. # kubepods_<QOS_CLASS>
  264. # kubepods_kubepods-<QOS_CLASS>
  265. name=${clean_id//-/_}
  266. name=${name/#kubepods_kubepods/kubepods}
  267. elif [[ $clean_id =~ .+pod[a-f0-9_-]+_(docker|crio|cri-containerd)-([a-f0-9]+)$ ]]; then
  268. # ...pod<POD_UID>_(docker|crio|cri-containerd)-<CONTAINER_ID> (POD_UID w/ "_")
  269. cntr_id=${BASH_REMATCH[2]}
  270. elif [[ $clean_id =~ .+pod[a-f0-9-]+_([a-f0-9]+)$ ]]; then
  271. # ...pod<POD_UID>_<CONTAINER_ID>
  272. cntr_id=${BASH_REMATCH[1]}
  273. elif [[ $clean_id =~ .+pod([a-f0-9_-]+)$ ]]; then
  274. # ...pod<POD_UID> (POD_UID w/ and w/o "_")
  275. pod_uid=${BASH_REMATCH[1]}
  276. pod_uid=${pod_uid//_/-}
  277. fi
  278. if [ -n "$name" ]; then
  279. echo "$name"
  280. return 0
  281. fi
  282. if [ -z "$pod_uid" ] && [ -z "$cntr_id" ]; then
  283. warning "${fn}: can't extract pod_uid or container_id from the cgroup '$id'."
  284. return 3
  285. fi
  286. [ -n "$pod_uid" ] && info "${fn}: cgroup '$id' is a pod(uid:$pod_uid)"
  287. [ -n "$cntr_id" ] && info "${fn}: cgroup '$id' is a container(id:$cntr_id)"
  288. if [ -n "$cntr_id" ] && k8s_is_pause_container "$cgroup_path"; then
  289. return 3
  290. fi
  291. if ! command -v jq > /dev/null 2>&1; then
  292. warning "${fn}: 'jq' command not available."
  293. return 1
  294. fi
  295. local tmp_kube_cluster_name="${TMPDIR:-"/tmp"}/netdata-cgroups-k8s-cluster-name"
  296. local tmp_kube_system_ns_uid_file="${TMPDIR:-"/tmp"}/netdata-cgroups-kubesystem-uid"
  297. local tmp_kube_containers_file="${TMPDIR:-"/tmp"}/netdata-cgroups-containers"
  298. local kube_cluster_name
  299. local kube_system_uid
  300. local labels
  301. if [ -n "$cntr_id" ] &&
  302. [ -f "$tmp_kube_cluster_name" ] &&
  303. [ -f "$tmp_kube_system_ns_uid_file" ] &&
  304. [ -f "$tmp_kube_containers_file" ] &&
  305. labels=$(grep "$cntr_id" "$tmp_kube_containers_file" 2>/dev/null); then
  306. IFS= read -r kube_system_uid 2>/dev/null <"$tmp_kube_system_ns_uid_file"
  307. IFS= read -r kube_cluster_name 2>/dev/null <"$tmp_kube_cluster_name"
  308. else
  309. IFS= read -r kube_system_uid 2>/dev/null <"$tmp_kube_system_ns_uid_file"
  310. IFS= read -r kube_cluster_name 2>/dev/null <"$tmp_kube_cluster_name"
  311. [ -z "$kube_cluster_name" ] && ! kube_cluster_name=$(k8s_gcp_get_cluster_name) && kube_cluster_name="unknown"
  312. local kube_system_ns
  313. local pods
  314. if [ -n "${KUBERNETES_SERVICE_HOST}" ] && [ -n "${KUBERNETES_PORT_443_TCP_PORT}" ]; then
  315. local token header host url
  316. token="$(</var/run/secrets/kubernetes.io/serviceaccount/token)"
  317. header="Authorization: Bearer $token"
  318. host="$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT"
  319. if [ -z "$kube_system_uid" ]; then
  320. url="https://$host/api/v1/namespaces/kube-system"
  321. # FIX: check HTTP response code
  322. if ! kube_system_ns=$(curl --fail -sSk -H "$header" "$url" 2>&1); then
  323. warning "${fn}: error on curl '${url}': ${kube_system_ns}."
  324. fi
  325. fi
  326. local url
  327. if [ -n "${USE_KUBELET_FOR_PODS_METADATA}" ]; then
  328. url="${KUBELET_URL:-https://localhost:10250}/pods"
  329. else
  330. url="https://$host/api/v1/pods"
  331. [ -n "$MY_NODE_NAME" ] && url+="?fieldSelector=spec.nodeName==$MY_NODE_NAME"
  332. fi
  333. # FIX: check HTTP response code
  334. if ! pods=$(curl --fail -sSk -H "$header" "$url" 2>&1); then
  335. warning "${fn}: error on curl '${url}': ${pods}."
  336. return 1
  337. fi
  338. elif ps -C kubelet >/dev/null 2>&1 && command -v kubectl >/dev/null 2>&1; then
  339. if [ -z "$kube_system_uid" ]; then
  340. if ! kube_system_ns=$(kubectl --kubeconfig="$KUBE_CONFIG" get namespaces kube-system -o json 2>&1); then
  341. warning "${fn}: error on 'kubectl': ${kube_system_ns}."
  342. fi
  343. fi
  344. [[ -z ${KUBE_CONFIG+x} ]] && KUBE_CONFIG="/etc/kubernetes/admin.conf"
  345. if ! pods=$(kubectl --kubeconfig="$KUBE_CONFIG" get pods --all-namespaces -o json 2>&1); then
  346. warning "${fn}: error on 'kubectl': ${pods}."
  347. return 1
  348. fi
  349. else
  350. warning "${fn}: not inside the k8s cluster and 'kubectl' command not available."
  351. return 1
  352. fi
  353. if [ -n "$kube_system_ns" ] && ! kube_system_uid=$(jq -r '.metadata.uid' <<<"$kube_system_ns" 2>&1); then
  354. warning "${fn}: error on 'jq' parse kube_system_ns: ${kube_system_uid}."
  355. fi
  356. local jq_filter
  357. jq_filter+='.items[] | "'
  358. jq_filter+='namespace=\"\(.metadata.namespace)\",'
  359. jq_filter+='pod_name=\"\(.metadata.name)\",'
  360. jq_filter+='pod_uid=\"\(.metadata.uid)\",'
  361. #jq_filter+='\(.metadata.labels | to_entries | map("pod_label_"+.key+"=\""+.value+"\"") | join(",") | if length > 0 then .+"," else . end)'
  362. jq_filter+='\((.metadata.ownerReferences[]? | select(.controller==true) | "controller_kind=\""+.kind+"\",controller_name=\""+.name+"\",") // "")'
  363. jq_filter+='node_name=\"\(.spec.nodeName)\",'
  364. jq_filter+='" + '
  365. jq_filter+='(.status.containerStatuses[]? | "'
  366. jq_filter+='container_name=\"\(.name)\",'
  367. jq_filter+='container_id=\"\(.containerID)\"'
  368. jq_filter+='") | '
  369. jq_filter+='sub("(docker|cri-o|containerd)://";"")' # containerID: docker://a346da9bc0e3eaba6b295f64ac16e02f2190db2cef570835706a9e7a36e2c722
  370. local containers
  371. if ! containers=$(jq -r "${jq_filter}" <<<"$pods" 2>&1); then
  372. warning "${fn}: error on 'jq' parse pods: ${containers}."
  373. return 1
  374. fi
  375. [ -n "$kube_cluster_name" ] && echo "$kube_cluster_name" >"$tmp_kube_cluster_name" 2>/dev/null
  376. [ -n "$kube_system_ns" ] && [ -n "$kube_system_uid" ] && echo "$kube_system_uid" >"$tmp_kube_system_ns_uid_file" 2>/dev/null
  377. echo "$containers" >"$tmp_kube_containers_file" 2>/dev/null
  378. fi
  379. local qos_class
  380. if [[ $clean_id =~ .+(besteffort|burstable) ]]; then
  381. qos_class="${BASH_REMATCH[1]}"
  382. else
  383. qos_class="guaranteed"
  384. fi
  385. # available labels:
  386. # namespace, pod_name, pod_uid, container_name, container_id, node_name
  387. if [ -n "$cntr_id" ]; then
  388. if [ -n "$labels" ] || labels=$(grep "$cntr_id" <<< "$containers" 2> /dev/null); then
  389. labels+=',kind="container"'
  390. labels+=",qos_class=\"$qos_class\""
  391. [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\""
  392. [ -n "$kube_cluster_name" ] && [ "$kube_cluster_name" != "unknown" ] && labels+=",cluster_name=\"$kube_cluster_name\""
  393. name="cntr"
  394. name+="_$(get_lbl_val "$labels" namespace)"
  395. name+="_$(get_lbl_val "$labels" pod_name)"
  396. name+="_$(get_lbl_val "$labels" container_name)"
  397. labels=$(remove_lbl "$labels" "container_id")
  398. labels=$(remove_lbl "$labels" "pod_uid")
  399. labels=$(add_lbl_prefix "$labels" "k8s_")
  400. name+=" $labels"
  401. else
  402. return 2
  403. fi
  404. elif [ -n "$pod_uid" ]; then
  405. if labels=$(grep "$pod_uid" -m 1 <<< "$containers" 2> /dev/null); then
  406. labels="${labels%%,container_*}"
  407. labels+=',kind="pod"'
  408. labels+=",qos_class=\"$qos_class\""
  409. [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\""
  410. [ -n "$kube_cluster_name" ] && [ "$kube_cluster_name" != "unknown" ] && labels+=",cluster_name=\"$kube_cluster_name\""
  411. name="pod"
  412. name+="_$(get_lbl_val "$labels" namespace)"
  413. name+="_$(get_lbl_val "$labels" pod_name)"
  414. labels=$(remove_lbl "$labels" "pod_uid")
  415. labels=$(add_lbl_prefix "$labels" "k8s_")
  416. name+=" $labels"
  417. else
  418. return 2
  419. fi
  420. fi
  421. # jq filter nonexistent field and nonexistent label value is 'null'
  422. if [[ $name =~ _null(_|$) ]]; then
  423. warning "${fn}: invalid name: $name (cgroup '$id')"
  424. if [ -n "${USE_KUBELET_FOR_PODS_METADATA}" ]; then
  425. # local data is cached and may not contain the correct id
  426. return 2
  427. fi
  428. return 1
  429. fi
  430. echo "$name"
  431. [ -n "$name" ]
  432. return
  433. }
  434. function k8s_get_name() {
  435. local fn="${FUNCNAME[0]}"
  436. local cgroup_path="${1}"
  437. local id="${2}"
  438. local kubepod_name=""
  439. kubepod_name=$(k8s_get_kubepod_name "$cgroup_path" "$id")
  440. case "$?" in
  441. 0)
  442. kubepod_name="k8s_${kubepod_name}"
  443. local name labels
  444. name=${kubepod_name%% *}
  445. labels=${kubepod_name#* }
  446. if [ "$name" != "$labels" ]; then
  447. info "${fn}: cgroup '${id}' has chart name '${name}', labels '${labels}"
  448. NAME="$name"
  449. LABELS="$labels"
  450. else
  451. info "${fn}: cgroup '${id}' has chart name '${NAME}'"
  452. NAME="$name"
  453. fi
  454. EXIT_CODE=$EXIT_SUCCESS
  455. ;;
  456. 1)
  457. NAME="k8s_${id}"
  458. warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and enabling it."
  459. EXIT_CODE=$EXIT_SUCCESS
  460. ;;
  461. 2)
  462. NAME="k8s_${id}"
  463. warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and asking for retry."
  464. EXIT_CODE=$EXIT_RETRY
  465. ;;
  466. *)
  467. NAME="k8s_${id}"
  468. warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and disabling it."
  469. EXIT_CODE=$EXIT_DISABLE
  470. ;;
  471. esac
  472. }
  473. function docker_get_name() {
  474. local id="${1}"
  475. # See https://github.com/netdata/netdata/pull/13523 for details
  476. if command -v snap >/dev/null 2>&1 && snap list docker >/dev/null 2>&1; then
  477. docker_like_get_name_api DOCKER_HOST "${id}"
  478. elif hash docker 2> /dev/null; then
  479. docker_like_get_name_command docker "${id}"
  480. else
  481. docker_like_get_name_api DOCKER_HOST "${id}" || docker_like_get_name_command podman "${id}"
  482. fi
  483. if [ -z "${NAME}" ]; then
  484. warning "cannot find the name of docker container '${id}'"
  485. EXIT_CODE=$EXIT_RETRY
  486. NAME="${id:0:12}"
  487. else
  488. info "docker container '${id}' is named '${NAME}'"
  489. fi
  490. }
  491. function docker_validate_id() {
  492. local id="${1}"
  493. if [ -n "${id}" ] && { [ ${#id} -eq 64 ] || [ ${#id} -eq 12 ]; }; then
  494. docker_get_name "${id}"
  495. else
  496. error "a docker id cannot be extracted from docker cgroup '${CGROUP}'."
  497. fi
  498. }
  499. function podman_get_name() {
  500. local id="${1}"
  501. # for Podman, prefer using the API if we can, as netdata will not normally have access
  502. # to other users' containers, so they will not be visible when running `podman ps`
  503. docker_like_get_name_api PODMAN_HOST "${id}" || docker_like_get_name_command podman "${id}"
  504. if [ -z "${NAME}" ]; then
  505. warning "cannot find the name of podman container '${id}'"
  506. EXIT_CODE=$EXIT_RETRY
  507. NAME="${id:0:12}"
  508. else
  509. info "podman container '${id}' is named '${NAME}'"
  510. fi
  511. }
  512. function podman_validate_id() {
  513. local id="${1}"
  514. if [ -n "${id}" ] && [ ${#id} -eq 64 ]; then
  515. podman_get_name "${id}"
  516. else
  517. error "a podman id cannot be extracted from docker cgroup '${CGROUP}'."
  518. fi
  519. }
  520. # -----------------------------------------------------------------------------
  521. DOCKER_HOST="${DOCKER_HOST:=/var/run/docker.sock}"
  522. PODMAN_HOST="${PODMAN_HOST:=/run/podman/podman.sock}"
  523. CGROUP_PATH="${1}" # the path as it is (e.g. '/docker/efcf4c409')
  524. CGROUP="${2}" # the modified path (e.g. 'docker_efcf4c409')
  525. EXIT_SUCCESS=0
  526. EXIT_RETRY=2
  527. EXIT_DISABLE=3
  528. EXIT_CODE=$EXIT_SUCCESS
  529. NAME=
  530. LABELS=
  531. # -----------------------------------------------------------------------------
  532. if [ -z "${CGROUP}" ]; then
  533. fatal "called without a cgroup name. Nothing to do."
  534. fi
  535. if [ -z "${NAME}" ]; then
  536. if [[ ${CGROUP} =~ ^.*kubepods.* ]]; then
  537. k8s_get_name "${CGROUP_PATH}" "${CGROUP}"
  538. fi
  539. fi
  540. if [ -z "${NAME}" ]; then
  541. if [[ ${CGROUP} =~ ^.*docker[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]]; then
  542. # docker containers
  543. #shellcheck disable=SC1117
  544. DOCKERID="$(echo "${CGROUP}" | sed "s|^.*docker[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")"
  545. docker_validate_id "${DOCKERID}"
  546. elif [[ ${CGROUP} =~ ^.*ecs[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]]; then
  547. # ECS
  548. #shellcheck disable=SC1117
  549. DOCKERID="$(echo "${CGROUP}" | sed "s|^.*ecs[-_/].*[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")"
  550. docker_validate_id "${DOCKERID}"
  551. elif [[ ${CGROUP} =~ system.slice_containerd.service_cpuset_[a-fA-F0-9]+[-_\.]?.*$ ]]; then
  552. # docker containers under containerd
  553. #shellcheck disable=SC1117
  554. DOCKERID="$(echo "${CGROUP}" | sed "s|^.*ystem.slice_containerd.service_cpuset_\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")"
  555. docker_validate_id "${DOCKERID}"
  556. elif [[ ${CGROUP} =~ ^.*libpod-[a-fA-F0-9]+.*$ ]]; then
  557. # Podman
  558. PODMANID="$(echo "${CGROUP}" | sed "s|^.*libpod-\([a-fA-F0-9]\+\).*$|\1|")"
  559. podman_validate_id "${PODMANID}"
  560. elif [[ ${CGROUP} =~ machine.slice[_/].*\.service ]]; then
  561. # systemd-nspawn
  562. NAME="$(echo "${CGROUP}" | sed 's/.*machine.slice[_\/]\(.*\)\.service/\1/g')"
  563. elif [[ ${CGROUP} =~ machine.slice_machine.*-lxc ]]; then
  564. # libvirtd / lxc containers
  565. # machine.slice machine-lxc/x2d969/x2dhubud0xians01.scope => lxc/hubud0xians01
  566. # machine.slice_machine-lxc/x2d969/x2dhubud0xians01.scope/libvirt_init.scope => lxc/hubud0xians01/libvirt_init
  567. NAME="lxc/$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-lxc//; s/[\/_]x2d[[:digit:]]*//; s/[\/_]x2d//g; s/\.scope//g')"
  568. elif [[ ${CGROUP} =~ machine.slice_machine.*-qemu ]]; then
  569. # libvirtd / qemu virtual machines
  570. # machine.slice_machine-qemu_x2d1_x2dopnsense.scope => qemu_opnsense
  571. NAME="qemu_$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-qemu//; s/[\/_]x2d[[:digit:]]*//; s/[\/_]x2d//g; s/\.scope//g')"
  572. elif [[ ${CGROUP} =~ machine_.*\.libvirt-qemu ]]; then
  573. # libvirtd / qemu virtual machines
  574. NAME="qemu_$(echo "${CGROUP}" | sed 's/^machine_//; s/\.libvirt-qemu$//; s/-/_/;')"
  575. elif [[ ${CGROUP} =~ qemu.slice_([0-9]+).scope && -d "${NETDATA_HOST_PREFIX}/etc/pve" ]]; then
  576. # Proxmox VMs
  577. FILENAME="${NETDATA_HOST_PREFIX}/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf"
  578. if [[ -f $FILENAME && -r $FILENAME ]]; then
  579. NAME="qemu_$(grep -e '^name: ' "${FILENAME}" | head -1 | sed -rn 's|\s*name\s*:\s*(.*)?$|\1|p')"
  580. else
  581. error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group."
  582. fi
  583. elif [[ ${CGROUP} =~ lxc_([0-9]+) && -d "${NETDATA_HOST_PREFIX}/etc/pve" ]]; then
  584. # Proxmox Containers (LXC)
  585. FILENAME="${NETDATA_HOST_PREFIX}/etc/pve/lxc/${BASH_REMATCH[1]}.conf"
  586. if [[ -f ${FILENAME} && -r ${FILENAME} ]]; then
  587. NAME=$(grep -e '^hostname: ' "${FILENAME}" | head -1 | sed -rn 's|\s*hostname\s*:\s*(.*)?$|\1|p')
  588. else
  589. error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group."
  590. fi
  591. elif [[ ${CGROUP} =~ lxc.payload.* ]]; then
  592. # LXC 4.0
  593. NAME="$(echo "${CGROUP}" | sed 's/lxc\.payload\.\(.*\)/\1/g')"
  594. fi
  595. [ -z "${NAME}" ] && NAME="${CGROUP}"
  596. [ ${#NAME} -gt 100 ] && NAME="${NAME:0:100}"
  597. fi
  598. NAME="${NAME// /_}"
  599. info "cgroup '${CGROUP}' is called '${NAME}', labels '${LABELS}'"
  600. if [ -n "$LABELS" ]; then
  601. echo "${NAME} ${LABELS}"
  602. else
  603. echo "${NAME}"
  604. fi
  605. exit ${EXIT_CODE}