123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442 |
- #!/usr/bin/env bash
- # netdata
- # real-time performance and health monitoring, done right!
- # (C) 2017 Costa Tsaousis <costa@tsaousis.gr>
- # SPDX-License-Identifier: GPL-3.0-or-later
- # Exit code: 0 - Success
- # Exit code: 1 - Unknown argument
- # Exit code: 2 - Problems with claiming working directory
- # Exit code: 3 - Missing dependencies
- # Exit code: 4 - Failure to connect to endpoint
- # Exit code: 5 - The CLI didn't work
- # Exit code: 6 - Wrong user
- # Exit code: 7 - Unknown HTTP error message
- #
- # OK: Agent claimed successfully
- # HTTP Status code: 204
- # Exit code: 0
- #
- # Unknown HTTP error message
- # HTTP Status code: 422
- # Exit code: 7
# Lookup tables indexed by this script's exit code (7-17).
# ERROR_KEYS holds the error key names and ERROR_MESSAGES the message text
# that is printed when claiming fails. The comment on each entry gives the
# HTTP status code and the meaning of the error.
ERROR_KEYS=(
  # 422 - Unknown HTTP error message
  [7]="None"
  # 422 - The agent id is invalid; it does not fulfill the constraints
  [8]="ErrInvalidNodeID"
  # 422 - The agent hostname is invalid; it does not fulfill the constraints
  [9]="ErrInvalidNodeName"
  # 422 - At least one of the given rooms ids is invalid; it does not fulfill the constraints
  [10]="ErrInvalidRoomID"
  # 422 - Invalid public key; the public key is empty or not present
  [11]="ErrInvalidPublicKey"
  # 403 - Expired, missing or invalid token
  [12]="ErrForbidden"
  # 409 - Duplicate agent id; an agent with the same id is already registered in the cloud
  [13]="ErrAlreadyClaimed"
  # 102 - The node claiming process is still in progress.
  [14]="ErrProcessingClaim"
  # 500 - Internal server error. Any other unexpected error (DB problems, etc.)
  [15]="ErrInternalServerError"
  # 504 - There was a timeout processing the claim.
  [16]="ErrGatewayTimeout"
  # 503 - The service cannot handle the claiming request at this time.
  [17]="ErrServiceUnavailable"
)

ERROR_MESSAGES=(
  [7]="Unknown HTTP error message"
  [8]="invalid node id"
  [9]="invalid node name"
  [10]="invalid room id"
  [11]="invalid public key"
  [12]="token expired/token not found/invalid token"
  [13]="already claimed"
  [14]="processing claiming"
  [15]="Internal Server Error"
  [16]="Gateway Timeout"
  [17]="Service Unavailable"
)

# Exit code: 18 - Agent unique id is not generated yet.

# 1 while the running daemon should be queried through netdatacli,
# 0 when configuration has to be read from disk instead.
NETDATA_RUNNING=1
#######################################
# Print the value of a Netdata configuration option.
# Globals:
#   NETDATA_RUNNING (read, written) - while 1 the value is requested through
#     `netdatacli read-config`; if that command fails the variable is set to
#     0 and the value is read with `netdata -W get2` instead.
#     NOTE: when this function is called as $(get_config_value ...) it runs
#     in a subshell, so the updated NETDATA_RUNNING is not seen by the caller.
# Arguments:
#   $1 - configuration file identifier (first field of the read-config query)
#   $2 - section name
#   $3 - option name
# Outputs:
#   The value followed by a newline on stdout; a notice on stderr when the
#   daemon could not be queried.
#######################################
get_config_value() {
  # Keep all working variables local so they do not leak into the script.
  local conf_file="${1}"
  local section="${2}"
  local key_name="${3}"
  local config_result=""

  if [ "${NETDATA_RUNNING}" -eq 1 ]; then
    # The status of an assignment is the status of its command substitution.
    if ! config_result=$(@sbindir_POST@/netdatacli 2>/dev/null read-config "${conf_file}|${section}|${key_name}"); then
      echo >&2 "Unable to communicate with Netdata daemon, querying config from disk instead."
      NETDATA_RUNNING=0
    fi
  fi

  if [ "${NETDATA_RUNNING}" -eq 0 ]; then
    config_result=$(@sbindir_POST@/netdata 2>/dev/null -W get2 "${conf_file}" "${section}" "${key_name}" unknown_default)
  fi

  printf '%s\n' "${config_result}"
}
# Select the HTTP client: curl is preferred, wget is the fallback.
URLTOOL=""
for tool in curl wget; do
  if command -v "${tool}" >/dev/null 2>&1; then
    URLTOOL="${tool}"
    break
  fi
done
if [ -z "${URLTOOL}" ]; then
  echo >&2 "I need curl or wget to proceed, but neither is available on this system."
  exit 3
fi

# openssl is used further down to create the RSA key pair.
command -v openssl >/dev/null 2>&1 || {
  echo >&2 "I need openssl to proceed, but it is not available on this system."
  exit 3
}

# The two values below are placeholders filled in at build time, which is why
# shellcheck sees constant comparisons.
# shellcheck disable=SC2050
[ "@enable_cloud_POST@" != "no" ] || {
  echo >&2 "This agent was built with --disable-cloud and cannot be claimed"
  exit 3
}

# shellcheck disable=SC2050
[ "@enable_aclk_POST@" = "yes" ] || {
  echo >&2 "This agent was built without the dependencies for Cloud and cannot be claimed"
  exit 3
}
# -----------------------------------------------------------------------------
# Defaults that allow running this script by hand.

# An NETDATA_VARLIB_DIR supplied by the caller wins over the build-time path.
if [ -z "${NETDATA_VARLIB_DIR}" ]; then
  NETDATA_VARLIB_DIR="@varlibdir_POST@"
fi
CLAIMING_DIR="${NETDATA_VARLIB_DIR}/cloud.d"
CLOUD_CERTIFICATE_FILE="${CLAIMING_DIR}/cloud_fullchain.pem"
MACHINE_GUID_FILE="@registrydir_POST@/netdata.public.unique.id"

# Values that command line options may replace later.
TOKEN="unknown"
ID="unknown"
ROOMS=""
VERBOSE=0
INSECURE=0
RELOAD=1

URL_BASE=$(get_config_value cloud global "cloud base url")
# Cover post-install with --dont-start
URL_BASE="${URL_BASE:-https://app.netdata.cloud}"
NETDATA_USER=$(get_config_value netdata global "run as user")

# Only fill these in when the shell did not already provide them.
if [ -z "${HOSTNAME}" ]; then
  HOSTNAME=$(hostname)
fi
if [ -z "${EUID}" ]; then
  EUID="$(id -u)"
fi
#######################################
# Print a freshly generated lower-case UUID on stdout.
# Uses uuidgen when it is installed, otherwise the kernel's
# /proc/sys/kernel/random/uuid. Two specific UUIDs are never returned; a new
# one is drawn instead (NOTE(review): presumably known-duplicate GUIDs —
# confirm the reason for this list).
# Exits with code 18 when no UUID source is available.
#######################################
gen_id() {
  local candidate

  while :; do
    if command -v uuidgen > /dev/null 2>&1; then
      candidate="$(uuidgen | tr '[:upper:]' '[:lower:]')"
    elif [ -r /proc/sys/kernel/random/uuid ]; then
      candidate="$(cat /proc/sys/kernel/random/uuid)"
    else
      echo >&2 "Unable to generate machine ID."
      exit 18
    fi

    case "${candidate}" in
      8a795b0c-2311-11e6-8563-000c295076a6 | 4aed1458-1c3e-11e6-a53f-000c290fc8f5)
        # Rejected value, draw again.
        continue
        ;;
    esac

    echo "${candidate}"
    return
  done
}
# Use the machine GUID as the default node id (ID) and remember it in MGUID.
# If the GUID file does not exist yet, a new GUID is generated and stored.
# Every failure in this step ends the script with exit code 18.
if [ -r "${MACHINE_GUID_FILE}" ]; then
  ID="$(cat "${MACHINE_GUID_FILE}")"
  MGUID=$ID
elif [ -f "${MACHINE_GUID_FILE}" ]; then
  echo >&2 "netdata.public.unique.id is not readable. Please make sure you have rights to read it (Filename: ${MACHINE_GUID_FILE})."
  exit 18
else
  # gen_id runs inside a command substitution, so its own `exit 18` only
  # terminates that subshell. Check the result here; otherwise an empty GUID
  # file would be written and treated as a success.
  if ! NEW_MACHINE_GUID="$(gen_id)" || [ -z "${NEW_MACHINE_GUID}" ]; then
    exit 18
  fi

  if mkdir -p "${MACHINE_GUID_FILE%/*}" && printf '%s' "${NEW_MACHINE_GUID}" > "${MACHINE_GUID_FILE}"; then
    ID="$(cat "${MACHINE_GUID_FILE}")"
    MGUID=$ID
  else
    echo >&2 "Failed to write new machine GUID. Please make sure you have rights to write to ${MACHINE_GUID_FILE}."
    exit 18
  fi
fi
# When the claiming directory contains readable "token" / "rooms" files,
# their contents replace the current TOKEN / ROOMS values.
if [ -r "${CLAIMING_DIR}/token" ]; then
  TOKEN="$(< "${CLAIMING_DIR}/token")"
fi

if [ -r "${CLAIMING_DIR}/rooms" ]; then
  ROOMS="$(< "${CLAIMING_DIR}/rooms")"
fi
# Process the command line options one by one. Every option is a single word
# of the form -name or -name=value; an unrecognised word ends the script with
# exit code 1.
while [ "$#" -gt 0 ]; do
  arg="$1"
  case "${arg}" in
    -token=*)
      TOKEN="${arg#-token=}"
      ;;
    -url=*)
      # An empty -url= keeps the configured base URL.
      if [ -n "${arg#-url=}" ]; then
        URL_BASE="${arg#-url=}"
      fi
      ;;
    -id=*)
      # Node ids are always handled in lower case.
      ID=$(echo "${arg#-id=}" | tr '[:upper:]' '[:lower:]')
      ;;
    -rooms=*)
      ROOMS="${arg#-rooms=}"
      ;;
    -hostname=*)
      HOSTNAME="${arg#-hostname=}"
      ;;
    -verbose)
      VERBOSE=1
      ;;
    -insecure)
      INSECURE=1
      ;;
    -proxy=*)
      PROXY="${arg#-proxy=}"
      ;;
    -noproxy)
      NOPROXY=yes
      ;;
    -noreload)
      RELOAD=0
      ;;
    -user=*)
      NETDATA_USER="${arg#-user=}"
      ;;
    -daemon-not-running)
      NETDATA_RUNNING=0
      ;;
    *)
      echo >&2 "Unknown argument ${arg}"
      exit 1
      ;;
  esac
  shift 1
done
# Only root (EUID 0) or the configured Netdata account may run the claim.
if [ "$EUID" != "0" ]; then
  if [ "$(whoami)" != "$NETDATA_USER" ]; then
    echo >&2 "This script must be run by the $NETDATA_USER user account"
    exit 6
  fi
fi

# if curl not installed give warning SOCKS can't be used
if [ "${URLTOOL}" != "curl" ]; then
  case "${PROXY}" in
    socks*)
      echo >&2 "wget doesn't support SOCKS. Please install curl or disable SOCKS proxy."
      exit 1
      ;;
  esac
fi

# Summary of the effective settings; the token itself is never printed.
printf >&2 '%s\n' \
  "Token: ****************" \
  "Base URL: $URL_BASE" \
  "Id: $ID" \
  "Rooms: $ROOMS" \
  "Hostname: $HOSTNAME" \
  "Proxy: $PROXY" \
  "Netdata user: $NETDATA_USER"
# Make sure the claiming working directory exists (mode 0770) and is
# writable, then make sure it holds an RSA key pair. Any failure here ends
# the script with exit code 2.
if [ ! -d "${CLAIMING_DIR}" ]; then
  if ! { mkdir -p "${CLAIMING_DIR}" && chmod 0770 "${CLAIMING_DIR}"; }; then
    echo >&2 "Failed to create claiming working directory ${CLAIMING_DIR}"
    exit 2
  fi
fi

[ -w "${CLAIMING_DIR}" ] || {
  echo >&2 "No write permission in claiming working directory ${CLAIMING_DIR}"
  exit 2
}

# The private key is created once and reused on later runs.
if [ ! -f "${CLAIMING_DIR}/private.pem" ]; then
  echo >&2 "Generating private/public key for the first time."
  openssl genrsa -out "${CLAIMING_DIR}/private.pem" 2048 || {
    echo >&2 "Failed to generate private/public key pair."
    exit 2
  }
fi

# The public key is derived from the private key when it is missing.
if [ ! -f "${CLAIMING_DIR}/public.pem" ]; then
  echo >&2 "Extracting public key from private key."
  openssl rsa -in "${CLAIMING_DIR}/private.pem" -outform PEM -pubout -out "${CLAIMING_DIR}/public.pem" || {
    echo >&2 "Failed to extract public key."
    exit 2
  }
fi
# Endpoint for this node; a single trailing slash on the base URL is dropped.
TARGET_URL="${URL_BASE%/}/api/v1/spaces/nodes/${ID}"

# Flatten the PEM public key onto one line, replacing every newline with the
# two characters "\n" so it can be embedded in a JSON string.
KEY=$(tr '\n' '!' < "${CLAIMING_DIR}/public.pem" | sed -e 's/!/\\n/g')

# Turn the comma separated room list into a list of quoted JSON strings.
if [ -n "$ROOMS" ]; then
  # shellcheck disable=SC2001
  ROOMS="\"$(echo "$ROOMS" | sed 's/,/", "/g')\""
fi

# Request body sent to the cloud; it is stored in a file that the HTTP client
# reads.
cat > "${CLAIMING_DIR}/tmpin.txt" <<CLAIM_REQUEST
{
"node": {
"id": "$ID",
"hostname": "$HOSTNAME"
},
"token": "$TOKEN",
"rooms" : [ $ROOMS ],
"publicKey" : "$KEY",
"mGUID" : "$MGUID"
}
CLAIM_REQUEST

if [ "${VERBOSE}" = 1 ]; then
  echo "Request to server:"
  cat "${CLAIMING_DIR}/tmpin.txt"
fi
# Assemble the HTTP client command line as one string. It is executed later
# through eval with the target URL appended, which is why file names are
# wrapped in escaped double quotes.
case "${URLTOOL}" in
  curl)
    INSECURE_OPT="--insecure"
    CACERT_OPT="--cacert"
    URLCOMMAND="curl --connect-timeout 30 --retry 0 -s -i -X PUT -d \"@${CLAIMING_DIR}/tmpin.txt\""
    if [ "${NOPROXY}" = "yes" ]; then
      # An empty proxy argument is passed explicitly.
      URLCOMMAND+=" -x \"\""
    elif [ -n "${PROXY}" ]; then
      URLCOMMAND+=" -x \"${PROXY}\""
    fi
    ;;
  *)
    INSECURE_OPT="--no-check-certificate"
    CACERT_OPT="--ca-certificate"
    URLCOMMAND="wget -T 15 -O - -q --server-response --content-on-error=on --method=PUT"
    URLCOMMAND+=" --body-file=\"${CLAIMING_DIR}/tmpin.txt\""
    if [ "${NOPROXY}" = "yes" ]; then
      URLCOMMAND+=" --no-proxy"
    elif [ "${PROXY:0:4}" = http ]; then
      # wget takes the proxy from the environment.
      URLCOMMAND="export http_proxy=${PROXY}; ${URLCOMMAND}"
    fi
    ;;
esac

# Optionally skip TLS certificate verification.
if [ "${INSECURE}" = 1 ]; then
  URLCOMMAND+=" ${INSECURE_OPT}"
fi

# Use the certificate bundle from the claiming directory when it is readable.
if [ -r "${CLOUD_CERTIFICATE_FILE}" ]; then
  URLCOMMAND+=" ${CACERT_OPT} \"${CLOUD_CERTIFICATE_FILE}\""
fi

if [ "${VERBOSE}" = 1 ]; then
  echo "${URLCOMMAND} \"${TARGET_URL}\""
fi
#######################################
# Run the prepared HTTP command once and store the reply in
# ${CLAIMING_DIR}/tmpout.txt.
# Globals:
#   URLTOOL, URLCOMMAND, TARGET_URL, URL_BASE, CLAIMING_DIR, VERBOSE (read)
#   URLCOMMAND_EXIT_CODE (written) - status of the HTTP command; a wget
#     status of 8 is stored as 0.
# Outputs:
#   With VERBOSE=1 the server response on stdout; a message on stderr when
#   the command failed.
# Returns:
#   0 when the command succeeded, 4 otherwise (the response file is removed
#   in that case).
#######################################
attempt_contact () {
  local response_file="${CLAIMING_DIR}/tmpout.txt"

  case "${URLTOOL}" in
    curl)
      eval "${URLCOMMAND} \"${TARGET_URL}\"" > "${response_file}"
      ;;
    *)
      # For the other client stderr is captured in the response file as well.
      eval "${URLCOMMAND} \"${TARGET_URL}\"" > "${response_file}" 2>&1
      ;;
  esac
  URLCOMMAND_EXIT_CODE=$?

  # We consider the server issuing an error response a successful attempt at communicating
  if [ "${URLTOOL}:${URLCOMMAND_EXIT_CODE}" = "wget:8" ]; then
    URLCOMMAND_EXIT_CODE=0
  fi

  # Check if URLCOMMAND connected and received reply
  if [ "${URLCOMMAND_EXIT_CODE}" -ne 0 ]; then
    echo >&2 "Failed to connect to ${URL_BASE}, return code ${URLCOMMAND_EXIT_CODE}"
    rm -f "${response_file}"
    return 4
  fi

  if [ "${VERBOSE}" = 1 ]; then
    echo "Response from server:"
    cat "${response_file}"
  fi
  return 0
}
# Contact the cloud, trying up to MAX_ATTEMPTS times and waiting i seconds
# after the i-th failed attempt. When the last attempt fails as well, the
# request file is removed and the script ends with exit code 4 ("Failure to
# connect to endpoint"). The give-up check is tied to the loop bound so that
# it cannot drift out of sync with the number of iterations.
MAX_ATTEMPTS=3
for ((i = 1; i <= MAX_ATTEMPTS; i++)); do
  if attempt_contact; then
    echo "Connection attempt $i successful"
    break
  fi

  if [ "$i" -ge "${MAX_ATTEMPTS}" ]; then
    echo "Connection attempt $i failed."
    rm -f "${CLAIMING_DIR}/tmpin.txt"
    exit 4
  fi

  echo "Connection attempt $i failed. Retry in ${i}s."
  sleep "$i"
done
# The request body is not needed any more.
rm -f "${CLAIMING_DIR}/tmpin.txt"

# Text between `errorMsgKey":"` and the next double quote on the response
# line(s) that contain "errorMsgKey":
ERROR_KEY=$(
  grep "\"errorMsgKey\":" "${CLAIMING_DIR}/tmpout.txt" \
    | awk -F "errorMsgKey\":\"" '{print $2}' \
    | awk -F "\"" '{print $1}'
)

# Second field of the last response line that contains "HTTP".
HTTP_STATUS_CODE=$(grep "HTTP" "${CLAIMING_DIR}/tmpout.txt" | tail -n 1 | awk '{print $2}')

# HTTP 204 means success; otherwise the error key selects the exit code and
# anything unrecognised is reported as 7.
if [ "${HTTP_STATUS_CODE}" = "204" ]; then
  EXIT_CODE=0
else
  case "${ERROR_KEY}" in
    ErrInvalidNodeID) EXIT_CODE=8 ;;
    ErrInvalidNodeName) EXIT_CODE=9 ;;
    ErrInvalidRoomID) EXIT_CODE=10 ;;
    ErrInvalidPublicKey) EXIT_CODE=11 ;;
    ErrForbidden) EXIT_CODE=12 ;;
    ErrAlreadyClaimed) EXIT_CODE=13 ;;
    ErrProcessingClaim) EXIT_CODE=14 ;;
    ErrInternalServerError) EXIT_CODE=15 ;;
    ErrGatewayTimeout) EXIT_CODE=16 ;;
    ErrServiceUnavailable) EXIT_CODE=17 ;;
    *) EXIT_CODE=7 ;;
  esac
fi
# Post-processing when the cloud accepted the claim (HTTP 204) or answered
# that this agent id is already claimed (ErrAlreadyClaimed): persist the
# claim state, rewrite cloud.conf and tell the running agent to reload.
if [ "${HTTP_STATUS_CODE}" = "204" ] || [ "${ERROR_KEY}" = "ErrAlreadyClaimed" ]; then
  rm -f "${CLAIMING_DIR}/tmpout.txt"

  # Failures below must end the script itself, so they are handled in
  # `{ ...; }` groups: an `exit` inside `( ... )` would only leave a subshell
  # and the script would carry on.
  if [ "${HTTP_STATUS_CODE}" = "204" ]; then
    printf '%s' "${ID}" > "${CLAIMING_DIR}/claimed_id" || {
      echo >&2 "Claiming failed"
      exit 2
    }
  fi
  rm -f "${CLAIMING_DIR}/token" || {
    echo >&2 "Claiming failed"
    exit 2
  }

  # Rewrite the cloud.conf on the disk
  cat > "${CLAIMING_DIR}/cloud.conf" <<HERE_DOC
[global]
enabled = yes
cloud base url = $URL_BASE
HERE_DOC

  # When running as root, hand the claiming directory to the Netdata user.
  if [ "$EUID" = "0" ]; then
    chown -R "${NETDATA_USER}:${NETDATA_USER}" "${CLAIMING_DIR}" || {
      echo >&2 "Claiming failed"
      exit 2
    }
  fi

  # -noreload: do not notify the agent.
  if [ "${RELOAD}" = "0" ]; then
    exit "${EXIT_CODE}"
  fi

  if [ -z "${PROXY}" ]; then
    PROXYMSG=""
  else
    PROXYMSG="You have attempted to claim this node through a proxy - please update the proxy setting in your netdata.conf to ${PROXY}. "
  fi

  # Update cloud.conf in the agent memory. The status of the first failing
  # netdatacli call is kept so that it can be reported below.
  NOTIFY_STATUS=0
  @sbindir_POST@/netdatacli write-config 'cloud|global|enabled|yes' &&
    @sbindir_POST@/netdatacli write-config "cloud|global|cloud base url|$URL_BASE" &&
    @sbindir_POST@/netdatacli reload-claiming-state ||
    NOTIFY_STATUS=$?

  if [ "${NOTIFY_STATUS}" -eq 0 ]; then
    if [ "${HTTP_STATUS_CODE}" = "204" ]; then
      echo >&2 "${PROXYMSG}Node was successfully claimed."
    else
      echo >&2 "The agent cloud base url is set to the url provided."
      echo >&2 "The cloud may have different credentials already registered for this agent ID and it cannot be reclaimed under different credentials for security reasons. If you are unable to connect use -id=\$(uuidgen) to overwrite this agent ID with a fresh value if the original credentials cannot be restored."
      echo >&2 "${PROXYMSG}Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\""
    fi
    exit "${EXIT_CODE}"
  fi

  # From here on the agent could not be notified.
  if [ "${ERROR_KEY}" = "ErrAlreadyClaimed" ]; then
    echo >&2 "The cloud may have different credentials already registered for this agent ID and it cannot be reclaimed under different credentials for security reasons. If you are unable to connect use -id=\$(uuidgen) to overwrite this agent ID with a fresh value if the original credentials cannot be restored."
    echo >&2 "${PROXYMSG}Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\""
    exit "${EXIT_CODE}"
  fi

  echo >&2 "${PROXYMSG}The claim was successful but the agent could not be notified (${NOTIFY_STATUS})- it requires a restart to connect to the cloud."
  # A daemon that is known not to be running cannot be notified; that is not
  # treated as an error.
  if [ "$NETDATA_RUNNING" -eq 0 ]; then
    exit 0
  fi
  exit 5
fi
# Every other outcome is a failed claim: report the message that belongs to
# the exit code (plus the error key in verbose mode), remove the stored
# response and end with that exit code.
echo >&2 "Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\""
case "${VERBOSE}" in
  1) echo >&2 "Error key was:\"${ERROR_KEYS[$EXIT_CODE]}\"" ;;
esac
rm -f "${CLAIMING_DIR}/tmpout.txt"
exit $EXIT_CODE
|