#!/usr/bin/env bash
#
# check-urls.sh
#
# Check for broken URLs in Marlin files
#
# Usage:   run from the Marlin project folder (the one containing Marlin/src).
# Output:  one status tag per checked URL on stdout, then a summary.
# Exit:    0 if no URL was recorded as bad, 1 otherwise (or if the script
#          is not started from the project folder / cannot create its
#          temporary file).
#

[ -d "Marlin/src" ] || { echo "Run this script from the Marlin project folder!" ; exit 1 ; }

# User-Agent string sent with every request.
UA="Mozilla/5.0 (Linux; Android 10; SM-G996U Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Mobile Safari/537.36"

# Scratch file holding the list of URLs to check; removed on any exit path.
UTMP=$(mktemp) || { echo "Unable to create a temporary file!" ; exit 1 ; }
trap 'rm -f -- "$UTMP"' EXIT
#echo "[debug 1] UTMP = ${UTMP}"

echo "Gathering URLs. Please wait..."

# Build the URL list, one URL per line:
#   1. grep    - recursively find every line containing an http(s) URL
#   2. grep -v - drop grep's "Binary file ... matches" notices
#   3. sed     - turn "/http://" or "/https://" into "/" so a URL embedded in
#                the path of another URL is not picked up on its own
#   4. sed x3  - reduce each line to a single URL: one wrapped in (...),
#                one wrapped in [...], otherwise the last URL on the line
#   5. grep -v - skip local / placeholder hosts, a few excluded domains, and
#                anything containing '$', '{' or '}'
#   6. sed x3  - trim a trailing ']', a trailing "'", and trailing "#.'," runs
#   7. sed     - rewrite youtu.be short links as www.youtube.com/watch?v=...
#   8. sort    - de-duplicate and shuffle the list
grep -R -E "https?:\/\/[^ \"''\(\)\<\>]+" . 2>/dev/null \
  | grep -v "Binary file" \
  | sed -E "s/\/https?:\/\//\//" \
  | sed -E 's/.*\((https?:\/\/[^ ]+)\).*$/\1/' \
  | sed -E 's/.*\[(https?:\/\/[^ ]+)\].*$/\1/' \
  | sed -E 's/.*(https?:\/\/[^ \"''()<>]+).*/\1/' \
  | grep -vE "(127\.0\.0\.1|localhost|myserver|doc\.qt\.io|docs\.google\.com|raw\.githubusercontent\.com|[\${}])" \
  | sed -E 's/]$//' | sed -E "s/'$//" | sed -E "s/[#.',]+$//" \
  | sed -E 's/youtu\.be\/(.+)/www.youtube.com\/watch?v=\1/' \
  | sort -u -R \
  >"$UTMP"
#echo "[debug 2] link count = $(wc -l $UTMP)"

ISERR=                  # Set to 1 as soon as any URL is recorded as bad
declare -a BADURLS=()   # URLs recorded as bad, reported at the end

while IFS= read -r URL; do
  #echo -n "Checking ${URL} ... "

  # -I makes curl print only the response headers; the method is forced to GET.
  HEAD=$(curl -s -I -A "${UA}" --request GET "${URL}" 2>/dev/null) ; HERR=$?

  if (( HERR > 0 )); then
    # Error 92 may be domain blocking curl / wget
    (( HERR == 92 )) || { ISERR=1 ; BADURLS+=("$URL") ; }
    echo "[FAIL ($HERR)]"
    continue
  fi

  # Take the status code from the first header line only, e.g.
  # "HTTP/1.1 200 OK" -> "200". Matching against the whole header block
  # would mistake values such as "Content-Length: 200" for a status.
  STATUS_LINE=${HEAD%%$'\n'*}
  STATUS_LINE=${STATUS_LINE%$'\r'}    # Header lines end in CRLF
  STATUS=
  read -r _ STATUS _ <<<"$STATUS_LINE"

  EMSG=     # Message to print; stays empty for 200 and 303 (printed directly)
  WHERE=    # Set for redirects, which are reported but not counted as bad
  case "$STATUS" in
    301) EMSG="[Moved Permanently]" ; WHERE=1 ;;
    302) EMSG="[Moved Temporarily]" ; WHERE=1 ;;
    303) echo "[See Other]" ;;
    400) EMSG="[Invalid Request]" ;;
    403) EMSG="[Forbidden]" ;;
    404) EMSG="[Not Found]" ;;
    503) EMSG="[Unavailable]" ;;
    200) echo "[ OK ]" ;;
    *)   EMSG="[Other Err]" ;;
  esac

  [[ -n $EMSG ]] || continue

  if [[ -n $WHERE ]]; then
    # Append the redirect target when the server sent a Location header.
    # Header names are case-insensitive, so match with grep -i and strip
    # the name with a parameter expansion rather than a case-sensitive sed.
    TARGET=$(grep -i -m1 '^location:' <<<"$HEAD")
    TARGET=${TARGET#*:}                             # Drop the header name
    TARGET=${TARGET%$'\r'}                          # Drop the trailing CR
    TARGET=${TARGET#"${TARGET%%[![:space:]]*}"}     # Drop leading whitespace
    if [[ -n $TARGET ]]; then
      EMSG+=" to $TARGET"
    fi
  else
    ISERR=1 ; BADURLS+=("$URL")
  fi

  printf '%s\n' "$EMSG"
done <"$UTMP"
#echo "[debug 3]"

if [[ -n $ISERR ]]; then
  # Join bad URLs into a bulleted markdown list
  printf -v BADSTR -- "- %s\n" "${BADURLS[@]}"
  printf '\nURL Checker reports one or more URLs could not be reached:\n%s\n' "$BADSTR"
  exit 1
fi

printf '\nURL Check Passed.\n'
exit 0