|
@@ -133,6 +133,22 @@ def expose_url(url):
|
|
|
return url
|
|
|
|
|
|
|
|
|
+def get_domain_key(url):
|
|
|
+ domain = urlparse(url).netloc
|
|
|
+ return f"source:blacklist:v2:{md5_text(domain).hexdigest()}"
|
|
|
+
|
|
|
+
|
|
|
+def lock_domain(url, error=None):
|
|
|
+ error = dict(error or {})
|
|
|
+ if error.get("type") is None:
|
|
|
+ error["type"] = EventError.UNKNOWN_ERROR
|
|
|
+ if error.get("url") is None:
|
|
|
+ error["url"] = expose_url(url)
|
|
|
+ domain_key = get_domain_key(url)
|
|
|
+ cache.set(domain_key, error, 300)
|
|
|
+ logger.warning("source.disabled", extra=error)
|
|
|
+
|
|
|
+
|
|
|
def fetch_file(
|
|
|
url,
|
|
|
domain_lock_enabled=True,
|
|
@@ -148,8 +164,7 @@ def fetch_file(
|
|
|
"""
|
|
|
# lock down domains that are problematic
|
|
|
if domain_lock_enabled:
|
|
|
- domain = urlparse(url).netloc
|
|
|
- domain_key = f"source:blacklist:v2:{md5_text(domain).hexdigest()}"
|
|
|
+ domain_key = get_domain_key(url)
|
|
|
domain_result = cache.get(domain_key)
|
|
|
if domain_result:
|
|
|
domain_result["url"] = url
|
|
@@ -201,19 +216,17 @@ def fetch_file(
|
|
|
except Exception as exc:
|
|
|
logger.debug("Unable to fetch %r", url, exc_info=True)
|
|
|
if isinstance(exc, RestrictedIPAddress):
|
|
|
- error = {"type": EventError.RESTRICTED_IP, "url": expose_url(url)}
|
|
|
+ error = {"type": EventError.RESTRICTED_IP}
|
|
|
elif isinstance(exc, SuspiciousOperation):
|
|
|
- error = {"type": EventError.SECURITY_VIOLATION, "url": expose_url(url)}
|
|
|
+ error = {"type": EventError.SECURITY_VIOLATION}
|
|
|
elif isinstance(exc, (Timeout, ReadTimeout)):
|
|
|
error = {
|
|
|
"type": EventError.FETCH_TIMEOUT,
|
|
|
- "url": expose_url(url),
|
|
|
"timeout": settings.SENTRY_SOURCE_FETCH_TIMEOUT,
|
|
|
}
|
|
|
elif isinstance(exc, OverflowError):
|
|
|
error = {
|
|
|
"type": EventError.FETCH_TOO_LARGE,
|
|
|
- "url": expose_url(url),
|
|
|
# We want size in megabytes to format nicely
|
|
|
"max_size": float(settings.SENTRY_SOURCE_FETCH_MAX_SIZE) / 1024 / 1024,
|
|
|
}
|
|
@@ -221,16 +234,14 @@ def fetch_file(
|
|
|
error = {
|
|
|
"type": EventError.FETCH_GENERIC_ERROR,
|
|
|
"value": str(type(exc)),
|
|
|
- "url": expose_url(url),
|
|
|
}
|
|
|
else:
|
|
|
logger.exception(str(exc))
|
|
|
- error = {"type": EventError.UNKNOWN_ERROR, "url": expose_url(url)}
|
|
|
+ error = {"type": EventError.UNKNOWN_ERROR}
|
|
|
|
|
|
# TODO(dcramer): we want to be less aggressive on disabling domains
|
|
|
if domain_lock_enabled:
|
|
|
- cache.set(domain_key, error or "", 300)
|
|
|
- logger.warning("source.disabled", extra=error)
|
|
|
+ lock_domain(url, error)
|
|
|
raise CannotFetch(error)
|
|
|
|
|
|
headers = {k.lower(): v for k, v in response.headers.items()}
|