1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454 |
- # -*- test-case-name: twisted.web.test.test_http -*-
- # Copyright (c) Twisted Matrix Laboratories.
- # See LICENSE for details.
- """
- HyperText Transfer Protocol implementation.
- This is the basic server-side protocol implementation used by the Twisted
- Web server. It can parse HTTP 1.0 requests and supports many HTTP 1.1
- features as well. Additionally, some functionality implemented here is
- also useful for HTTP clients (such as the chunked encoding parser).
- @var CACHED: A marker value to be returned from cache-related request methods
- to indicate to the caller that a cached response will be usable and no
- response body should be generated.
- @var FOUND: An HTTP response code indicating a temporary redirect.
- @var NOT_MODIFIED: An HTTP response code indicating that a requested
- pre-condition (for example, the condition represented by an
- I{If-Modified-Since} header is present in the request) has succeeded. This
- indicates a response body cached by the client can be used.
- @var PRECONDITION_FAILED: An HTTP response code indicating that a requested
- pre-condition (for example, the condition represented by an I{If-None-Match}
- header is present in the request) has failed. This should typically
- indicate that the server has not taken the requested action.
- @var maxChunkSizeLineLength: Maximum allowable length of the CRLF-terminated
- line that indicates the size of a chunk and the extensions associated with
- it, as in the HTTP 1.1 chunked I{Transfer-Encoding} (RFC 7230 section 4.1).
- This limits how much data may be buffered when decoding the line.
- """
- from __future__ import annotations
- __all__ = [
- "SWITCHING",
- "OK",
- "CREATED",
- "ACCEPTED",
- "NON_AUTHORITATIVE_INFORMATION",
- "NO_CONTENT",
- "RESET_CONTENT",
- "PARTIAL_CONTENT",
- "MULTI_STATUS",
- "MULTIPLE_CHOICE",
- "MOVED_PERMANENTLY",
- "FOUND",
- "SEE_OTHER",
- "NOT_MODIFIED",
- "USE_PROXY",
- "TEMPORARY_REDIRECT",
- "PERMANENT_REDIRECT",
- "BAD_REQUEST",
- "UNAUTHORIZED",
- "PAYMENT_REQUIRED",
- "FORBIDDEN",
- "NOT_FOUND",
- "NOT_ALLOWED",
- "NOT_ACCEPTABLE",
- "PROXY_AUTH_REQUIRED",
- "REQUEST_TIMEOUT",
- "CONFLICT",
- "GONE",
- "LENGTH_REQUIRED",
- "PRECONDITION_FAILED",
- "REQUEST_ENTITY_TOO_LARGE",
- "REQUEST_URI_TOO_LONG",
- "UNSUPPORTED_MEDIA_TYPE",
- "REQUESTED_RANGE_NOT_SATISFIABLE",
- "EXPECTATION_FAILED",
- "IM_A_TEAPOT",
- "INTERNAL_SERVER_ERROR",
- "NOT_IMPLEMENTED",
- "BAD_GATEWAY",
- "SERVICE_UNAVAILABLE",
- "GATEWAY_TIMEOUT",
- "HTTP_VERSION_NOT_SUPPORTED",
- "INSUFFICIENT_STORAGE_SPACE",
- "NOT_EXTENDED",
- "RESPONSES",
- "CACHED",
- "urlparse",
- "parse_qs",
- "datetimeToString",
- "datetimeToLogString",
- "timegm",
- "stringToDatetime",
- "toChunk",
- "fromChunk",
- "parseContentRange",
- "StringTransport",
- "HTTPClient",
- "NO_BODY_CODES",
- "Request",
- "PotentialDataLoss",
- "HTTPChannel",
- "HTTPFactory",
- ]
- import base64
- import binascii
- import calendar
- import math
- import os
- import re
- import tempfile
- import time
- import warnings
- from email import message_from_bytes
- from email.message import EmailMessage, Message
- from io import BufferedIOBase, BytesIO, TextIOWrapper
- from typing import (
- AnyStr,
- Callable,
- Dict,
- List,
- Optional,
- Protocol as TypingProtocol,
- Tuple,
- )
- from urllib.parse import (
- ParseResultBytes,
- unquote_to_bytes as unquote,
- urlparse as _urlparse,
- )
- from zope.interface import Attribute, Interface, implementer, provider
- from incremental import Version
- from twisted.internet import address, interfaces, protocol
- from twisted.internet._producer_helpers import _PullToPush
- from twisted.internet.defer import Deferred
- from twisted.internet.interfaces import IAddress, IDelayedCall, IProtocol, IReactorTime
- from twisted.internet.protocol import Protocol
- from twisted.logger import Logger
- from twisted.protocols import basic, policies
- from twisted.python import log
- from twisted.python.compat import nativeString, networkString
- from twisted.python.components import proxyForInterface
- from twisted.python.deprecate import deprecated, deprecatedModuleAttribute
- from twisted.python.failure import Failure
- from twisted.web._responses import (
- ACCEPTED,
- BAD_GATEWAY,
- BAD_REQUEST,
- CONFLICT,
- CREATED,
- EXPECTATION_FAILED,
- FORBIDDEN,
- FOUND,
- GATEWAY_TIMEOUT,
- GONE,
- HTTP_VERSION_NOT_SUPPORTED,
- IM_A_TEAPOT,
- INSUFFICIENT_STORAGE_SPACE,
- INTERNAL_SERVER_ERROR,
- LENGTH_REQUIRED,
- MOVED_PERMANENTLY,
- MULTI_STATUS,
- MULTIPLE_CHOICE,
- NO_CONTENT,
- NON_AUTHORITATIVE_INFORMATION,
- NOT_ACCEPTABLE,
- NOT_ALLOWED,
- NOT_EXTENDED,
- NOT_FOUND,
- NOT_IMPLEMENTED,
- NOT_MODIFIED,
- OK,
- PARTIAL_CONTENT,
- PAYMENT_REQUIRED,
- PERMANENT_REDIRECT,
- PRECONDITION_FAILED,
- PROXY_AUTH_REQUIRED,
- REQUEST_ENTITY_TOO_LARGE,
- REQUEST_TIMEOUT,
- REQUEST_URI_TOO_LONG,
- REQUESTED_RANGE_NOT_SATISFIABLE,
- RESET_CONTENT,
- RESPONSES,
- SEE_OTHER,
- SERVICE_UNAVAILABLE,
- SWITCHING,
- TEMPORARY_REDIRECT,
- UNAUTHORIZED,
- UNSUPPORTED_MEDIA_TYPE,
- USE_PROXY,
- )
- from twisted.web.http_headers import Headers, _sanitizeLinearWhitespace
- from twisted.web.iweb import IAccessLogFormatter, INonQueuedRequestFactory, IRequest
- try:
- from twisted.web._http2 import H2Connection
- H2_ENABLED = True
- except ImportError:
- H2_ENABLED = False
- # A common request timeout -- 1 minute. This is roughly what nginx uses, and
- # so it seems to be a good choice for us too.
- _REQUEST_TIMEOUT = 1 * 60
- protocol_version = "HTTP/1.1"
- CACHED = """Magic constant returned by http.Request methods to set cache
- validation headers when the request is conditional and the value fails
- the condition."""
- # backwards compatibility
- responses = RESPONSES
- # datetime parsing and formatting
- weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
- monthname = [
- None,
- "Jan",
- "Feb",
- "Mar",
- "Apr",
- "May",
- "Jun",
- "Jul",
- "Aug",
- "Sep",
- "Oct",
- "Nov",
- "Dec",
- ]
- weekdayname_lower = [name.lower() for name in weekdayname]
- monthname_lower = [name and name.lower() for name in monthname]
- def _parseRequestLine(line: bytes) -> tuple[bytes, bytes, bytes]:
- """
- Parse an HTTP request line, which looks like:
- GET /foo/bar HTTP/1.1
- This function attempts to validate the well-formedness of
- the line. RFC 9112 section 3 provides this ABNF:
- request-line = method SP request-target SP HTTP-version
- We allow any method that is a valid token:
- method = token
- token = 1*tchar
- tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
- / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
- / DIGIT / ALPHA
- We allow any non-empty request-target that contains only printable
- ASCII characters (no whitespace).
- The RFC defines HTTP-version like this:
- HTTP-version = HTTP-name "/" DIGIT "." DIGIT
- HTTP-name = %s"HTTP"
- However, this function is more strict than the RFC: we only allow
- HTTP versions of 1.0 and 1.1, as later versions of HTTP don't use
- a request line.
- @returns: C{(method, request, version)} three-tuple
- @raises: L{ValueError} when malformed
- """
- method, request, version = line.split(b" ")
- if not _istoken(method):
- raise ValueError("Invalid method")
- for c in request:
- if c <= 32 or c > 176:
- raise ValueError("Invalid request-target")
- if request == b"":
- raise ValueError("Empty request-target")
- if version != b"HTTP/1.1" and version != b"HTTP/1.0":
- raise ValueError("Invalid version")
- return method, request, version
- def _parseContentType(line: bytes) -> bytes:
- """
- Parse the Content-Type header.
- """
- msg = EmailMessage()
- msg["content-type"] = line.decode("charmap")
- key = msg.get_content_type()
- encodedKey = key.encode("charmap")
- return encodedKey
- class _MultiPartParseException(Exception):
- """
- Failed to parse the multipart/form-data payload.
- """
- def _getMultiPartArgs(content: bytes, ctype: bytes) -> dict[bytes, list[bytes]]:
- """
- Parse the content of a multipart/form-data request.
- """
- result = {}
- multiPartHeaders = b"MIME-Version: 1.0\r\n" + b"Content-Type: " + ctype + b"\r\n"
- msg = message_from_bytes(multiPartHeaders + content)
- if not msg.is_multipart():
- raise _MultiPartParseException("Not a multipart.")
- part: Message
- # "per Python docs, a list of Message objects when is_multipart() is True,
- # or a string when is_multipart() is False"
- for part in msg.get_payload(): # type:ignore[assignment]
- name: str | None = part.get_param(
- "name", header="content-disposition"
- ) # type:ignore[assignment]
- if not name:
- continue
- payload: bytes = part.get_payload(decode=True) # type:ignore[assignment]
- result[name.encode("utf8")] = [payload]
- return result
- def urlparse(url):
- """
- Parse an URL into six components.
- This is similar to C{urlparse.urlparse}, but rejects C{str} input
- and always produces C{bytes} output.
- @type url: C{bytes}
- @raise TypeError: The given url was a C{str} string instead of a
- C{bytes}.
- @return: The scheme, net location, path, params, query string, and fragment
- of the URL - all as C{bytes}.
- @rtype: C{ParseResultBytes}
- """
- if isinstance(url, str):
- raise TypeError("url must be bytes, not unicode")
- scheme, netloc, path, params, query, fragment = _urlparse(url)
- if isinstance(scheme, str):
- scheme = scheme.encode("ascii")
- netloc = netloc.encode("ascii")
- path = path.encode("ascii")
- query = query.encode("ascii")
- fragment = fragment.encode("ascii")
- return ParseResultBytes(scheme, netloc, path, params, query, fragment)
- def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
- """
- Like C{cgi.parse_qs}, but with support for parsing byte strings on Python 3.
- This was created to help with Python 2 to Python 3 migration.
- Consider using L{urllib.parse.parse_qs}.
- @type qs: C{bytes}
- """
- d = {}
- items = [s2 for s1 in qs.split(b"&") for s2 in s1.split(b";")]
- for item in items:
- try:
- k, v = item.split(b"=", 1)
- except ValueError:
- if strict_parsing:
- raise
- continue
- if v or keep_blank_values:
- k = unquote(k.replace(b"+", b" "))
- v = unquote(v.replace(b"+", b" "))
- if k in d:
- d[k].append(v)
- else:
- d[k] = [v]
- return d
- def datetimeToString(msSinceEpoch=None):
- """
- Convert seconds since epoch to HTTP datetime string.
- @rtype: C{bytes}
- """
- if msSinceEpoch == None:
- msSinceEpoch = time.time()
- year, month, day, hh, mm, ss, wd, y, z = time.gmtime(msSinceEpoch)
- s = networkString(
- "%s, %02d %3s %4d %02d:%02d:%02d GMT"
- % (weekdayname[wd], day, monthname[month], year, hh, mm, ss)
- )
- return s
- def datetimeToLogString(msSinceEpoch=None):
- """
- Convert seconds since epoch to log datetime string.
- @rtype: C{str}
- """
- if msSinceEpoch == None:
- msSinceEpoch = time.time()
- year, month, day, hh, mm, ss, wd, y, z = time.gmtime(msSinceEpoch)
- s = "[%02d/%3s/%4d:%02d:%02d:%02d +0000]" % (
- day,
- monthname[month],
- year,
- hh,
- mm,
- ss,
- )
- return s
- def timegm(year, month, day, hour, minute, second):
- """
- Convert time tuple in GMT to seconds since epoch, GMT
- """
- EPOCH = 1970
- if year < EPOCH:
- raise ValueError("Years prior to %d not supported" % (EPOCH,))
- assert 1 <= month <= 12
- days = 365 * (year - EPOCH) + calendar.leapdays(EPOCH, year)
- for i in range(1, month):
- days = days + calendar.mdays[i]
- if month > 2 and calendar.isleap(year):
- days = days + 1
- days = days + day - 1
- hours = days * 24 + hour
- minutes = hours * 60 + minute
- seconds = minutes * 60 + second
- return seconds
- def stringToDatetime(dateString):
- """
- Convert an HTTP date string (one of three formats) to seconds since epoch.
- @type dateString: C{bytes}
- """
- parts = dateString.decode("ascii").split()
- if not parts[0][0:3].lower() in weekdayname_lower:
- # Weekday is stupid. Might have been omitted.
- try:
- return stringToDatetime(b"Sun, " + dateString)
- except ValueError:
- # Guess not.
- pass
- partlen = len(parts)
- if (partlen == 5 or partlen == 6) and parts[1].isdigit():
- # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT
- # (Note: "GMT" is literal, not a variable timezone)
- # (also handles without "GMT")
- # This is the normal format
- day = parts[1]
- month = parts[2]
- year = parts[3]
- time = parts[4]
- elif (partlen == 3 or partlen == 4) and parts[1].find("-") != -1:
- # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT
- # (Note: "GMT" is literal, not a variable timezone)
- # (also handles without without "GMT")
- # Two digit year, yucko.
- day, month, year = parts[1].split("-")
- time = parts[2]
- year = int(year)
- if year < 69:
- year = year + 2000
- elif year < 100:
- year = year + 1900
- elif len(parts) == 5:
- # 3rd date format: Sun Nov 6 08:49:37 1994
- # ANSI C asctime() format.
- day = parts[2]
- month = parts[1]
- year = parts[4]
- time = parts[3]
- else:
- raise ValueError("Unknown datetime format %r" % dateString)
- day = int(day)
- month = int(monthname_lower.index(month.lower()))
- year = int(year)
- hour, min, sec = map(int, time.split(":"))
- return int(timegm(year, month, day, hour, min, sec))
- def toChunk(data):
- """
- Convert string to a chunk.
- @type data: C{bytes}
- @returns: a tuple of C{bytes} representing the chunked encoding of data
- """
- return (networkString(f"{len(data):x}"), b"\r\n", data, b"\r\n")
- def _istoken(b: bytes) -> bool:
- """
- Is the string a token per RFC 9110 section 5.6.2?
- """
- for c in b:
- if c not in (
- b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" # ALPHA
- b"0123456789" # DIGIT
- b"!#$%^'*+-.^_`|~"
- ):
- return False
- return b != b""
- def _ishexdigits(b: bytes) -> bool:
- """
- Is the string case-insensitively hexidecimal?
- It must be composed of one or more characters in the ranges a-f, A-F
- and 0-9.
- """
- for c in b:
- if c not in b"0123456789abcdefABCDEF":
- return False
- return b != b""
- def _hexint(b: bytes) -> int:
- """
- Decode a hexadecimal integer.
- Unlike L{int(b, 16)}, this raises L{ValueError} when the integer has
- a prefix like C{b'0x'}, C{b'+'}, or C{b'-'}, which is desirable when
- parsing network protocols.
- """
- if not _ishexdigits(b):
- raise ValueError(b)
- return int(b, 16)
- def fromChunk(data: bytes) -> Tuple[bytes, bytes]:
- """
- Convert chunk to string.
- Note that this function is not specification compliant: it doesn't handle
- chunk extensions.
- @type data: C{bytes}
- @return: tuple of (result, remaining) - both C{bytes}.
- @raise ValueError: If the given data is not a correctly formatted chunked
- byte string.
- """
- prefix, rest = data.split(b"\r\n", 1)
- length = _hexint(prefix)
- if length < 0:
- raise ValueError("Chunk length must be >= 0, not %d" % (length,))
- if rest[length : length + 2] != b"\r\n":
- raise ValueError("chunk must end with CRLF")
- return rest[:length], rest[length + 2 :]
- def parseContentRange(header):
- """
- Parse a content-range header into (start, end, realLength).
- realLength might be None if real length is not known ('*').
- """
- kind, other = header.strip().split()
- if kind.lower() != "bytes":
- raise ValueError("a range of type %r is not supported")
- startend, realLength = other.split("/")
- start, end = map(int, startend.split("-"))
- if realLength == "*":
- realLength = None
- else:
- realLength = int(realLength)
- return (start, end, realLength)
- class _IDeprecatedHTTPChannelToRequestInterface(Interface):
- """
- The interface L{HTTPChannel} expects of L{Request}.
- """
- requestHeaders = Attribute(
- "A L{http_headers.Headers} instance giving all received HTTP request "
- "headers."
- )
- responseHeaders = Attribute(
- "A L{http_headers.Headers} instance holding all HTTP response "
- "headers to be sent."
- )
- def connectionLost(reason):
- """
- The underlying connection has been lost.
- @param reason: A failure instance indicating the reason why
- the connection was lost.
- @type reason: L{twisted.python.failure.Failure}
- """
- def gotLength(length):
- """
- Called when L{HTTPChannel} has determined the length, if any,
- of the incoming request's body.
- @param length: The length of the request's body.
- @type length: L{int} if the request declares its body's length
- and L{None} if it does not.
- """
- def handleContentChunk(data):
- """
- Deliver a received chunk of body data to the request. Note
- this does not imply chunked transfer encoding.
- @param data: The received chunk.
- @type data: L{bytes}
- """
- def parseCookies():
- """
- Parse the request's cookies out of received headers.
- """
- def requestReceived(command, path, version):
- """
- Called when the entire request, including its body, has been
- received.
- @param command: The request's HTTP command.
- @type command: L{bytes}
- @param path: The request's path. Note: this is actually what
- RFC7320 calls the URI.
- @type path: L{bytes}
- @param version: The request's HTTP version.
- @type version: L{bytes}
- """
- def __eq__(other: object) -> bool:
- """
- Determines if two requests are the same object.
- @param other: Another object whose identity will be compared
- to this instance's.
- @return: L{True} when the two are the same object and L{False}
- when not.
- """
- def __ne__(other: object) -> bool:
- """
- Determines if two requests are not the same object.
- @param other: Another object whose identity will be compared
- to this instance's.
- @return: L{True} when the two are not the same object and
- L{False} when they are.
- """
- def __hash__():
- """
- Generate a hash value for the request.
- @return: The request's hash value.
- @rtype: L{int}
- """
- class StringTransport:
- """
- I am a BytesIO wrapper that conforms for the transport API. I support
- the `writeSequence' method.
- """
- def __init__(self):
- self.s = BytesIO()
- def writeSequence(self, seq):
- self.s.write(b"".join(seq))
- def __getattr__(self, attr):
- return getattr(self.__dict__["s"], attr)
- class HTTPClient(basic.LineReceiver):
- """
- A client for HTTP 1.0.
- Notes:
- You probably want to send a 'Host' header with the name of the site you're
- connecting to, in order to not break name based virtual hosting.
- @ivar length: The length of the request body in bytes.
- @type length: C{int}
- @ivar firstLine: Are we waiting for the first header line?
- @type firstLine: C{bool}
- @ivar __buffer: The buffer that stores the response to the HTTP request.
- @type __buffer: A C{BytesIO} object.
- @ivar _header: Part or all of an HTTP request header.
- @type _header: C{bytes}
- """
- length = None
- firstLine = True
- __buffer = None
- _header = b""
- def sendCommand(self, command, path):
- self.transport.writeSequence([command, b" ", path, b" HTTP/1.0\r\n"])
- def sendHeader(self, name, value):
- if not isinstance(value, bytes):
- # XXX Deprecate this case
- value = networkString(str(value))
- santizedName = _sanitizeLinearWhitespace(name)
- santizedValue = _sanitizeLinearWhitespace(value)
- self.transport.writeSequence([santizedName, b": ", santizedValue, b"\r\n"])
- def endHeaders(self):
- self.transport.write(b"\r\n")
- def extractHeader(self, header):
- """
- Given a complete HTTP header, extract the field name and value and
- process the header.
- @param header: a complete HTTP request header of the form
- 'field-name: value'.
- @type header: C{bytes}
- """
- key, val = header.split(b":", 1)
- val = val.lstrip()
- self.handleHeader(key, val)
- if key.lower() == b"content-length":
- self.length = int(val)
- def lineReceived(self, line):
- """
- Parse the status line and headers for an HTTP request.
- @param line: Part of an HTTP request header. Request bodies are parsed
- in L{HTTPClient.rawDataReceived}.
- @type line: C{bytes}
- """
- if self.firstLine:
- self.firstLine = False
- l = line.split(None, 2)
- version = l[0]
- status = l[1]
- try:
- message = l[2]
- except IndexError:
- # sometimes there is no message
- message = b""
- self.handleStatus(version, status, message)
- return
- if not line:
- if self._header != b"":
- # Only extract headers if there are any
- self.extractHeader(self._header)
- self.__buffer = BytesIO()
- self.handleEndHeaders()
- self.setRawMode()
- return
- if line.startswith(b"\t") or line.startswith(b" "):
- # This line is part of a multiline header. According to RFC 822, in
- # "unfolding" multiline headers you do not strip the leading
- # whitespace on the continuing line.
- self._header = self._header + line
- elif self._header:
- # This line starts a new header, so process the previous one.
- self.extractHeader(self._header)
- self._header = line
- else: # First header
- self._header = line
- def connectionLost(self, reason):
- self.handleResponseEnd()
- def handleResponseEnd(self):
- """
- The response has been completely received.
- This callback may be invoked more than once per request.
- """
- if self.__buffer is not None:
- b = self.__buffer.getvalue()
- self.__buffer = None
- self.handleResponse(b)
- def handleResponsePart(self, data):
- self.__buffer.write(data)
- def connectionMade(self):
- pass
- def handleStatus(self, version, status, message):
- """
- Called when the status-line is received.
- @param version: e.g. 'HTTP/1.0'
- @param status: e.g. '200'
- @type status: C{bytes}
- @param message: e.g. 'OK'
- """
- def handleHeader(self, key, val):
- """
- Called every time a header is received.
- """
- def handleEndHeaders(self):
- """
- Called when all headers have been received.
- """
- def rawDataReceived(self, data):
- if self.length is not None:
- data, rest = data[: self.length], data[self.length :]
- self.length -= len(data)
- else:
- rest = b""
- self.handleResponsePart(data)
- if self.length == 0:
- self.handleResponseEnd()
- self.setLineMode(rest)
- deprecatedModuleAttribute(
- Version("Twisted", 24, 7, 0),
- "Use twisted.web.client.Agent instead.",
- __name__,
- HTTPClient.__name__,
- )
- # response codes that must have empty bodies
- NO_BODY_CODES = (204, 304)
- # Sentinel object that detects people explicitly passing `queued` to Request.
- _QUEUED_SENTINEL = object()
- def _getContentFile(length):
- """
- Get a writeable file-like object to which request content can be written.
- """
- if length is not None and length < 100000:
- return BytesIO()
- return tempfile.TemporaryFile()
- _hostHeaderExpression = re.compile(rb"^\[?(?P<host>.*?)\]?(:\d+)?$")
- @implementer(interfaces.IConsumer, _IDeprecatedHTTPChannelToRequestInterface)
- class Request:
- """
- A HTTP request.
- Subclasses should override the process() method to determine how
- the request will be processed.
- @ivar method: The HTTP method that was used, e.g. C{b'GET'}.
- @type method: L{bytes}
- @ivar uri: The full encoded URI which was requested (including query
- arguments), e.g. C{b'/a/b%20/c?q=v'}.
- @type uri: L{bytes}
- @ivar path: The encoded path of the request URI (not including query
- arguments), e.g. C{b'/a/b%20/c'}.
- @type path: L{bytes}
- @ivar args: A mapping of decoded query argument names as L{bytes} to
- corresponding query argument values as L{list}s of L{bytes}.
- For example, for a URI with C{foo=bar&foo=baz&quux=spam}
- as its query part C{args} will be C{{b'foo': [b'bar', b'baz'],
- b'quux': [b'spam']}}.
- @type args: L{dict} of L{bytes} to L{list} of L{bytes}
- @ivar content: A file-like object giving the request body. This may be
- a file on disk, an L{io.BytesIO}, or some other type. The
- implementation is free to decide on a per-request basis.
- @type content: L{typing.BinaryIO}
- @ivar cookies: The cookies that will be sent in the response.
- @type cookies: L{list} of L{bytes}
- @type requestHeaders: L{http_headers.Headers}
- @ivar requestHeaders: All received HTTP request headers.
- @type responseHeaders: L{http_headers.Headers}
- @ivar responseHeaders: All HTTP response headers to be sent.
- @ivar notifications: A L{list} of L{Deferred}s which are waiting for
- notification that the response to this request has been finished
- (successfully or with an error). Don't use this attribute directly,
- instead use the L{Request.notifyFinish} method.
- @ivar _disconnected: A flag which is C{False} until the connection over
- which this request was received is closed and which is C{True} after
- that.
- @type _disconnected: L{bool}
- @ivar _log: A logger instance for request related messages.
- @type _log: L{twisted.logger.Logger}
- """
- producer = None
- finished = 0
- code = OK
- code_message = RESPONSES[OK]
- method = b"(no method yet)"
- clientproto = b"(no clientproto yet)"
- uri = b"(no uri yet)"
- startedWriting = 0
- chunked = 0
- sentLength = 0 # content-length of response, or total bytes sent via chunking
- etag = None
- lastModified = None
- args = None
- path = None
- content = None
- _forceSSL = 0
- _disconnected = False
- _log = Logger()
- def __init__(self, channel: HTTPChannel, queued: object = _QUEUED_SENTINEL) -> None:
- """
- @param channel: the channel we're connected to.
- @param queued: (deprecated) are we in the request queue, or can we
- start writing to the transport?
- """
- self.notifications: List[Deferred[None]] = []
- self.channel = channel
- # Cache the client and server information, we'll need this
- # later to be serialized and sent with the request so CGIs
- # will work remotely
- self.client = self.channel.getPeer()
- self.host = self.channel.getHost()
- self.requestHeaders: Headers = Headers()
- self.received_cookies: Dict[bytes, bytes] = {}
- self.responseHeaders: Headers = Headers()
- self.cookies: List[bytes] = [] # outgoing cookies
- self.transport = self.channel.transport
- if queued is _QUEUED_SENTINEL:
- queued = False
- self.queued = queued
- def _cleanup(self):
- """
- Called when have finished responding and are no longer queued.
- """
- if self.producer:
- self._log.failure(
- "",
- Failure(RuntimeError(f"Producer was not unregistered for {self.uri}")),
- )
- self.unregisterProducer()
- self.channel.requestDone(self)
- del self.channel
- if self.content is not None:
- try:
- self.content.close()
- except OSError:
- # win32 suckiness, no idea why it does this
- pass
- del self.content
- for d in self.notifications:
- d.callback(None)
- self.notifications = []
- # methods for channel - end users should not use these
- @deprecated(Version("Twisted", 16, 3, 0))
- def noLongerQueued(self):
- """
- Notify the object that it is no longer queued.
- We start writing whatever data we have to the transport, etc.
- This method is not intended for users.
- In 16.3 this method was changed to become a no-op, as L{Request}
- objects are now never queued.
- """
- pass
- def gotLength(self, length):
- """
- Called when HTTP channel got length of content in this request.
- This method is not intended for users.
- @param length: The length of the request body, as indicated by the
- request headers. L{None} if the request headers do not indicate a
- length.
- """
- self.content = _getContentFile(length)
- def parseCookies(self):
- """
- Parse cookie headers.
- This method is not intended for users.
- """
- cookieheaders = self.requestHeaders.getRawHeaders(b"cookie")
- if cookieheaders is None:
- return
- for cookietxt in cookieheaders:
- if cookietxt:
- for cook in cookietxt.split(b";"):
- cook = cook.lstrip()
- try:
- k, v = cook.split(b"=", 1)
- self.received_cookies[k] = v
- except ValueError:
- pass
- def handleContentChunk(self, data):
- """
- Write a chunk of data.
- This method is not intended for users.
- """
- self.content.write(data)
- def requestReceived(self, command, path, version):
- """
- Called by channel when all data has been received.
- This method is not intended for users.
- @type command: C{bytes}
- @param command: The HTTP verb of this request. This has the case
- supplied by the client (eg, it maybe "get" rather than "GET").
- @type path: C{bytes}
- @param path: The URI of this request.
- @type version: C{bytes}
- @param version: The HTTP version of this request.
- """
- clength = self.content.tell()
- self.content.seek(0, 0)
- self.args = {}
- self.method, self.uri = command, path
- self.clientproto = version
- x = self.uri.split(b"?", 1)
- if len(x) == 1:
- self.path = self.uri
- else:
- self.path, argstring = x
- self.args = parse_qs(argstring, 1)
- # Argument processing
- args = self.args
- ctype = self.requestHeaders.getRawHeaders(b"content-type")
- if ctype is not None:
- ctype = ctype[0]
- if self.method == b"POST" and ctype and clength:
- mfd = b"multipart/form-data"
- key = _parseContentType(ctype)
- if key == b"application/x-www-form-urlencoded":
- args.update(parse_qs(self.content.read(), 1))
- elif key == mfd:
- try:
- self.content.seek(0)
- content = self.content.read()
- self.args.update(_getMultiPartArgs(content, ctype))
- except _MultiPartParseException:
- # It was a bad request.
- self.channel._respondToBadRequestAndDisconnect()
- return
- self.content.seek(0, 0)
- self.process()
- def __repr__(self) -> str:
- """
- Return a string description of the request including such information
- as the request method and request URI.
- @return: A string loosely describing this L{Request} object.
- @rtype: L{str}
- """
- return "<{} at 0x{:x} method={} uri={} clientproto={}>".format(
- self.__class__.__name__,
- id(self),
- nativeString(self.method),
- nativeString(self.uri),
- nativeString(self.clientproto),
- )
- def process(self):
- """
- Override in subclasses.
- This method is not intended for users.
- """
- pass
- # consumer interface
- def registerProducer(self, producer, streaming):
- """
- Register a producer.
- """
- if self.producer:
- raise ValueError(
- "registering producer %s before previous one (%s) was "
- "unregistered" % (producer, self.producer)
- )
- self.streamingProducer = streaming
- self.producer = producer
- self.channel.registerProducer(producer, streaming)
- def unregisterProducer(self):
- """
- Unregister the producer.
- """
- self.channel.unregisterProducer()
- self.producer = None
- # The following is the public interface that people should be
- # writing to.
- def getHeader(self, key: AnyStr) -> Optional[AnyStr]:
- """
- Get an HTTP request header.
- @type key: C{bytes} or C{str}
- @param key: The name of the header to get the value of.
- @rtype: C{bytes} or C{str} or L{None}
- @return: The value of the specified header, or L{None} if that header
- was not present in the request. The string type of the result
- matches the type of C{key}.
- """
- value = self.requestHeaders.getRawHeaders(key)
- if value is not None:
- return value[-1]
- return None
- def getCookie(self, key):
- """
- Get a cookie that was sent from the network.
- @type key: C{bytes}
- @param key: The name of the cookie to get.
- @rtype: C{bytes} or C{None}
- @returns: The value of the specified cookie, or L{None} if that cookie
- was not present in the request.
- """
- return self.received_cookies.get(key)
- def notifyFinish(self) -> Deferred[None]:
- """
- Notify when the response to this request has finished.
- @note: There are some caveats around the reliability of the delivery of
- this notification.
- 1. If this L{Request}'s channel is paused, the notification
- will not be delivered. This can happen in one of two ways;
- either you can call C{request.transport.pauseProducing}
- yourself, or,
- 2. In order to deliver this notification promptly when a client
- disconnects, the reactor must continue reading from the
- transport, so that it can tell when the underlying network
- connection has gone away. Twisted Web will only keep
- reading up until a finite (small) maximum buffer size before
- it gives up and pauses the transport itself. If this
- occurs, you will not discover that the connection has gone
- away until a timeout fires or until the application attempts
- to send some data via L{Request.write}.
- 3. It is theoretically impossible to distinguish between
- successfully I{sending} a response and the peer successfully
- I{receiving} it. There are several networking edge cases
- where the L{Deferred}s returned by C{notifyFinish} will
- indicate success, but the data will never be received.
- There are also edge cases where the connection will appear
- to fail, but in reality the response was delivered. As a
- result, the information provided by the result of the
- L{Deferred}s returned by this method should be treated as a
- guess; do not make critical decisions in your applications
- based upon it.
- @rtype: L{Deferred}
- @return: A L{Deferred} which will be triggered when the request is
- finished -- with a L{None} value if the request finishes
- successfully or with an error if the request is interrupted by an
- error (for example, the client closing the connection prematurely).
- """
- self.notifications.append(Deferred())
- return self.notifications[-1]
- def finish(self):
- """
- Indicate that all response data has been written to this L{Request}.
- """
- if self._disconnected:
- raise RuntimeError(
- "Request.finish called on a request after its connection was lost; "
- "use Request.notifyFinish to keep track of this."
- )
- if self.finished:
- warnings.warn("Warning! request.finish called twice.", stacklevel=2)
- return
- if not self.startedWriting:
- # write headers
- self.write(b"")
- if self.chunked:
- # write last chunk and closing CRLF
- self.channel.write(b"0\r\n\r\n")
- # log request
- if hasattr(self.channel, "factory") and self.channel.factory is not None:
- self.channel.factory.log(self)
- self.finished = 1
- if not self.queued:
- self._cleanup()
- def write(self, data):
- """
- Write some data as a result of an HTTP request. The first
- time this is called, it writes out response data.
- @type data: C{bytes}
- @param data: Some bytes to be sent as part of the response body.
- """
- if self.finished:
- raise RuntimeError(
- "Request.write called on a request after " "Request.finish was called."
- )
- if self._disconnected:
- # Don't attempt to write any data to a disconnected client.
- # The RuntimeError exception will be thrown as usual when
- # request.finish is called
- return
- if not self.startedWriting:
- self.startedWriting = 1
- version = self.clientproto
- code = b"%d" % (self.code,)
- reason = self.code_message
- # if we don't have a content length, we send data in
- # chunked mode, so that we can support pipelining in
- # persistent connections.
- if (
- (version == b"HTTP/1.1")
- and (self.responseHeaders.getRawHeaders(b"content-length") is None)
- and self.method != b"HEAD"
- and self.code not in NO_BODY_CODES
- ):
- self.responseHeaders.setRawHeaders("Transfer-Encoding", [b"chunked"])
- self.chunked = 1
- if self.lastModified is not None:
- if self.responseHeaders.hasHeader(b"last-modified"):
- self._log.info(
- "Warning: last-modified specified both in"
- " header list and lastModified attribute."
- )
- else:
- self.responseHeaders.setRawHeaders(
- b"last-modified", [datetimeToString(self.lastModified)]
- )
- if self.etag is not None:
- self.responseHeaders.setRawHeaders(b"ETag", [self.etag])
- if self.cookies:
- self.responseHeaders.setRawHeaders(b"Set-Cookie", self.cookies)
- self.channel.writeHeaders(version, code, reason, self.responseHeaders)
- # if this is a "HEAD" request, we shouldn't return any data
- if self.method == b"HEAD":
- self.write = lambda data: None
- return
- # for certain result codes, we should never return any data
- if self.code in NO_BODY_CODES:
- self.write = lambda data: None
- return
- self.sentLength = self.sentLength + len(data)
- if data:
- if self.chunked:
- self.channel.writeSequence(toChunk(data))
- else:
- self.channel.write(data)
- def addCookie(
- self,
- k,
- v,
- expires=None,
- domain=None,
- path=None,
- max_age=None,
- comment=None,
- secure=None,
- httpOnly=False,
- sameSite=None,
- ):
- """
- Set an outgoing HTTP cookie.
- In general, you should consider using sessions instead of cookies, see
- L{twisted.web.server.Request.getSession} and the
- L{twisted.web.server.Session} class for details.
- @param k: cookie name
- @type k: L{bytes} or L{str}
- @param v: cookie value
- @type v: L{bytes} or L{str}
- @param expires: cookie expire attribute value in
- "Wdy, DD Mon YYYY HH:MM:SS GMT" format
- @type expires: L{bytes} or L{str}
- @param domain: cookie domain
- @type domain: L{bytes} or L{str}
- @param path: cookie path
- @type path: L{bytes} or L{str}
- @param max_age: cookie expiration in seconds from reception
- @type max_age: L{bytes} or L{str}
- @param comment: cookie comment
- @type comment: L{bytes} or L{str}
- @param secure: direct browser to send the cookie on encrypted
- connections only
- @type secure: L{bool}
- @param httpOnly: direct browser not to expose cookies through channels
- other than HTTP (and HTTPS) requests
- @type httpOnly: L{bool}
- @param sameSite: One of L{None} (default), C{'lax'} or C{'strict'}.
- Direct browsers not to send this cookie on cross-origin requests.
- Please see:
- U{https://tools.ietf.org/html/draft-west-first-party-cookies-07}
- @type sameSite: L{None}, L{bytes} or L{str}
- @raise ValueError: If the value for C{sameSite} is not supported.
- """
- def _ensureBytes(val):
- """
- Ensure that C{val} is bytes, encoding using UTF-8 if
- needed.
- @param val: L{bytes} or L{str}
- @return: L{bytes}
- """
- if val is None:
- # It's None, so we don't want to touch it
- return val
- if isinstance(val, bytes):
- return val
- else:
- return val.encode("utf8")
- def _sanitize(val):
- r"""
- Replace linear whitespace (C{\r}, C{\n}, C{\r\n}) and
- semicolons C{;} in C{val} with a single space.
- @param val: L{bytes}
- @return: L{bytes}
- """
- return _sanitizeLinearWhitespace(val).replace(b";", b" ")
- cookie = _sanitize(_ensureBytes(k)) + b"=" + _sanitize(_ensureBytes(v))
- if expires is not None:
- cookie = cookie + b"; Expires=" + _sanitize(_ensureBytes(expires))
- if domain is not None:
- cookie = cookie + b"; Domain=" + _sanitize(_ensureBytes(domain))
- if path is not None:
- cookie = cookie + b"; Path=" + _sanitize(_ensureBytes(path))
- if max_age is not None:
- cookie = cookie + b"; Max-Age=" + _sanitize(_ensureBytes(max_age))
- if comment is not None:
- cookie = cookie + b"; Comment=" + _sanitize(_ensureBytes(comment))
- if secure:
- cookie = cookie + b"; Secure"
- if httpOnly:
- cookie = cookie + b"; HttpOnly"
- if sameSite:
- sameSite = _ensureBytes(sameSite).lower()
- if sameSite not in [b"lax", b"strict"]:
- raise ValueError("Invalid value for sameSite: " + repr(sameSite))
- cookie += b"; SameSite=" + sameSite
- self.cookies.append(cookie)
- def setResponseCode(self, code: int, message: Optional[bytes] = None) -> None:
- """
- Set the HTTP response code.
- @type code: L{int}
- @type message: L{bytes}
- """
- self.code = code
- if message is not None:
- self.code_message = message
- else:
- self.code_message = RESPONSES.get(code, b"Unknown Status")
- def setHeader(self, name, value):
- """
- Set an HTTP response header. Overrides any previously set values for
- this header.
- @type name: L{bytes} or L{str}
- @param name: The name of the header for which to set the value.
- @type value: L{bytes} or L{str}
- @param value: The value to set for the named header. A L{str} will be
- UTF-8 encoded, which may not interoperable with other
- implementations. Avoid passing non-ASCII characters if possible.
- """
- self.responseHeaders.setRawHeaders(name, [value])
- def redirect(self, url):
- """
- Utility function that does a redirect.
- Set the response code to L{FOUND} and the I{Location} header to the
- given URL.
- The request should have C{finish()} called after this.
- @param url: I{Location} header value.
- @type url: L{bytes} or L{str}
- """
- self.setResponseCode(FOUND)
- self.setHeader(b"location", url)
- def setLastModified(self, when):
- """
- Set the C{Last-Modified} time for the response to this request.
- If I am called more than once, I ignore attempts to set
- Last-Modified earlier, only replacing the Last-Modified time
- if it is to a later value.
- If I am a conditional request, I may modify my response code
- to L{NOT_MODIFIED} if appropriate for the time given.
- @param when: The last time the resource being returned was
- modified, in seconds since the epoch.
- @type when: number
- @return: If I am a I{If-Modified-Since} conditional request and
- the time given is not newer than the condition, I return
- L{http.CACHED<CACHED>} to indicate that you should write no
- body. Otherwise, I return a false value.
- """
- # time.time() may be a float, but the HTTP-date strings are
- # only good for whole seconds.
- when = int(math.ceil(when))
- if (not self.lastModified) or (self.lastModified < when):
- self.lastModified = when
- modifiedSince = self.getHeader(b"if-modified-since")
- if modifiedSince:
- firstPart = modifiedSince.split(b";", 1)[0]
- try:
- modifiedSince = stringToDatetime(firstPart)
- except ValueError:
- return None
- if modifiedSince >= self.lastModified:
- self.setResponseCode(NOT_MODIFIED)
- return CACHED
- return None
- def setETag(self, etag):
- """
- Set an C{entity tag} for the outgoing response.
- That's \"entity tag\" as in the HTTP/1.1 C{ETag} header, \"used
- for comparing two or more entities from the same requested
- resource.\"
- If I am a conditional request, I may modify my response code
- to L{NOT_MODIFIED} or L{PRECONDITION_FAILED}, if appropriate
- for the tag given.
- @param etag: The entity tag for the resource being returned.
- @type etag: string
- @return: If I am a C{If-None-Match} conditional request and
- the tag matches one in the request, I return
- L{http.CACHED<CACHED>} to indicate that you should write
- no body. Otherwise, I return a false value.
- """
- if etag:
- self.etag = etag
- tags = self.getHeader(b"if-none-match")
- if tags:
- tags = tags.split()
- if (etag in tags) or (b"*" in tags):
- self.setResponseCode(
- ((self.method in (b"HEAD", b"GET")) and NOT_MODIFIED)
- or PRECONDITION_FAILED
- )
- return CACHED
- return None
- def getAllHeaders(self):
- """
- Return dictionary mapping the names of all received headers to the last
- value received for each.
- Since this method does not return all header information,
- C{self.requestHeaders.getAllRawHeaders()} may be preferred.
- """
- headers = {}
- for k, v in self.requestHeaders.getAllRawHeaders():
- headers[k.lower()] = v[-1]
- return headers
- def getRequestHostname(self):
- """
- Get the hostname that the HTTP client passed in to the request.
- @see: L{IRequest.getRequestHostname}
- @returns: the requested hostname
- @rtype: C{bytes}
- """
- host = self.getHeader(b"host")
- if host is not None:
- match = _hostHeaderExpression.match(host)
- if match is not None:
- return match.group("host")
- return networkString(self.getHost().host)
- def getHost(self):
- """
- Get my originally requesting transport's host.
- Don't rely on the 'transport' attribute, since Request objects may be
- copied remotely. For information on this method's return value, see
- L{twisted.internet.tcp.Port}.
- """
- return self.host
- def setHost(self, host, port, ssl=0):
- """
- Change the host and port the request thinks it's using.
- This method is useful for working with reverse HTTP proxies (e.g.
- both Squid and Apache's mod_proxy can do this), when the address
- the HTTP client is using is different than the one we're listening on.
- For example, Apache may be listening on https://www.example.com/, and
- then forwarding requests to http://localhost:8080/, but we don't want
- HTML produced by Twisted to say b'http://localhost:8080/', they should
- say b'https://www.example.com/', so we do::
- request.setHost(b'www.example.com', 443, ssl=1)
- @type host: C{bytes}
- @param host: The value to which to change the host header.
- @type ssl: C{bool}
- @param ssl: A flag which, if C{True}, indicates that the request is
- considered secure (if C{True}, L{isSecure} will return C{True}).
- """
- self._forceSSL = ssl # set first so isSecure will work
- if self.isSecure():
- default = 443
- else:
- default = 80
- if port == default:
- hostHeader = host
- else:
- hostHeader = b"%b:%d" % (host, port)
- self.requestHeaders.setRawHeaders(b"host", [hostHeader])
- self.host = address.IPv4Address("TCP", host, port)
- @deprecated(Version("Twisted", 18, 4, 0), replacement="getClientAddress")
- def getClientIP(self):
- """
- Return the IP address of the client who submitted this request.
- This method is B{deprecated}. Use L{getClientAddress} instead.
- @returns: the client IP address
- @rtype: C{str}
- """
- if isinstance(self.client, (address.IPv4Address, address.IPv6Address)):
- return self.client.host
- else:
- return None
- def getClientAddress(self):
- """
- Return the address of the client who submitted this request.
- This may not be a network address (e.g., a server listening on
- a UNIX domain socket will cause this to return
- L{UNIXAddress}). Callers must check the type of the returned
- address.
- @since: 18.4
- @return: the client's address.
- @rtype: L{IAddress}
- """
- return self.client
- def isSecure(self):
- """
- Return L{True} if this request is using a secure transport.
- Normally this method returns L{True} if this request's L{HTTPChannel}
- instance is using a transport that implements
- L{interfaces.ISSLTransport}.
- This will also return L{True} if L{Request.setHost} has been called
- with C{ssl=True}.
- @returns: L{True} if this request is secure
- @rtype: C{bool}
- """
- if self._forceSSL:
- return True
- channel = getattr(self, "channel", None)
- if channel is None:
- return False
- return channel.isSecure()
- def _authorize(self):
- # Authorization, (mostly) per the RFC
- try:
- authh = self.getHeader(b"Authorization")
- if not authh:
- self.user = self.password = b""
- return
- bas, upw = authh.split()
- if bas.lower() != b"basic":
- raise ValueError()
- upw = base64.b64decode(upw)
- self.user, self.password = upw.split(b":", 1)
- except (binascii.Error, ValueError):
- self.user = self.password = b""
- except BaseException:
- self._log.failure("")
- self.user = self.password = b""
- def getUser(self):
- """
- Return the HTTP user sent with this request, if any.
- If no user was supplied, return the empty string.
- @returns: the HTTP user, if any
- @rtype: C{bytes}
- """
- try:
- return self.user
- except BaseException:
- pass
- self._authorize()
- return self.user
- def getPassword(self):
- """
- Return the HTTP password sent with this request, if any.
- If no password was supplied, return the empty string.
- @returns: the HTTP password, if any
- @rtype: C{bytes}
- """
- try:
- return self.password
- except BaseException:
- pass
- self._authorize()
- return self.password
- def connectionLost(self, reason):
- """
- There is no longer a connection for this request to respond over.
- Clean up anything which can't be useful anymore.
- """
- self._disconnected = True
- self.channel = None
- if self.content is not None:
- self.content.close()
- for d in self.notifications:
- d.errback(reason)
- self.notifications = []
- def loseConnection(self):
- """
- Pass the loseConnection through to the underlying channel.
- """
- if self.channel is not None:
- self.channel.loseConnection()
- def __eq__(self, other: object) -> bool:
- """
- Determines if two requests are the same object.
- @param other: Another object whose identity will be compared
- to this instance's.
- @return: L{True} when the two are the same object and L{False}
- when not.
- @rtype: L{bool}
- """
- # When other is not an instance of request, return
- # NotImplemented so that Python uses other.__eq__ to perform
- # the comparison. This ensures that a Request proxy generated
- # by proxyForInterface compares equal to an actual Request
- # instanceby turning request != proxy into proxy != request.
- if isinstance(other, Request):
- return self is other
- return NotImplemented
- def __hash__(self):
- """
- A C{Request} is hashable so that it can be used as a mapping key.
- @return: A C{int} based on the instance's identity.
- """
- return id(self)
- class _DataLoss(Exception):
- """
- L{_DataLoss} indicates that not all of a message body was received. This
- is only one of several possible exceptions which may indicate that data
- was lost. Because of this, it should not be checked for by
- specifically; any unexpected exception should be treated as having
- caused data loss.
- """
- class PotentialDataLoss(Exception):
- """
- L{PotentialDataLoss} may be raised by a transfer encoding decoder's
- C{noMoreData} method to indicate that it cannot be determined if the
- entire response body has been delivered. This only occurs when making
- requests to HTTP servers which do not set I{Content-Length} or a
- I{Transfer-Encoding} in the response because in this case the end of the
- response is indicated by the connection being closed, an event which may
- also be due to a transient network problem or other error.
- """
- class _MalformedChunkedDataError(Exception):
- """
- C{_ChunkedTransferDecoder} raises L{_MalformedChunkedDataError} from its
- C{dataReceived} method when it encounters malformed data. This exception
- indicates a client-side error. If this exception is raised, the connection
- should be dropped with a 400 error.
- """
- class _IdentityTransferDecoder:
- """
- Protocol for accumulating bytes up to a specified length. This handles the
- case where no I{Transfer-Encoding} is specified.
- @ivar contentLength: Counter keeping track of how many more bytes there are
- to receive.
- @ivar dataCallback: A one-argument callable which will be invoked each
- time application data is received.
- @ivar finishCallback: A one-argument callable which will be invoked when
- the terminal chunk is received. It will be invoked with all bytes
- which were delivered to this protocol which came after the terminal
- chunk.
- """
- def __init__(self, contentLength, dataCallback, finishCallback):
- self.contentLength = contentLength
- self.dataCallback = dataCallback
- self.finishCallback = finishCallback
- def dataReceived(self, data):
- """
- Interpret the next chunk of bytes received. Either deliver them to the
- data callback or invoke the finish callback if enough bytes have been
- received.
- @raise RuntimeError: If the finish callback has already been invoked
- during a previous call to this methood.
- """
- if self.dataCallback is None:
- raise RuntimeError(
- "_IdentityTransferDecoder cannot decode data after finishing"
- )
- if self.contentLength is None:
- self.dataCallback(data)
- elif len(data) < self.contentLength:
- self.contentLength -= len(data)
- self.dataCallback(data)
- else:
- # Make the state consistent before invoking any code belonging to
- # anyone else in case noMoreData ends up being called beneath this
- # stack frame.
- contentLength = self.contentLength
- dataCallback = self.dataCallback
- finishCallback = self.finishCallback
- self.dataCallback = self.finishCallback = None
- self.contentLength = 0
- dataCallback(data[:contentLength])
- finishCallback(data[contentLength:])
- def noMoreData(self):
- """
- All data which will be delivered to this decoder has been. Check to
- make sure as much data as was expected has been received.
- @raise PotentialDataLoss: If the content length is unknown.
- @raise _DataLoss: If the content length is known and fewer than that
- many bytes have been delivered.
- @return: L{None}
- """
- finishCallback = self.finishCallback
- self.dataCallback = self.finishCallback = None
- if self.contentLength is None:
- finishCallback(b"")
- raise PotentialDataLoss()
- elif self.contentLength != 0:
- raise _DataLoss()
- maxChunkSizeLineLength = 1024
- _chunkExtChars = (
- b"\t !\"#$%&'()*+,-./0123456789:;<=>?@"
- b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`"
- b"abcdefghijklmnopqrstuvwxyz{|}~"
- b"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
- b"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
- b"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
- b"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
- b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
- b"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
- b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
- b"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
- )
- """
- Characters that are valid in a chunk extension.
- See RFC 7230 section 4.1.1::
- chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
- chunk-ext-name = token
- chunk-ext-val = token / quoted-string
- And section 3.2.6::
- token = 1*tchar
- tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
- / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
- / DIGIT / ALPHA
- ; any VCHAR, except delimiters
- quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
- qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
- obs-text = %x80-FF
- We don't check if chunk extensions are well-formed beyond validating that they
- don't contain characters outside this range.
- """
- class _ChunkedTransferDecoder:
- """
- Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 7230,
- section 4.1. This protocol can interpret the contents of a request or
- response body which uses the I{chunked} Transfer-Encoding. It cannot
- interpret any of the rest of the HTTP protocol.
- It may make sense for _ChunkedTransferDecoder to be an actual IProtocol
- implementation. Currently, the only user of this class will only ever
- call dataReceived on it. However, it might be an improvement if the
- user could connect this to a transport and deliver connection lost
- notification. This way, `dataCallback` becomes `self.transport.write`
- and perhaps `finishCallback` becomes `self.transport.loseConnection()`
- (although I'm not sure where the extra data goes in that case). This
- could also allow this object to indicate to the receiver of data that
- the stream was not completely received, an error case which should be
- noticed. -exarkun
- @ivar dataCallback: A one-argument callable which will be invoked each
- time application data is received. This callback is not reentrant.
- @ivar finishCallback: A one-argument callable which will be invoked when
- the terminal chunk is received. It will be invoked with all bytes
- which were delivered to this protocol which came after the terminal
- chunk.
- @ivar length: Counter keeping track of how many more bytes in a chunk there
- are to receive.
- @ivar state: One of C{'CHUNK_LENGTH'}, C{'CRLF'}, C{'TRAILER'},
- C{'BODY'}, or C{'FINISHED'}. For C{'CHUNK_LENGTH'}, data for the
- chunk length line is currently being read. For C{'CRLF'}, the CR LF
- pair which follows each chunk is being read. For C{'TRAILER'}, the CR
- LF pair which follows the terminal 0-length chunk is currently being
- read. For C{'BODY'}, the contents of a chunk are being read. For
- C{'FINISHED'}, the last chunk has been completely read and no more
- input is valid.
- @ivar _buffer: Accumulated received data for the current state. At each
- state transition this is truncated at the front so that index 0 is
- where the next state shall begin.
- @ivar _start: While in the C{'CHUNK_LENGTH'} and C{'TRAILER'} states,
- tracks the index into the buffer at which search for CRLF should resume.
- Resuming the search at this position avoids doing quadratic work if the
- chunk length line arrives over many calls to C{dataReceived}.
- @ivar _trailerHeaders: Accumulates raw/unparsed trailer headers.
- See https://github.com/twisted/twisted/issues/12014
- @ivar _maxTrailerHeadersSize: Maximum bytes for trailer header from the
- response.
- @type _maxTrailerHeadersSize: C{int}
- @ivar _receivedTrailerHeadersSize: Bytes received so far for the tailer headers.
- @type _receivedTrailerHeadersSize: C{int}
- """
- state = "CHUNK_LENGTH"
- def __init__(
- self,
- dataCallback: Callable[[bytes], None],
- finishCallback: Callable[[bytes], None],
- ) -> None:
- self.dataCallback = dataCallback
- self.finishCallback = finishCallback
- self._buffer = bytearray()
- self._start = 0
- self._trailerHeaders: List[bytearray] = []
- self._maxTrailerHeadersSize = 2**16
- self._receivedTrailerHeadersSize = 0
- def _dataReceived_CHUNK_LENGTH(self) -> bool:
- """
- Read the chunk size line, ignoring any extensions.
- @returns: C{True} once the line has been read and removed from
- C{self._buffer}. C{False} when more data is required.
- @raises _MalformedChunkedDataError: when the chunk size cannot be
- decoded or the length of the line exceeds L{maxChunkSizeLineLength}.
- """
- eolIndex = self._buffer.find(b"\r\n", self._start)
- if eolIndex >= maxChunkSizeLineLength or (
- eolIndex == -1 and len(self._buffer) > maxChunkSizeLineLength
- ):
- raise _MalformedChunkedDataError(
- "Chunk size line exceeds maximum of {} bytes.".format(
- maxChunkSizeLineLength
- )
- )
- if eolIndex == -1:
- # Restart the search upon receipt of more data at the start of the
- # new data, minus one in case the last character of the buffer is
- # CR.
- self._start = len(self._buffer) - 1
- return False
- endOfLengthIndex = self._buffer.find(b";", 0, eolIndex)
- if endOfLengthIndex == -1:
- endOfLengthIndex = eolIndex
- rawLength = self._buffer[0:endOfLengthIndex]
- try:
- length = _hexint(rawLength)
- except ValueError:
- raise _MalformedChunkedDataError("Chunk-size must be an integer.")
- ext = self._buffer[endOfLengthIndex + 1 : eolIndex]
- if ext and ext.translate(None, _chunkExtChars) != b"":
- raise _MalformedChunkedDataError(
- f"Invalid characters in chunk extensions: {ext!r}."
- )
- if length == 0:
- self.state = "TRAILER"
- else:
- self.state = "BODY"
- self.length = length
- del self._buffer[0 : eolIndex + 2]
- self._start = 0
- return True
- def _dataReceived_CRLF(self) -> bool:
- """
- Await the carriage return and line feed characters that are the end of
- chunk marker that follow the chunk data.
- @returns: C{True} when the CRLF have been read, otherwise C{False}.
- @raises _MalformedChunkedDataError: when anything other than CRLF are
- received.
- """
- if len(self._buffer) < 2:
- return False
- if not self._buffer.startswith(b"\r\n"):
- raise _MalformedChunkedDataError("Chunk did not end with CRLF")
- self.state = "CHUNK_LENGTH"
- del self._buffer[0:2]
- return True
- def _dataReceived_TRAILER(self) -> bool:
- """
- Collect trailer headers if received and finish at the terminal zero-length
- chunk. Then invoke C{finishCallback} and switch to state C{'FINISHED'}.
- @returns: C{False}, as there is either insufficient data to continue,
- or no data remains.
- """
- eolIndex = self._buffer.find(b"\r\n", self._start)
- if eolIndex == -1:
- # Still no end of network line marker found.
- #
- # Check if we've run up against the trailer size limit: if the next
- # read contains the terminating CRLF then we'll have this many bytes
- # of trailers (including the CRLFs).
- minTrailerSize = (
- self._receivedTrailerHeadersSize
- + len(self._buffer)
- + (1 if self._buffer.endswith(b"\r") else 2)
- )
- if minTrailerSize > self._maxTrailerHeadersSize:
- raise _MalformedChunkedDataError("Trailer headers data is too long.")
- # Continue processing more data.
- return False
- if eolIndex > 0:
- # A trailer header was detected.
- self._trailerHeaders.append(self._buffer[0:eolIndex])
- del self._buffer[0 : eolIndex + 2]
- self._start = 0
- self._receivedTrailerHeadersSize += eolIndex + 2
- if self._receivedTrailerHeadersSize > self._maxTrailerHeadersSize:
- raise _MalformedChunkedDataError("Trailer headers data is too long.")
- return True
- # eolIndex in this part of code is equal to 0
- data = memoryview(self._buffer)[2:].tobytes()
- del self._buffer[:]
- self.state = "FINISHED"
- self.finishCallback(data)
- return False
- def _dataReceived_BODY(self) -> bool:
- """
- Deliver any available chunk data to the C{dataCallback}. When all the
- remaining data for the chunk arrives, switch to state C{'CRLF'}.
- @returns: C{True} to continue processing of any buffered data.
- """
- if len(self._buffer) >= self.length:
- chunk = memoryview(self._buffer)[: self.length].tobytes()
- del self._buffer[: self.length]
- self.state = "CRLF"
- self.dataCallback(chunk)
- else:
- chunk = bytes(self._buffer)
- self.length -= len(chunk)
- del self._buffer[:]
- self.dataCallback(chunk)
- return True
- def _dataReceived_FINISHED(self) -> bool:
- """
- Once C{finishCallback} has been invoked receipt of additional data
- raises L{RuntimeError} because it represents a programming error in
- the caller.
- """
- raise RuntimeError(
- "_ChunkedTransferDecoder.dataReceived called after last "
- "chunk was processed"
- )
- def dataReceived(self, data: bytes) -> None:
- """
- Interpret data from a request or response body which uses the
- I{chunked} Transfer-Encoding.
- """
- self._buffer += data
- goOn = True
- while goOn and self._buffer:
- goOn = getattr(self, "_dataReceived_" + self.state)()
- def noMoreData(self) -> None:
- """
- Verify that all data has been received. If it has not been, raise
- L{_DataLoss}.
- """
- if self.state != "FINISHED":
- raise _DataLoss(
- "Chunked decoder in %r state, still expecting more data to "
- "get to 'FINISHED' state." % (self.state,)
- )
- @implementer(interfaces.IPushProducer)
- class _NoPushProducer:
- """
- A no-op version of L{interfaces.IPushProducer}, used to abstract over the
- possibility that a L{HTTPChannel} transport does not provide
- L{IPushProducer}.
- """
- def pauseProducing(self):
- """
- Pause producing data.
- Tells a producer that it has produced too much data to process for
- the time being, and to stop until resumeProducing() is called.
- """
- def resumeProducing(self):
- """
- Resume producing data.
- This tells a producer to re-add itself to the main loop and produce
- more data for its consumer.
- """
- def registerProducer(self, producer, streaming):
- """
- Register to receive data from a producer.
- @param producer: The producer to register.
- @param streaming: Whether this is a streaming producer or not.
- """
- def unregisterProducer(self):
- """
- Stop consuming data from a producer, without disconnecting.
- """
- def stopProducing(self):
- """
- IProducer.stopProducing
- """
- @implementer(interfaces.ITransport, interfaces.IPushProducer, interfaces.IConsumer)
- class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
- """
- A receiver for HTTP requests.
- The L{HTTPChannel} provides L{interfaces.ITransport} and
- L{interfaces.IConsumer} to the L{Request} objects it creates. It also
- implements L{interfaces.IPushProducer} to C{self.transport}, allowing the
- transport to pause it.
- @ivar MAX_LENGTH: Maximum length for initial request line and each line
- from the header.
- @ivar _transferDecoder: L{None} or a decoder instance if the request body
- uses the I{chunked} Transfer-Encoding.
- @type _transferDecoder: L{_ChunkedTransferDecoder}
- @ivar maxHeaders: Maximum number of headers allowed per request.
- @type maxHeaders: C{int}
- @ivar totalHeadersSize: Maximum bytes for request line plus all headers
- from the request.
- @type totalHeadersSize: C{int}
- @ivar _receivedHeaderSize: Bytes received so far for the header.
- @type _receivedHeaderSize: C{int}
- @ivar _handlingRequest: Whether a request is currently being processed.
- @type _handlingRequest: L{bool}
- @ivar _dataBuffer: Any data that has been received from the connection
- while processing an outstanding request.
- @type _dataBuffer: L{list} of L{bytes}
- @ivar _networkProducer: Either the transport, if it provides
- L{interfaces.IPushProducer}, or a null implementation of
- L{interfaces.IPushProducer}. Used to attempt to prevent the transport
- from producing excess data when we're responding to a request.
- @type _networkProducer: L{interfaces.IPushProducer}
- @ivar _requestProducer: If the L{Request} object or anything it calls
- registers itself as an L{interfaces.IProducer}, it will be stored here.
- This is used to create a producing pipeline: pause/resume producing
- methods will be propagated from the C{transport}, through the
- L{HTTPChannel} instance, to the c{_requestProducer}.
- The reason we proxy through the producing methods rather than the old
- behaviour (where we literally just set the L{Request} object as the
- producer on the transport) is because we want to be able to exert
- backpressure on the client to prevent it from sending in arbitrarily
- many requests without ever reading responses. Essentially, if the
- client never reads our responses we will eventually stop reading its
- requests.
- @type _requestProducer: L{interfaces.IPushProducer}
- @ivar _requestProducerStreaming: A boolean that tracks whether the producer
- on the L{Request} side of this channel has registered itself as a
- L{interfaces.IPushProducer} or an L{interfaces.IPullProducer}.
- @type _requestProducerStreaming: L{bool} or L{None}
- @ivar _waitingForTransport: A boolean that tracks whether the transport has
- asked us to stop producing. This is used to keep track of what we're
- waiting for: if the transport has asked us to stop producing then we
- don't want to unpause the transport until it asks us to produce again.
- @type _waitingForTransport: L{bool}
- @ivar abortTimeout: The number of seconds to wait after we attempt to shut
- the transport down cleanly to give up and forcibly terminate it. This
- is only used when we time a connection out, to prevent errors causing
- the FD to get leaked. If this is L{None}, we will wait forever.
- @type abortTimeout: L{int}
- @ivar _abortingCall: The L{twisted.internet.base.DelayedCall} that will be
- used to forcibly close the transport if it doesn't close cleanly.
- @type _abortingCall: L{twisted.internet.base.DelayedCall}
- @ivar _optimisticEagerReadSize: When a resource takes a long time to answer
- a request (via L{twisted.web.server.NOT_DONE_YET}, hopefully one day by
- a L{Deferred}), we would like to be able to let that resource know
- about the underlying transport disappearing as promptly as possible,
- via L{Request.notifyFinish}, and therefore via
- C{self.requests[...].connectionLost()} on this L{HTTPChannel}.
- However, in order to simplify application logic, we implement
- head-of-line blocking, and do not relay pipelined requests to the
- application until the previous request has been answered. This means
- that said application cannot dispose of any entity-body that comes in
- from those subsequent requests, which may be arbitrarily large, and it
- may need to be buffered in memory.
- To implement this tradeoff between prompt notification when possible
- (in the most frequent case of non-pipelined requests) and correct
- behavior when not (say, if a client sends a very long-running GET
- request followed by a PUT request with a very large body) we will
- continue reading pipelined requests into C{self._dataBuffer} up to a
- given limit.
- C{_optimisticEagerReadSize} is the number of bytes we will accept from
- the client and buffer before pausing the transport.
- This behavior has been in place since Twisted 17.9.0 .
- @type _optimisticEagerReadSize: L{int}
- """
- maxHeaders = 500
- totalHeadersSize = 16384
- abortTimeout = 15
- length = 0
- persistent = 1
- __header = b""
- __first_line = 1
- __content = None
- # set in instances or subclasses
- requestFactory = Request
- _savedTimeOut = None
- _receivedHeaderCount = 0
- _receivedHeaderSize = 0
- _requestProducer = None
- _requestProducerStreaming = None
- _waitingForTransport = False
- _abortingCall = None
- _optimisticEagerReadSize = 0x4000
- _log = Logger()
- def __init__(self):
- # the request queue
- self.requests = []
- self._handlingRequest = False
- self._dataBuffer = []
- self._transferDecoder = None
- def connectionMade(self):
- self.setTimeout(self.timeOut)
- self._networkProducer = interfaces.IPushProducer(
- self.transport, _NoPushProducer()
- )
- self._networkProducer.registerProducer(self, True)
- def dataReceived(self, data):
- self.resetTimeout()
- basic.LineReceiver.dataReceived(self, data)
- def lineReceived(self, line):
- """
- Called for each line from request until the end of headers when
- it enters binary mode.
- """
- self._receivedHeaderSize += len(line)
- if self._receivedHeaderSize > self.totalHeadersSize:
- self._respondToBadRequestAndDisconnect()
- return
- if self.__first_line:
- # if this connection is not persistent, drop any data which
- # the client (illegally) sent after the last request.
- if not self.persistent:
- self.dataReceived = self.lineReceived = lambda *args: None
- return
- # IE sends an extraneous empty line (\r\n) after a POST request;
- # eat up such a line, but only ONCE
- if not line and self.__first_line == 1:
- self.__first_line = 2
- return
- # create a new Request object
- if INonQueuedRequestFactory.providedBy(self.requestFactory):
- request = self.requestFactory(self)
- else:
- request = self.requestFactory(self, len(self.requests))
- self.requests.append(request)
- self.__first_line = 0
- try:
- command, request, version = _parseRequestLine(line)
- except ValueError:
- self._respondToBadRequestAndDisconnect()
- return
- self._command = command
- self._path = request
- self._version = version
- elif line == b"":
- # End of headers.
- if self.__header:
- ok = self.headerReceived(self.__header)
- # If the last header we got is invalid, we MUST NOT proceed
- # with processing. We'll have sent a 400 anyway, so just stop.
- if not ok:
- return
- self.__header = b""
- self.allHeadersReceived()
- if self.length == 0:
- self.allContentReceived()
- else:
- self.setRawMode()
- elif line[0] in b" \t":
- # Continuation of a multi line header.
- self.__header += b" " + line.lstrip(b" \t")
- # Regular header line.
- # Processing of header line is delayed to allow accumulating multi
- # line headers.
- else:
- if self.__header:
- self.headerReceived(self.__header)
- self.__header = line
- def _finishRequestBody(self, data):
- self._dataBuffer.append(data)
- self.allContentReceived()
- def _maybeChooseTransferDecoder(self, header, data):
- """
- If the provided header is C{content-length} or
- C{transfer-encoding}, choose the appropriate decoder if any.
- Returns L{True} if the request can proceed and L{False} if not.
- """
- def fail():
- self._respondToBadRequestAndDisconnect()
- self.length = None
- return False
- # Can this header determine the length?
- if header == b"content-length":
- if not data.isdigit():
- return fail()
- try:
- length = int(data)
- except ValueError:
- return fail()
- newTransferDecoder = _IdentityTransferDecoder(
- length, self.requests[-1].handleContentChunk, self._finishRequestBody
- )
- elif header == b"transfer-encoding":
- # XXX Rather poorly tested code block, apparently only exercised by
- # test_chunkedEncoding
- if data.lower() == b"chunked":
- length = None
- newTransferDecoder = _ChunkedTransferDecoder(
- self.requests[-1].handleContentChunk, self._finishRequestBody
- )
- elif data.lower() == b"identity":
- return True
- else:
- return fail()
- else:
- # It's not a length related header, so exit
- return True
- if self._transferDecoder is not None:
- return fail()
- else:
- self.length = length
- self._transferDecoder = newTransferDecoder
- return True
- def headerReceived(self, line):
- """
- Do pre-processing (for content-length) and store this header away.
- Enforce the per-request header limit.
- @type line: C{bytes}
- @param line: A line from the header section of a request, excluding the
- line delimiter.
- @return: A flag indicating whether the header was valid.
- @rtype: L{bool}
- """
- try:
- header, data = line.split(b":", 1)
- except ValueError:
- self._respondToBadRequestAndDisconnect()
- return False
- # Header names must be tokens, per RFC 9110 section 5.1.
- if not _istoken(header):
- self._respondToBadRequestAndDisconnect()
- return False
- header = header.lower()
- data = data.strip(b" \t")
- if not self._maybeChooseTransferDecoder(header, data):
- return False
- self.requests[-1].requestHeaders.addRawHeader(header, data)
- self._receivedHeaderCount += 1
- if self._receivedHeaderCount > self.maxHeaders:
- self._respondToBadRequestAndDisconnect()
- return False
- return True
- def allContentReceived(self):
- command = self._command
- path = self._path
- version = self._version
- # reset ALL state variables, so we don't interfere with next request
- self.length = 0
- self._receivedHeaderCount = 0
- self._receivedHeaderSize = 0
- self.__first_line = 1
- self._transferDecoder = None
- del self._command, self._path, self._version
- # Disable the idle timeout, in case this request takes a long
- # time to finish generating output.
- if self.timeOut:
- self._savedTimeOut = self.setTimeout(None)
- self._handlingRequest = True
- # We go into raw mode here even though we will be receiving lines next
- # in the protocol; however, this data will be buffered and then passed
- # back to line mode in the setLineMode call in requestDone.
- self.setRawMode()
- req = self.requests[-1]
- req.requestReceived(command, path, version)
- def rawDataReceived(self, data: bytes) -> None:
- """
- This is called when this HTTP/1.1 parser is in raw mode rather than
- line mode.
- It may be in raw mode for one of two reasons:
- 1. All the headers of a request have been received and this
- L{HTTPChannel} is currently receiving its body.
- 2. The full content of a request has been received and is currently
- being processed asynchronously, and this L{HTTPChannel} is
- buffering the data of all subsequent requests to be parsed
- later.
- In the second state, the data will be played back later.
- @note: This isn't really a public API, and should be invoked only by
- L{LineReceiver}'s line parsing logic. If you wish to drive an
- L{HTTPChannel} from a custom data source, call C{dataReceived} on
- it directly.
- @see: L{LineReceive.rawDataReceived}
- """
- if self._handlingRequest:
- self._dataBuffer.append(data)
- if (
- sum(map(len, self._dataBuffer)) > self._optimisticEagerReadSize
- ) and not self._waitingForTransport:
- # If we received more data than a small limit while processing
- # the head-of-line request, apply TCP backpressure to our peer
- # to get them to stop sending more request data until we're
- # ready. See docstring for _optimisticEagerReadSize above.
- self._networkProducer.pauseProducing()
- return
- try:
- self._transferDecoder.dataReceived(data)
- except _MalformedChunkedDataError:
- self._respondToBadRequestAndDisconnect()
- def allHeadersReceived(self):
- req = self.requests[-1]
- req.parseCookies()
- self.persistent = self.checkPersistence(req, self._version)
- req.gotLength(self.length)
- # Handle 'Expect: 100-continue' with automated 100 response code,
- # a simplistic implementation of RFC 2686 8.2.3:
- expectContinue = req.requestHeaders.getRawHeaders(b"expect")
- if (
- expectContinue
- and expectContinue[0].lower() == b"100-continue"
- and self._version == b"HTTP/1.1"
- ):
- self._send100Continue()
- def checkPersistence(self, request, version):
- """
- Check if the channel should close or not.
- @param request: The request most recently received over this channel
- against which checks will be made to determine if this connection
- can remain open after a matching response is returned.
- @type version: C{bytes}
- @param version: The version of the request.
- @rtype: C{bool}
- @return: A flag which, if C{True}, indicates that this connection may
- remain open to receive another request; if C{False}, the connection
- must be closed in order to indicate the completion of the response
- to C{request}.
- """
- connection = request.requestHeaders.getRawHeaders(b"connection")
- if connection:
- tokens = [t.lower() for t in connection[0].split(b" ")]
- else:
- tokens = []
- # Once any HTTP 0.9 or HTTP 1.0 request is received, the connection is
- # no longer allowed to be persistent. At this point in processing the
- # request, we don't yet know if it will be possible to set a
- # Content-Length in the response. If it is not, then the connection
- # will have to be closed to end an HTTP 0.9 or HTTP 1.0 response.
- # If the checkPersistence call happened later, after the Content-Length
- # has been determined (or determined not to be set), it would probably
- # be possible to have persistent connections with HTTP 0.9 and HTTP 1.0.
- # This may not be worth the effort, though. Just use HTTP 1.1, okay?
- if version == b"HTTP/1.1":
- if b"close" in tokens:
- request.responseHeaders.setRawHeaders(b"connection", [b"close"])
- return False
- else:
- return True
- else:
- return False
- def requestDone(self, request):
- """
- Called by first request in queue when it is done.
- """
- if request != self.requests[0]:
- raise TypeError
- del self.requests[0]
- # We should only resume the producer if we're not waiting for the
- # transport.
- if not self._waitingForTransport:
- self._networkProducer.resumeProducing()
- if self.persistent:
- self._handlingRequest = False
- if self._savedTimeOut:
- self.setTimeout(self._savedTimeOut)
- # Receive our buffered data, if any.
- data = b"".join(self._dataBuffer)
- self._dataBuffer = []
- self.setLineMode(data)
- else:
- self.loseConnection()
- def timeoutConnection(self):
- self._log.info("Timing out client: {peer}", peer=str(self.transport.getPeer()))
- if self.abortTimeout is not None:
- # We use self.callLater because that's what TimeoutMixin does.
- self._abortingCall = self.callLater(
- self.abortTimeout, self.forceAbortClient
- )
- self.loseConnection()
- def forceAbortClient(self):
- """
- Called if C{abortTimeout} seconds have passed since the timeout fired,
- and the connection still hasn't gone away. This can really only happen
- on extremely bad connections or when clients are maliciously attempting
- to keep connections open.
- """
- self._log.info(
- "Forcibly timing out client: {peer}", peer=str(self.transport.getPeer())
- )
- # We want to lose track of the _abortingCall so that no-one tries to
- # cancel it.
- self._abortingCall = None
- self.transport.abortConnection()
- def connectionLost(self, reason):
- self.setTimeout(None)
- for request in self.requests:
- request.connectionLost(reason)
- # If we were going to force-close the transport, we don't have to now.
- if self._abortingCall is not None:
- self._abortingCall.cancel()
- self._abortingCall = None
- def isSecure(self):
- """
- Return L{True} if this channel is using a secure transport.
- Normally this method returns L{True} if this instance is using a
- transport that implements L{interfaces.ISSLTransport}.
- @returns: L{True} if this request is secure
- @rtype: C{bool}
- """
- if interfaces.ISSLTransport(self.transport, None) is not None:
- return True
- return False
- def writeHeaders(self, version, code, reason, headers):
- """Called by L{Request} objects to write a complete set of HTTP headers to
- a transport.
- @param version: The HTTP version in use.
- @type version: L{bytes}
- @param code: The HTTP status code to write.
- @type code: L{bytes}
- @param reason: The HTTP reason phrase to write.
- @type reason: L{bytes}
- @param headers: The headers to write to the transport.
- @type headers: L{twisted.web.http_headers.Headers}, or (for backwards
- compatibility purposes only) any iterable of two-tuples of
- L{bytes}, representing header names and header values. The latter
- option is not actually used by Twisted.
- """
- if not isinstance(headers, Headers):
- # Turn into Headers instance for security reasons, to make sure we
- # quite and sanitize everything. This variant should be removed
- # eventually, it's only here for backwards compatibility.
- sanitizedHeaders = Headers()
- for name, value in headers:
- sanitizedHeaders.addRawHeader(name, value)
- headers = sanitizedHeaders
- headerSequence = [version, b" ", code, b" ", reason, b"\r\n"]
- for name, values in headers.getAllRawHeaders():
- for value in values:
- headerSequence.extend((name, b": ", value, b"\r\n"))
- headerSequence.append(b"\r\n")
- self.transport.writeSequence(headerSequence)
- def write(self, data):
- """
- Called by L{Request} objects to write response data.
- @param data: The data chunk to write to the stream.
- @type data: L{bytes}
- @return: L{None}
- """
- self.transport.write(data)
- def writeSequence(self, iovec):
- """
- Write a list of strings to the HTTP response.
- @param iovec: A list of byte strings to write to the stream.
- @type iovec: L{list} of L{bytes}
- @return: L{None}
- """
- self.transport.writeSequence(iovec)
- def getPeer(self):
- """
- Get the remote address of this connection.
- @return: An L{IAddress} provider.
- """
- return self.transport.getPeer()
- def getHost(self):
- """
- Get the local address of this connection.
- @return: An L{IAddress} provider.
- """
- return self.transport.getHost()
- def loseConnection(self):
- """
- Closes the connection. Will write any data that is pending to be sent
- on the network, but if this response has not yet been written to the
- network will not write anything.
- @return: L{None}
- """
- self._networkProducer.unregisterProducer()
- return self.transport.loseConnection()
- def registerProducer(self, producer, streaming):
- """
- Register to receive data from a producer.
- This sets self to be a consumer for a producer. When this object runs
- out of data (as when a send(2) call on a socket succeeds in moving the
- last data from a userspace buffer into a kernelspace buffer), it will
- ask the producer to resumeProducing().
- For L{IPullProducer} providers, C{resumeProducing} will be called once
- each time data is required.
- For L{IPushProducer} providers, C{pauseProducing} will be called
- whenever the write buffer fills up and C{resumeProducing} will only be
- called when it empties.
- @type producer: L{IProducer} provider
- @param producer: The L{IProducer} that will be producing data.
- @type streaming: L{bool}
- @param streaming: C{True} if C{producer} provides L{IPushProducer},
- C{False} if C{producer} provides L{IPullProducer}.
- @raise RuntimeError: If a producer is already registered.
- @return: L{None}
- """
- if self._requestProducer is not None:
- raise RuntimeError(
- "Cannot register producer %s, because producer %s was never "
- "unregistered." % (producer, self._requestProducer)
- )
- if not streaming:
- producer = _PullToPush(producer, self)
- self._requestProducer = producer
- self._requestProducerStreaming = streaming
- if not streaming:
- producer.startStreaming()
- def unregisterProducer(self):
- """
- Stop consuming data from a producer, without disconnecting.
- @return: L{None}
- """
- if self._requestProducer is None:
- return
- if not self._requestProducerStreaming:
- self._requestProducer.stopStreaming()
- self._requestProducer = None
- self._requestProducerStreaming = None
- def stopProducing(self):
- """
- Stop producing data.
- The HTTPChannel doesn't *actually* implement this, beacuse the
- assumption is that it will only be called just before C{loseConnection}
- is called. There's nothing sensible we can do other than call
- C{loseConnection} anyway.
- """
- if self._requestProducer is not None:
- self._requestProducer.stopProducing()
- def pauseProducing(self):
- """
- Pause producing data.
- This will be called by the transport when the send buffers have been
- filled up. We want to simultaneously pause the producing L{Request}
- object and also pause our transport.
- The logic behind pausing the transport is specifically to avoid issues
- like https://twistedmatrix.com/trac/ticket/8868. In this case, our
- inability to send does not prevent us handling more requests, which
- means we increasingly queue up more responses in our send buffer
- without end. The easiest way to handle this is to ensure that if we are
- unable to send our responses, we will not read further data from the
- connection until the client pulls some data out. This is a bit of a
- blunt instrument, but it's ok.
- Note that this potentially interacts with timeout handling in a
- positive way. Once the transport is paused the client may run into a
- timeout which will cause us to tear the connection down. That's a good
- thing!
- """
- self._waitingForTransport = True
- # The first step is to tell any producer we might currently have
- # registered to stop producing. If we can slow our applications down
- # we should.
- if self._requestProducer is not None:
- self._requestProducer.pauseProducing()
- # The next step here is to pause our own transport, as discussed in the
- # docstring.
- if not self._handlingRequest:
- self._networkProducer.pauseProducing()
- def resumeProducing(self):
- """
- Resume producing data.
- This will be called by the transport when the send buffer has dropped
- enough to actually send more data. When this happens we can unpause any
- outstanding L{Request} producers we have, and also unpause our
- transport.
- """
- self._waitingForTransport = False
- if self._requestProducer is not None:
- self._requestProducer.resumeProducing()
- # We only want to resume the network producer if we're not currently
- # waiting for a response to show up.
- if not self._handlingRequest:
- self._networkProducer.resumeProducing()
- def _send100Continue(self):
- """
- Sends a 100 Continue response, used to signal to clients that further
- processing will be performed.
- """
- self.transport.write(b"HTTP/1.1 100 Continue\r\n\r\n")
- def _respondToBadRequestAndDisconnect(self):
- """
- This is a quick and dirty way of responding to bad requests.
- As described by HTTP standard we should be patient and accept the
- whole request from the client before sending a polite bad request
- response, even in the case when clients send tons of data.
- """
- self.transport.write(b"HTTP/1.1 400 Bad Request\r\n\r\n")
- self.loseConnection()
- def _escape(s):
- """
- Return a string like python repr, but always escaped as if surrounding
- quotes were double quotes.
- @param s: The string to escape.
- @type s: L{bytes} or L{str}
- @return: An escaped string.
- @rtype: L{str}
- """
- if not isinstance(s, bytes):
- s = s.encode("ascii")
- r = repr(s)
- if not isinstance(r, str):
- r = r.decode("ascii")
- if r.startswith("b"):
- r = r[1:]
- if r.startswith("'"):
- return r[1:-1].replace('"', '\\"').replace("\\'", "'")
- return r[1:-1]
- @provider(IAccessLogFormatter)
- def combinedLogFormatter(timestamp, request):
- """
- @return: A combined log formatted log line for the given request.
- @see: L{IAccessLogFormatter}
- """
- clientAddr = request.getClientAddress()
- if isinstance(
- clientAddr, (address.IPv4Address, address.IPv6Address, _XForwardedForAddress)
- ):
- ip = clientAddr.host
- else:
- ip = b"-"
- referrer = _escape(request.getHeader(b"referer") or b"-")
- agent = _escape(request.getHeader(b"user-agent") or b"-")
- line = (
- '"%(ip)s" - - %(timestamp)s "%(method)s %(uri)s %(protocol)s" '
- '%(code)d %(length)s "%(referrer)s" "%(agent)s"'
- % dict(
- ip=_escape(ip),
- timestamp=timestamp,
- method=_escape(request.method),
- uri=_escape(request.uri),
- protocol=_escape(request.clientproto),
- code=request.code,
- length=request.sentLength or "-",
- referrer=referrer,
- agent=agent,
- )
- )
- return line
- @implementer(interfaces.IAddress)
- class _XForwardedForAddress:
- """
- L{IAddress} which represents the client IP to log for a request, as gleaned
- from an X-Forwarded-For header.
- @ivar host: An IP address or C{b"-"}.
- @type host: L{bytes}
- @see: L{proxiedLogFormatter}
- """
- def __init__(self, host):
- self.host = host
- class _XForwardedForRequest(proxyForInterface(IRequest, "_request")): # type: ignore[misc]
- """
- Add a layer on top of another request that only uses the value of an
- X-Forwarded-For header as the result of C{getClientAddress}.
- """
- def getClientAddress(self):
- """
- The client address (the first address) in the value of the
- I{X-Forwarded-For header}. If the header is not present, the IP is
- considered to be C{b"-"}.
- @return: L{_XForwardedForAddress} which wraps the client address as
- expected by L{combinedLogFormatter}.
- """
- host = (
- self._request.requestHeaders.getRawHeaders(b"x-forwarded-for", [b"-"])[0]
- .split(b",")[0]
- .strip()
- )
- return _XForwardedForAddress(host)
- # These are missing from the interface. Forward them manually.
- @property
- def clientproto(self):
- """
- @return: The protocol version in the request.
- @rtype: L{bytes}
- """
- return self._request.clientproto
- @property
- def code(self):
- """
- @return: The response code for the request.
- @rtype: L{int}
- """
- return self._request.code
- @property
- def sentLength(self):
- """
- @return: The number of bytes sent in the response body.
- @rtype: L{int}
- """
- return self._request.sentLength
- @provider(IAccessLogFormatter)
- def proxiedLogFormatter(timestamp, request):
- """
- @return: A combined log formatted log line for the given request but use
- the value of the I{X-Forwarded-For} header as the value for the client
- IP address.
- @see: L{IAccessLogFormatter}
- """
- return combinedLogFormatter(timestamp, _XForwardedForRequest(request))
- class _GenericHTTPChannelProtocol(proxyForInterface(IProtocol, "_channel")): # type: ignore[misc]
- """
- A proxy object that wraps one of the HTTP protocol objects, and switches
- between them depending on TLS negotiated protocol.
- @ivar _negotiatedProtocol: The protocol negotiated with ALPN or NPN, if
- any.
- @type _negotiatedProtocol: Either a bytestring containing the ALPN token
- for the negotiated protocol, or L{None} if no protocol has yet been
- negotiated.
- @ivar _channel: The object capable of behaving like a L{HTTPChannel} that
- is backing this object. By default this is a L{HTTPChannel}, but if a
- HTTP protocol upgrade takes place this may be a different channel
- object. Must implement L{IProtocol}.
- @type _channel: L{HTTPChannel}
- @ivar _requestFactory: A callable to use to build L{IRequest} objects.
- @type _requestFactory: L{IRequest}
- @ivar _site: A reference to the creating L{twisted.web.server.Site} object.
- @type _site: L{twisted.web.server.Site}
- @ivar _factory: A reference to the creating L{HTTPFactory} object.
- @type _factory: L{HTTPFactory}
- @ivar _timeOut: A timeout value to pass to the backing channel.
- @type _timeOut: L{int} or L{None}
- @ivar _callLater: A value for the C{callLater} callback.
- @type _callLater: L{callable}
- """
- _negotiatedProtocol = None
- _requestFactory = Request
- _factory = None
- _site = None
- _timeOut = None
- _callLater = None
- @property
- def factory(self):
- """
- @see: L{_genericHTTPChannelProtocolFactory}
- """
- return self._channel.factory
- @factory.setter
- def factory(self, value):
- self._factory = value
- self._channel.factory = value
- @property
- def requestFactory(self):
- """
- A callable to use to build L{IRequest} objects.
- Retries the object from the current backing channel.
- """
- return self._channel.requestFactory
- @requestFactory.setter
- def requestFactory(self, value):
- """
- A callable to use to build L{IRequest} objects.
- Sets the object on the backing channel and also stores the value for
- propagation to any new channel.
- @param value: The new callable to use.
- @type value: A L{callable} returning L{IRequest}
- """
- self._requestFactory = value
- self._channel.requestFactory = value
- @property
- def site(self):
- """
- A reference to the creating L{twisted.web.server.Site} object.
- Returns the site object from the backing channel.
- """
- return self._channel.site
- @site.setter
- def site(self, value):
- """
- A reference to the creating L{twisted.web.server.Site} object.
- Sets the object on the backing channel and also stores the value for
- propagation to any new channel.
- @param value: The L{twisted.web.server.Site} object to set.
- @type value: L{twisted.web.server.Site}
- """
- self._site = value
- self._channel.site = value
- @property
- def timeOut(self):
- """
- The idle timeout for the backing channel.
- """
- return self._channel.timeOut
- @timeOut.setter
- def timeOut(self, value):
- """
- The idle timeout for the backing channel.
- Sets the idle timeout on both the backing channel and stores it for
- propagation to any new backing channel.
- @param value: The timeout to set.
- @type value: L{int} or L{float}
- """
- self._timeOut = value
- self._channel.timeOut = value
- @property
- def callLater(self):
- """
- A value for the C{callLater} callback. This callback is used by the
- L{twisted.protocols.policies.TimeoutMixin} to handle timeouts.
- """
- return self._channel.callLater
- @callLater.setter
- def callLater(self, value):
- """
- Sets the value for the C{callLater} callback. This callback is used by
- the L{twisted.protocols.policies.TimeoutMixin} to handle timeouts.
- @param value: The new callback to use.
- @type value: L{callable}
- """
- self._callLater = value
- self._channel.callLater = value
- def dataReceived(self, data):
- """
- An override of L{IProtocol.dataReceived} that checks what protocol we're
- using.
- """
- if self._negotiatedProtocol is None:
- negotiatedProtocol = getattr(
- self._channel.transport, "negotiatedProtocol", b"http/1.1"
- )
- if negotiatedProtocol is None:
- negotiatedProtocol = b"http/1.1"
- if negotiatedProtocol == b"h2":
- if not H2_ENABLED:
- raise ValueError("Negotiated HTTP/2 without support.")
- # We need to make sure that the HTTPChannel is unregistered
- # from the transport so that the H2Connection can register
- # itself if possible.
- networkProducer = self._channel._networkProducer
- networkProducer.unregisterProducer()
- # Cancel the old channel's timeout.
- self._channel.setTimeout(None)
- transport = self._channel.transport
- self._channel = H2Connection()
- self._channel.requestFactory = self._requestFactory
- self._channel.site = self._site
- self._channel.factory = self._factory
- self._channel.timeOut = self._timeOut
- self._channel.callLater = self._callLater
- self._channel.makeConnection(transport)
- # Register the H2Connection as the transport's
- # producer, so that the transport can apply back
- # pressure.
- networkProducer.registerProducer(self._channel, True)
- else:
- # Only HTTP/2 and HTTP/1.1 are supported right now.
- assert (
- negotiatedProtocol == b"http/1.1"
- ), "Unsupported protocol negotiated"
- self._negotiatedProtocol = negotiatedProtocol
- return self._channel.dataReceived(data)
- def _genericHTTPChannelProtocolFactory(self):
- """
- Returns an appropriately initialized _GenericHTTPChannelProtocol.
- """
- return _GenericHTTPChannelProtocol(HTTPChannel())
- class _MinimalLogFile(TypingProtocol):
- def write(self, data: str, /) -> object:
- """
- Write some data.
- """
- def close(self) -> None:
- """
- Close the file.
- """
- value: type[_MinimalLogFile] = TextIOWrapper
- class HTTPFactory(protocol.ServerFactory):
- """
- Factory for HTTP server.
- @ivar _logDateTime: A cached datetime string for log messages, updated by
- C{_logDateTimeCall}.
- @type _logDateTime: C{str}
- @ivar _logDateTimeCall: A delayed call for the next update to the cached
- log datetime string.
- @type _logDateTimeCall: L{IDelayedCall} provided
- @ivar _logFormatter: See the C{logFormatter} parameter to L{__init__}
- @ivar _nativeize: A flag that indicates whether the log file being written
- to wants native strings (C{True}) or bytes (C{False}). This is only to
- support writing to L{twisted.python.log} which, unfortunately, works
- with native strings.
- @ivar reactor: An L{IReactorTime} provider used to manage connection
- timeouts and compute logging timestamps.
- """
- # We need to ignore the mypy error here, because
- # _genericHTTPChannelProtocolFactory is a callable which returns a proxy
- # to a Protocol, instead of a concrete Protocol object, as expected in
- # the protocol.Factory interface
- protocol = _genericHTTPChannelProtocolFactory # type: ignore[assignment]
- logPath = None
- _logFile: _MinimalLogFile | None = None
- timeOut: int | float | None = _REQUEST_TIMEOUT
- def __init__(
- self,
- logPath: str | bytes | None = None,
- timeout: int | float = _REQUEST_TIMEOUT,
- logFormatter: IAccessLogFormatter | None = None,
- reactor: IReactorTime | None = None,
- ):
- """
- @param logPath: File path to which access log messages will be written
- or C{None} to disable logging.
- @type logPath: L{str} or L{bytes}
- @param timeout: The initial value of L{timeOut}, which defines the idle
- connection timeout in seconds, or C{None} to disable the idle
- timeout.
- @type timeout: L{float}
- @param logFormatter: An object to format requests into log lines for
- the access log. L{combinedLogFormatter} when C{None} is passed.
- @type logFormatter: L{IAccessLogFormatter} provider
- @param reactor: An L{IReactorTime} provider used to manage connection
- timeouts and compute logging timestamps. Defaults to the global
- reactor.
- """
- if reactor is None:
- from twisted.internet import reactor # type:ignore[assignment]
- self.reactor: IReactorTime = reactor # type:ignore[assignment]
- if logPath is not None:
- logPath = os.path.abspath(logPath)
- self.logPath = logPath
- self.timeOut = timeout
- if logFormatter is None:
- logFormatter = combinedLogFormatter
- self._logFormatter = logFormatter
- # For storing the cached log datetime and the callback to update it
- self._logDateTime: str | None = None
- self._logDateTimeCall: IDelayedCall | None = None
- logFile = property()
- """
- A file (object with C{write(data: str)} and C{close()} methods) that will
- be used for logging HTTP requests and responses in the standard U{Combined
- Log Format <https://en.wikipedia.org/wiki/Common_Log_Format>} .
- @note: for backwards compatibility purposes, this may be I{set} to an
- object with a C{write(data: bytes)} method, but these will be detected
- (by checking if it's an instance of L{BufferedIOBase}) and replaced
- with a L{TextIOWrapper} when retrieved by getting the attribute again.
- """
- @logFile.getter
- def _get_logFile(self) -> _MinimalLogFile:
- if self._logFile is None:
- raise AttributeError("no log file present")
- return self._logFile
- @_get_logFile.setter
- def _set_logFile(self, newLogFile: BufferedIOBase | _MinimalLogFile) -> None:
- if isinstance(newLogFile, BufferedIOBase):
- newLogFile = TextIOWrapper(
- newLogFile, # type:ignore[arg-type]
- "utf-8",
- write_through=True,
- newline="\n",
- )
- self._logFile = newLogFile
- logFile = _set_logFile
- def _updateLogDateTime(self) -> None:
- """
- Update log datetime periodically, so we aren't always recalculating it.
- """
- self._logDateTime = datetimeToLogString(self.reactor.seconds())
- self._logDateTimeCall = self.reactor.callLater(1, self._updateLogDateTime)
- def buildProtocol(self, addr: IAddress) -> Protocol | None:
- p = protocol.ServerFactory.buildProtocol(self, addr)
- # This is a bit of a hack to ensure that the HTTPChannel timeouts
- # occur on the same reactor as the one we're using here. This could
- # ideally be resolved by passing the reactor more generally to the
- # HTTPChannel, but that won't work for the TimeoutMixin until we fix
- # https://twistedmatrix.com/trac/ticket/8488
- p.callLater = self.reactor.callLater # type:ignore[union-attr]
- # timeOut needs to be on the Protocol instance cause
- # TimeoutMixin expects it there
- p.timeOut = self.timeOut # type:ignore[union-attr]
- return p
- def startFactory(self) -> None:
- """
- Set up request logging if necessary.
- """
- if self._logDateTimeCall is None:
- self._updateLogDateTime()
- self._logFile = self._openLogFile(self.logPath) if self.logPath else log.logfile
- def stopFactory(self) -> None:
- if self._logFile is not None:
- if self._logFile != log.logfile:
- self._logFile.close()
- self._logFile = None
- if self._logDateTimeCall is not None and self._logDateTimeCall.active():
- self._logDateTimeCall.cancel()
- self._logDateTimeCall = None
- def _openLogFile(self, path: str | bytes) -> _MinimalLogFile:
- """
- Override in subclasses, e.g. to use L{twisted.python.logfile}.
- """
- return open(path, "a", 1, newline="\n")
- def log(self, request: Request) -> None:
- """
- Write a line representing C{request} to the access log file.
- @param request: The request object about which to log.
- """
- logFile = self._logFile
- if logFile is not None:
- line = self._logFormatter(self._logDateTime, request) + "\n"
- logFile.write(line)
|