123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294 |
- # -*- test-case-name: twisted.web.test.test_http_headers -*-
- # Copyright (c) Twisted Matrix Laboratories.
- # See LICENSE for details.
- """
- An API for storing HTTP header names and values.
- """
- from __future__ import division, absolute_import
- from twisted.python.compat import comparable, cmp, unicode
- def _dashCapitalize(name):
- """
- Return a byte string which is capitalized using '-' as a word separator.
- @param name: The name of the header to capitalize.
- @type name: L{bytes}
- @return: The given header capitalized using '-' as a word separator.
- @rtype: L{bytes}
- """
- return b'-'.join([word.capitalize() for word in name.split(b'-')])
- def _sanitizeLinearWhitespace(headerComponent):
- r"""
- Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header key
- or value with a single space. If C{headerComponent} is not
- L{bytes}, it is passed through unchanged.
- @param headerComponent: The header key or value to sanitize.
- @type headerComponent: L{bytes}
- @return: The sanitized header key or value.
- @rtype: L{bytes}
- """
- return b' '.join(headerComponent.splitlines())
- @comparable
- class Headers(object):
- """
- Stores HTTP headers in a key and multiple value format.
- Most methods accept L{bytes} and L{unicode}, with an internal L{bytes}
- representation. When passed L{unicode}, header names (e.g. 'Content-Type')
- are encoded using ISO-8859-1 and header values (e.g.
- 'text/html;charset=utf-8') are encoded using UTF-8. Some methods that return
- values will return them in the same type as the name given.
- If the header keys or values cannot be encoded or decoded using the rules
- above, using just L{bytes} arguments to the methods of this class will
- ensure no decoding or encoding is done, and L{Headers} will treat the keys
- and values as opaque byte strings.
- @cvar _caseMappings: A L{dict} that maps lowercase header names
- to their canonicalized representation.
- @ivar _rawHeaders: A L{dict} mapping header names as L{bytes} to L{list}s of
- header values as L{bytes}.
- """
- _caseMappings = {
- b'content-md5': b'Content-MD5',
- b'dnt': b'DNT',
- b'etag': b'ETag',
- b'p3p': b'P3P',
- b'te': b'TE',
- b'www-authenticate': b'WWW-Authenticate',
- b'x-xss-protection': b'X-XSS-Protection'}
- def __init__(self, rawHeaders=None):
- self._rawHeaders = {}
- if rawHeaders is not None:
- for name, values in rawHeaders.items():
- self.setRawHeaders(name, values)
- def __repr__(self):
- """
- Return a string fully describing the headers set on this object.
- """
- return '%s(%r)' % (self.__class__.__name__, self._rawHeaders,)
- def __cmp__(self, other):
- """
- Define L{Headers} instances as being equal to each other if they have
- the same raw headers.
- """
- if isinstance(other, Headers):
- return cmp(
- sorted(self._rawHeaders.items()),
- sorted(other._rawHeaders.items()))
- return NotImplemented
- def _encodeName(self, name):
- """
- Encode the name of a header (eg 'Content-Type') to an ISO-8859-1 encoded
- bytestring if required.
- @param name: A HTTP header name
- @type name: L{unicode} or L{bytes}
- @return: C{name}, encoded if required, lowercased
- @rtype: L{bytes}
- """
- if isinstance(name, unicode):
- return name.lower().encode('iso-8859-1')
- return name.lower()
- def _encodeValue(self, value):
- """
- Encode a single header value to a UTF-8 encoded bytestring if required.
- @param value: A single HTTP header value.
- @type value: L{bytes} or L{unicode}
- @return: C{value}, encoded if required
- @rtype: L{bytes}
- """
- if isinstance(value, unicode):
- return value.encode('utf8')
- return value
- def _encodeValues(self, values):
- """
- Encode a L{list} of header values to a L{list} of UTF-8 encoded
- bytestrings if required.
- @param values: A list of HTTP header values.
- @type values: L{list} of L{bytes} or L{unicode} (mixed types allowed)
- @return: C{values}, with each item encoded if required
- @rtype: L{list} of L{bytes}
- """
- newValues = []
- for value in values:
- newValues.append(self._encodeValue(value))
- return newValues
- def _decodeValues(self, values):
- """
- Decode a L{list} of header values into a L{list} of Unicode strings.
- @param values: A list of HTTP header values.
- @type values: L{list} of UTF-8 encoded L{bytes}
- @return: C{values}, with each item decoded
- @rtype: L{list} of L{unicode}
- """
- newValues = []
- for value in values:
- newValues.append(value.decode('utf8'))
- return newValues
- def copy(self):
- """
- Return a copy of itself with the same headers set.
- @return: A new L{Headers}
- """
- return self.__class__(self._rawHeaders)
- def hasHeader(self, name):
- """
- Check for the existence of a given header.
- @type name: L{bytes} or L{unicode}
- @param name: The name of the HTTP header to check for.
- @rtype: L{bool}
- @return: C{True} if the header exists, otherwise C{False}.
- """
- return self._encodeName(name) in self._rawHeaders
- def removeHeader(self, name):
- """
- Remove the named header from this header object.
- @type name: L{bytes} or L{unicode}
- @param name: The name of the HTTP header to remove.
- @return: L{None}
- """
- self._rawHeaders.pop(self._encodeName(name), None)
- def setRawHeaders(self, name, values):
- """
- Sets the raw representation of the given header.
- @type name: L{bytes} or L{unicode}
- @param name: The name of the HTTP header to set the values for.
- @type values: L{list} of L{bytes} or L{unicode} strings
- @param values: A list of strings each one being a header value of
- the given name.
- @return: L{None}
- """
- if not isinstance(values, list):
- raise TypeError("Header entry %r should be list but found "
- "instance of %r instead" % (name, type(values)))
- name = _sanitizeLinearWhitespace(self._encodeName(name))
- encodedValues = [_sanitizeLinearWhitespace(v)
- for v in self._encodeValues(values)]
- self._rawHeaders[name] = self._encodeValues(encodedValues)
- def addRawHeader(self, name, value):
- """
- Add a new raw value for the given header.
- @type name: L{bytes} or L{unicode}
- @param name: The name of the header for which to set the value.
- @type value: L{bytes} or L{unicode}
- @param value: The value to set for the named header.
- """
- values = self.getRawHeaders(name)
- if values is not None:
- values.append(value)
- else:
- values = [value]
- self.setRawHeaders(name, values)
- def getRawHeaders(self, name, default=None):
- """
- Returns a list of headers matching the given name as the raw string
- given.
- @type name: L{bytes} or L{unicode}
- @param name: The name of the HTTP header to get the values of.
- @param default: The value to return if no header with the given C{name}
- exists.
- @rtype: L{list} of strings, same type as C{name} (except when
- C{default} is returned).
- @return: If the named header is present, a L{list} of its
- values. Otherwise, C{default}.
- """
- encodedName = self._encodeName(name)
- values = self._rawHeaders.get(encodedName, default)
- if isinstance(name, unicode) and values is not default:
- return self._decodeValues(values)
- return values
- def getAllRawHeaders(self):
- """
- Return an iterator of key, value pairs of all headers contained in this
- object, as L{bytes}. The keys are capitalized in canonical
- capitalization.
- """
- for k, v in self._rawHeaders.items():
- yield self._canonicalNameCaps(k), v
- def _canonicalNameCaps(self, name):
- """
- Return the canonical name for the given header.
- @type name: L{bytes}
- @param name: The all-lowercase header name to capitalize in its
- canonical form.
- @rtype: L{bytes}
- @return: The canonical name of the header.
- """
- return self._caseMappings.get(name, _dashCapitalize(name))
- __all__ = ['Headers']
|