__init__.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
""" Standard "encodings" Package

    Standard Python encoding modules are stored in this package
    directory.

    Codec modules must have names corresponding to normalized encoding
    names as defined in the normalize_encoding() function below, e.g.
    'utf-8' must be implemented by the module 'utf_8.py'.

    Each codec module must export the following interface:

    * getregentry() -> codecs.CodecInfo object
    The getregentry() API must return a CodecInfo object with encoder, decoder,
    incrementalencoder, incrementaldecoder, streamwriter and streamreader
    attributes which adhere to the Python Codec Interface Standard.

    In addition, a module may optionally also define the following
    APIs which are then used by the package's codec search function:

    * getaliases() -> sequence of encoding name strings to use as aliases

    Alias names returned by getaliases() must be normalized encoding
    names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
  20. import codecs
  21. import sys
  22. from . import aliases
# Results of previous search_function() calls, keyed by the encoding name
# exactly as it was passed in.  A stored value of None records a failed
# lookup so the import attempt is not repeated.
_cache = {}
# Sentinel used as the default for _cache.get(), so that a cached None
# (known failure) can be told apart from "not cached yet".
_unknown = '--unknown--'
# Passed as the fromlist argument to __import__() in search_function().
_import_tail = ['*']
# Alias table mapping normalized encoding names to codec module names.
# search_function() adds entries reported by a codec module's getaliases().
_aliases = aliases.aliases
  27. class CodecRegistryError(LookupError, SystemError):
  28. pass
  29. def normalize_encoding(encoding):
  30. """ Normalize an encoding name.
  31. Normalization works as follows: all non-alphanumeric
  32. characters except the dot used for Python package names are
  33. collapsed and replaced with a single underscore, e.g. ' -;#'
  34. becomes '_'. Leading and trailing underscores are removed.
  35. Note that encoding names should be ASCII only.
  36. """
  37. if isinstance(encoding, bytes):
  38. encoding = str(encoding, "ascii")
  39. chars = []
  40. punct = False
  41. for c in encoding:
  42. if c.isalnum() or c == '.':
  43. if punct and chars:
  44. chars.append('_')
  45. if c.isascii():
  46. chars.append(c)
  47. punct = False
  48. else:
  49. punct = True
  50. return ''.join(chars)
  51. def search_function(encoding):
  52. # Cache lookup
  53. entry = _cache.get(encoding, _unknown)
  54. if entry is not _unknown:
  55. return entry
  56. # Import the module:
  57. #
  58. # First try to find an alias for the normalized encoding
  59. # name and lookup the module using the aliased name, then try to
  60. # lookup the module using the standard import scheme, i.e. first
  61. # try in the encodings package, then at top-level.
  62. #
  63. norm_encoding = normalize_encoding(encoding)
  64. aliased_encoding = _aliases.get(norm_encoding) or \
  65. _aliases.get(norm_encoding.replace('.', '_'))
  66. if aliased_encoding is not None:
  67. modnames = [aliased_encoding,
  68. norm_encoding]
  69. else:
  70. modnames = [norm_encoding]
  71. for modname in modnames:
  72. if not modname or '.' in modname:
  73. continue
  74. try:
  75. # Import is absolute to prevent the possibly malicious import of a
  76. # module with side-effects that is not in the 'encodings' package.
  77. mod = __import__('encodings.' + modname, fromlist=_import_tail,
  78. level=0)
  79. except ImportError:
  80. # ImportError may occur because 'encodings.(modname)' does not exist,
  81. # or because it imports a name that does not exist (see mbcs and oem)
  82. pass
  83. else:
  84. break
  85. else:
  86. mod = None
  87. try:
  88. getregentry = mod.getregentry
  89. except AttributeError:
  90. # Not a codec module
  91. mod = None
  92. if mod is None:
  93. # Cache misses
  94. _cache[encoding] = None
  95. return None
  96. # Now ask the module for the registry entry
  97. entry = getregentry()
  98. if not isinstance(entry, codecs.CodecInfo):
  99. if not 4 <= len(entry) <= 7:
  100. raise CodecRegistryError('module "%s" (%s) failed to register'
  101. % (mod.__name__, mod.__file__))
  102. if not callable(entry[0]) or not callable(entry[1]) or \
  103. (entry[2] is not None and not callable(entry[2])) or \
  104. (entry[3] is not None and not callable(entry[3])) or \
  105. (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
  106. (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
  107. raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
  108. % (mod.__name__, mod.__file__))
  109. if len(entry)<7 or entry[6] is None:
  110. entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],)
  111. entry = codecs.CodecInfo(*entry)
  112. # Cache the codec registry entry
  113. _cache[encoding] = entry
  114. # Register its aliases (without overwriting previously registered
  115. # aliases)
  116. try:
  117. codecaliases = mod.getaliases()
  118. except AttributeError:
  119. pass
  120. else:
  121. for alias in codecaliases:
  122. if alias not in _aliases:
  123. _aliases[alias] = modname
  124. # Return the registry entry
  125. return entry
# Register the search_function in the Python codec registry.  This runs
# at import time of this package.
codecs.register(search_function)
  128. if sys.platform == 'win32':
  129. # bpo-671666, bpo-46668: If Python does not implement a codec for current
  130. # Windows ANSI code page, use the "mbcs" codec instead:
  131. # WideCharToMultiByte() and MultiByteToWideChar() functions with CP_ACP.
  132. # Python does not support custom code pages.
  133. def _alias_mbcs(encoding):
  134. try:
  135. import _winapi
  136. ansi_code_page = "cp%s" % _winapi.GetACP()
  137. if encoding == ansi_code_page:
  138. import encodings.mbcs
  139. return encodings.mbcs.getregentry()
  140. except ImportError:
  141. # Imports may fail while we are shutting down
  142. pass
  143. codecs.register(_alias_mbcs)