strings.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. import locale
  2. import logging
  3. import six
  4. import sys
  5. import codecs
  6. import library.python.func
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Encoding used as the default argument by the conversion helpers below.
DEFAULT_ENCODING = 'utf-8'
# Error handler passed to the decode()/encode() calls made by to_unicode()
# and to_str().
ENCODING_ERRORS_POLICY = 'replace'
  10. def left_strip(el, prefix):
  11. """
  12. Strips prefix at the left of el
  13. """
  14. if el.startswith(prefix):
  15. return el[len(prefix):]
  16. return el
  17. # Explicit to-text conversion
  18. # Chooses between str/unicode, i.e. six.binary_type/six.text_type
  19. def to_basestring(value):
  20. if isinstance(value, (six.binary_type, six.text_type)):
  21. return value
  22. try:
  23. if six.PY2:
  24. return unicode(value)
  25. else:
  26. return str(value)
  27. except UnicodeDecodeError:
  28. try:
  29. return str(value)
  30. except UnicodeEncodeError:
  31. return repr(value)
  32. to_text = to_basestring
  33. def to_unicode(value, from_enc=DEFAULT_ENCODING):
  34. if isinstance(value, six.text_type):
  35. return value
  36. if isinstance(value, six.binary_type):
  37. if six.PY2:
  38. return unicode(value, from_enc, ENCODING_ERRORS_POLICY)
  39. else:
  40. return value.decode(from_enc, errors=ENCODING_ERRORS_POLICY)
  41. return six.text_type(value)
  42. # Optional from_enc enables transcoding
  43. def to_str(value, to_enc=DEFAULT_ENCODING, from_enc=None):
  44. if isinstance(value, six.binary_type):
  45. if from_enc is None or to_enc == from_enc:
  46. # Unknown input encoding or input and output encoding are the same
  47. return value
  48. value = to_unicode(value, from_enc=from_enc)
  49. if isinstance(value, six.text_type):
  50. return value.encode(to_enc, ENCODING_ERRORS_POLICY)
  51. return six.binary_type(value)
  52. def _convert_deep(x, enc, convert, relaxed=True):
  53. if x is None:
  54. return None
  55. if isinstance(x, (six.text_type, six.binary_type)):
  56. return convert(x, enc)
  57. if isinstance(x, dict):
  58. return {convert(k, enc): _convert_deep(v, enc, convert, relaxed) for k, v in six.iteritems(x)}
  59. if isinstance(x, list):
  60. return [_convert_deep(e, enc, convert, relaxed) for e in x]
  61. if isinstance(x, tuple):
  62. return tuple([_convert_deep(e, enc, convert, relaxed) for e in x])
  63. if relaxed:
  64. return x
  65. raise TypeError('unsupported type')
  66. def unicodize_deep(x, enc=DEFAULT_ENCODING, relaxed=True):
  67. return _convert_deep(x, enc, to_unicode, relaxed)
  68. def stringize_deep(x, enc=DEFAULT_ENCODING, relaxed=True):
  69. return _convert_deep(x, enc, to_str, relaxed)
  70. @library.python.func.memoize()
  71. def locale_encoding():
  72. try:
  73. loc = locale.getdefaultlocale()[1]
  74. if loc:
  75. codecs.lookup(loc)
  76. return loc
  77. except LookupError as e:
  78. logger.debug('Cannot get system locale: %s', e)
  79. return None
  80. except ValueError as e:
  81. logger.warn('Cannot get system locale: %s', e)
  82. return None
  83. def fs_encoding():
  84. return sys.getfilesystemencoding()
  85. def guess_default_encoding():
  86. enc = locale_encoding()
  87. return enc if enc else DEFAULT_ENCODING
  88. @library.python.func.memoize()
  89. def get_stream_encoding(stream):
  90. if stream.encoding:
  91. try:
  92. codecs.lookup(stream.encoding)
  93. return stream.encoding
  94. except LookupError:
  95. pass
  96. return DEFAULT_ENCODING
  97. def encode(value, encoding=DEFAULT_ENCODING):
  98. if isinstance(value, six.binary_type):
  99. value = value.decode(encoding, errors='ignore')
  100. return value.encode(encoding)