unicode.pxd 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
  1. cdef extern from *:
  2. # Return true if the object o is a Unicode object or an instance
  3. # of a Unicode subtype. Changed in version 2.2: Allowed subtypes
  4. # to be accepted.
  5. bint PyUnicode_Check(object o)
  6. # Return true if the object o is a Unicode object, but not an
  7. # instance of a subtype. New in version 2.2.
  8. bint PyUnicode_CheckExact(object o)
  9. # Return the size of the object. o has to be a PyUnicodeObject
  10. # (not checked).
  11. #
  12. # Deprecated since version 3.3, will be removed in version 3.10:
  13. # Part of the old-style Unicode API, please migrate to using
  14. # PyUnicode_GET_LENGTH().
  15. Py_ssize_t PyUnicode_GET_SIZE(object o)
  16. # Return the length of the Unicode string, in code points. o has
  17. # to be a Unicode object in the “canonical” representation (not
  18. # checked).
  19. #
  20. # New in version 3.3.
  21. Py_ssize_t PyUnicode_GET_LENGTH(object o)
  22. # Return the size of the object's internal buffer in bytes. o has
  23. # to be a PyUnicodeObject (not checked).
  24. Py_ssize_t PyUnicode_GET_DATA_SIZE(object o)
  25. # Return a pointer to the internal Py_UNICODE buffer of the
  26. # object. o has to be a PyUnicodeObject (not checked).
  27. Py_UNICODE* PyUnicode_AS_UNICODE(object o)
  28. # Return a pointer to the internal buffer of the object. o has to
  29. # be a PyUnicodeObject (not checked).
  30. char* PyUnicode_AS_DATA(object o)
  31. # Return 1 or 0 depending on whether ch is a whitespace character.
  32. bint Py_UNICODE_ISSPACE(Py_UCS4 ch)
  33. # Return 1 or 0 depending on whether ch is a lowercase character.
  34. bint Py_UNICODE_ISLOWER(Py_UCS4 ch)
  35. # Return 1 or 0 depending on whether ch is an uppercase character.
  36. bint Py_UNICODE_ISUPPER(Py_UCS4 ch)
  37. # Return 1 or 0 depending on whether ch is a titlecase character.
  38. bint Py_UNICODE_ISTITLE(Py_UCS4 ch)
  39. # Return 1 or 0 depending on whether ch is a linebreak character.
  40. bint Py_UNICODE_ISLINEBREAK(Py_UCS4 ch)
  41. # Return 1 or 0 depending on whether ch is a decimal character.
  42. bint Py_UNICODE_ISDECIMAL(Py_UCS4 ch)
  43. # Return 1 or 0 depending on whether ch is a digit character.
  44. bint Py_UNICODE_ISDIGIT(Py_UCS4 ch)
  45. # Return 1 or 0 depending on whether ch is a numeric character.
  46. bint Py_UNICODE_ISNUMERIC(Py_UCS4 ch)
  47. # Return 1 or 0 depending on whether ch is an alphabetic character.
  48. bint Py_UNICODE_ISALPHA(Py_UCS4 ch)
  49. # Return 1 or 0 depending on whether ch is an alphanumeric character.
  50. bint Py_UNICODE_ISALNUM(Py_UCS4 ch)
  51. # Return the character ch converted to lower case.
  52. # Used to return a Py_UNICODE value before Py3.3.
  53. Py_UCS4 Py_UNICODE_TOLOWER(Py_UCS4 ch)
  54. # Return the character ch converted to upper case.
  55. # Used to return a Py_UNICODE value before Py3.3.
  56. Py_UCS4 Py_UNICODE_TOUPPER(Py_UCS4 ch)
  57. # Return the character ch converted to title case.
  58. # Used to return a Py_UNICODE value before Py3.3.
  59. Py_UCS4 Py_UNICODE_TOTITLE(Py_UCS4 ch)
  60. # Return the character ch converted to a decimal positive
  61. # integer. Return -1 if this is not possible. This macro does not
  62. # raise exceptions.
  63. int Py_UNICODE_TODECIMAL(Py_UCS4 ch)
  64. # Return the character ch converted to a single digit
  65. # integer. Return -1 if this is not possible. This macro does not
  66. # raise exceptions.
  67. int Py_UNICODE_TODIGIT(Py_UCS4 ch)
  68. # Return the character ch converted to a double. Return -1.0 if
  69. # this is not possible. This macro does not raise exceptions.
  70. double Py_UNICODE_TONUMERIC(Py_UCS4 ch)
  71. # To create Unicode objects and access their basic sequence
  72. # properties, use these APIs:
  73. # Create a Unicode Object from the Py_UNICODE buffer u of the
  74. # given size. u may be NULL which causes the contents to be
  75. # undefined. It is the user's responsibility to fill in the needed
  76. # data. The buffer is copied into the new object. If the buffer is
  77. # not NULL, the return value might be a shared object. Therefore,
  78. # modification of the resulting Unicode object is only allowed
  79. # when u is NULL.
  80. unicode PyUnicode_FromUnicode(Py_UNICODE *u, Py_ssize_t size)
  81. # Create a Unicode Object from the given Unicode code point ordinal.
  82. #
  83. # The ordinal must be in range(0x10000) on narrow Python builds
  84. # (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError
  85. # is raised in case it is not.
  86. unicode PyUnicode_FromOrdinal(int ordinal)
  87. # Return a read-only pointer to the Unicode object's internal
  88. # Py_UNICODE buffer, NULL if unicode is not a Unicode object.
  89. Py_UNICODE* PyUnicode_AsUnicode(object o) except NULL
  90. # Return the length of the Unicode object.
  91. Py_ssize_t PyUnicode_GetSize(object o) except -1
  92. # Coerce an encoded object obj to a Unicode object and return a
  93. # reference with incremented refcount.
  94. # String and other char buffer compatible objects are decoded
  95. # according to the given encoding and using the error handling
  96. # defined by errors. Both can be NULL to have the interface use
  97. # the default values (see the next section for details).
  98. # All other objects, including Unicode objects, cause a TypeError
  99. # to be set.
  100. object PyUnicode_FromEncodedObject(object o, char *encoding, char *errors)
  101. # Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict")
  102. # which is used throughout the interpreter whenever coercion to
  103. # Unicode is needed.
  104. object PyUnicode_FromObject(object obj)
  105. # If the platform supports wchar_t and provides a header file
  106. # wchar.h, Python can interface directly to this type using the
  107. # following functions. Support is optimized if Python's own
  108. # Py_UNICODE type is identical to the system's wchar_t.
  109. #ctypedef int wchar_t
  110. # Create a Unicode object from the wchar_t buffer w of the given
  111. # size. Return NULL on failure.
  112. #PyObject* PyUnicode_FromWideChar(wchar_t *w, Py_ssize_t size)
  113. #Py_ssize_t PyUnicode_AsWideChar(object o, wchar_t *w, Py_ssize_t size)
  114. # Unicode Methods
  115. # Concat two strings giving a new Unicode string.
  116. # Return value: New reference.
  117. unicode PyUnicode_Concat(object left, object right)
  118. # Split a string giving a list of Unicode strings. If sep is NULL,
  119. # splitting will be done at all whitespace substrings. Otherwise,
  120. # splits occur at the given separator. At most maxsplit splits will
  121. # be done. If negative, no limit is set. Separators are not included
  122. # in the resulting list.
  123. # Return value: New reference.
  124. list PyUnicode_Split(object s, object sep, Py_ssize_t maxsplit)
  125. # Split a Unicode string at line breaks, returning a list of Unicode
  126. # strings. CRLF is considered to be one line break. If keepend is 0,
  127. # the line break characters are not included in the resulting strings.
  128. # Return value: New reference.
  129. list PyUnicode_Splitlines(object s, bint keepend)
  130. # Translate a string by applying a character mapping table to it and
  131. # return the resulting Unicode object.
  132. #
  133. # The mapping table must map Unicode ordinal integers to Unicode ordinal
  134. # integers or None (causing deletion of the character).
  135. #
  136. # Mapping tables need only provide the __getitem__() interface;
  137. # dictionaries and sequences work well. Unmapped character ordinals (ones
  138. # which cause a LookupError) are left untouched and are copied as-is.
  139. #
  140. # errors has the usual meaning for codecs. It may be NULL which indicates
  141. # to use the default error handling.
  142. # Return value: New reference.
  143. unicode PyUnicode_Translate(object str, object table, const char *errors)
  144. # Join a sequence of strings using the given separator and return the
  145. # resulting Unicode string.
  146. # Return value: New reference.
  147. unicode PyUnicode_Join(object separator, object seq)
  148. # Return 1 if substr matches str[start:end] at the given tail end
  149. # (direction == -1 means to do a prefix match, direction == 1 a
  150. # suffix match), 0 otherwise.
  151. # Return -1 if an error occurred.
  152. Py_ssize_t PyUnicode_Tailmatch(object str, object substr,
  153. Py_ssize_t start, Py_ssize_t end, int direction) except -1
  154. # Return the first position of substr in str[start:end] using the given
  155. # direction (direction == 1 means to do a forward search, direction == -1
  156. # a backward search). The return value is the index of the first match;
  157. # a value of -1 indicates that no match was found, and -2 indicates that an
  158. # error occurred and an exception has been set.
  159. Py_ssize_t PyUnicode_Find(object str, object substr, Py_ssize_t start, Py_ssize_t end, int direction) except -2
  160. # Return the first position of the character ch in str[start:end] using
  161. # the given direction (direction == 1 means to do a forward search,
  162. # direction == -1 a backward search). The return value is the index of
  163. # the first match; a value of -1 indicates that no match was found, and
  164. # -2 indicates that an error occurred and an exception has been set.
  165. # New in version 3.3.
  166. Py_ssize_t PyUnicode_FindChar(object str, Py_UCS4 ch, Py_ssize_t start, Py_ssize_t end, int direction) except -2
  167. # Return the number of non-overlapping occurrences of substr in
  168. # str[start:end]. Return -1 if an error occurred.
  169. Py_ssize_t PyUnicode_Count(object str, object substr, Py_ssize_t start, Py_ssize_t end) except -1
  170. # Replace at most maxcount occurrences of substr in str with replstr and
  171. # return the resulting Unicode object. maxcount == -1 means replace all
  172. # occurrences.
  173. # Return value: New reference.
  174. unicode PyUnicode_Replace(object str, object substr, object replstr, Py_ssize_t maxcount)
  175. # Compare two strings and return -1, 0, 1 for less than,
  176. # equal, and greater than, respectively.
  177. int PyUnicode_Compare(object left, object right) except? -1
  178. # Compare a unicode object, uni, with string and return -1, 0, 1 for less than,
  179. # equal, and greater than, respectively. It is best to pass only ASCII-encoded
  180. # strings, but the function interprets the input string as ISO-8859-1 if it
  181. # contains non-ASCII characters.
  182. int PyUnicode_CompareWithASCIIString(object uni, const char *string)
  183. # Rich compare two unicode strings and return one of the following:
  184. #
  185. # NULL in case an exception was raised
  186. # Py_True or Py_False for successful comparisons
  187. # Py_NotImplemented in case the type combination is unknown
  188. #
  189. # Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in case
  190. # the conversion of the arguments to Unicode fails with a UnicodeDecodeError.
  191. #
  192. # Possible values for op are Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, and Py_LE.
  193. object PyUnicode_RichCompare(object left, object right, int op)
  194. # Return a new string object from format and args; this is analogous to
  195. # format % args.
  196. # Return value: New reference.
  197. unicode PyUnicode_Format(object format, object args)
  198. # Check whether element is contained in container and return true or false
  199. # accordingly.
  200. #
  201. # element has to coerce to a one element Unicode string. -1 is returned
  202. # if there was an error.
  203. int PyUnicode_Contains(object container, object element) except -1
  204. # Intern the argument *string in place. The argument must be the address
  205. # of a pointer variable pointing to a Python unicode string object. If
  206. # there is an existing interned string that is the same as *string, it sets
  207. # *string to it (decrementing the reference count of the old string object
  208. # and incrementing the reference count of the interned string object),
  209. # otherwise it leaves *string alone and interns it (incrementing its reference
  210. # count). (Clarification: even though there is a lot of talk about reference
  211. # counts, think of this function as reference-count-neutral; you own the object
  212. # after the call if and only if you owned it before the call.)
  213. #void PyUnicode_InternInPlace(PyObject **string)
  214. # A combination of PyUnicode_FromString() and PyUnicode_InternInPlace(),
  215. # returning either a new unicode string object that has been interned, or
  216. # a new ("owned") reference to an earlier interned string object with the
  217. # same value.
  218. unicode PyUnicode_InternFromString(const char *v)
  219. # Codecs
  220. # Create a Unicode object by decoding size bytes of the encoded
  221. # string s. encoding and errors have the same meaning as the
  222. # parameters of the same name in the unicode() builtin
  223. # function. The codec to be used is looked up using the Python
  224. # codec registry. Return NULL if an exception was raised by the
  225. # codec.
  226. object PyUnicode_Decode(char *s, Py_ssize_t size, char *encoding, char *errors)
  227. # Encode the Py_UNICODE buffer of the given size and return a
  228. # Python string object. encoding and errors have the same meaning
  229. # as the parameters of the same name in the Unicode encode()
  230. # method. The codec to be used is looked up using the Python codec
  231. # registry. Return NULL if an exception was raised by the codec.
  232. object PyUnicode_Encode(Py_UNICODE *s, Py_ssize_t size,
  233. char *encoding, char *errors)
  234. # Encode a Unicode object and return the result as Python string
  235. # object. encoding and errors have the same meaning as the
  236. # parameters of the same name in the Unicode encode() method. The
  237. # codec to be used is looked up using the Python codec
  238. # registry. Return NULL if an exception was raised by the codec.
  239. object PyUnicode_AsEncodedString(object unicode, char *encoding, char *errors)
  240. # These are the UTF-8 codec APIs:
  241. # Create a Unicode object by decoding size bytes of the UTF-8
  242. # encoded string s. Return NULL if an exception was raised by the
  243. # codec.
  244. unicode PyUnicode_DecodeUTF8(char *s, Py_ssize_t size, char *errors)
  245. # If consumed is NULL, behave like PyUnicode_DecodeUTF8(). If
  246. # consumed is not NULL, trailing incomplete UTF-8 byte sequences
  247. # will not be treated as an error. Those bytes will not be decoded
  248. # and the number of bytes that have been decoded will be stored in
  249. # consumed. New in version 2.4.
  250. unicode PyUnicode_DecodeUTF8Stateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
  251. # Encode the Py_UNICODE buffer of the given size using UTF-8 and
  252. # return a Python string object. Return NULL if an exception was
  253. # raised by the codec.
  254. bytes PyUnicode_EncodeUTF8(Py_UNICODE *s, Py_ssize_t size, char *errors)
  255. # Encode a Unicode object using UTF-8 and return the result as a Python bytes object. Error handling is ``strict''. Return NULL if an exception was raised by the codec.
  256. bytes PyUnicode_AsUTF8String(object unicode)
  257. # Return a pointer to the UTF-8 encoding of the Unicode object,
  258. # and store the size of the encoded representation (in bytes) in size.
  259. # The size argument can be NULL; in this case no size will be stored.
  260. # The returned buffer always has an extra null byte appended
  261. # (not included in size), regardless of whether there are any
  262. # other null code points.
  263. # In the case of an error, NULL is returned with an exception set and
  264. # no size is stored; "except NULL" makes Cython propagate that exception.
  265. # This caches the UTF-8 representation of the string in the Unicode
  266. # object, and subsequent calls will return a pointer to the same buffer.
  267. # The caller is not responsible for deallocating the buffer.
  268. const char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t *size) except NULL
  269. # These are the UTF-16 codec APIs:
  270. # Decode length bytes from a UTF-16 encoded buffer string and
  271. # return the corresponding Unicode object. errors (if non-NULL)
  272. # defines the error handling. It defaults to ``strict''.
  273. #
  274. # If byteorder is non-NULL, the decoder starts decoding using the
  275. # given byte order:
  276. #
  277. # *byteorder == -1: little endian
  278. # *byteorder == 0: native order
  279. # *byteorder == 1: big endian
  280. #
  281. # and then switches if the first two bytes of the input data are a
  282. # byte order mark (BOM) and the specified byte order is native
  283. # order. This BOM is not copied into the resulting Unicode
  284. # string. After completion, *byteorder is set to the current byte
  285. # order at the end of input data.
  286. #
  287. # If byteorder is NULL, the codec starts in native order mode.
  288. unicode PyUnicode_DecodeUTF16(char *s, Py_ssize_t size, char *errors, int *byteorder)
  289. # If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If
  290. # consumed is not NULL, PyUnicode_DecodeUTF16Stateful() will not
  291. # treat trailing incomplete UTF-16 byte sequences (such as an odd
  292. # number of bytes or a split surrogate pair) as an error. Those
  293. # bytes will not be decoded and the number of bytes that have been
  294. # decoded will be stored in consumed. New in version 2.4.
  295. unicode PyUnicode_DecodeUTF16Stateful(char *s, Py_ssize_t size, char *errors, int *byteorder, Py_ssize_t *consumed)
  296. # Return a Python string object holding the UTF-16 encoded value
  297. # of the Unicode data in s. If byteorder is not 0, output is
  298. # written according to the following byte order:
  299. #
  300. # byteorder == -1: little endian
  301. # byteorder == 0: native byte order (writes a BOM mark)
  302. # byteorder == 1: big endian
  303. #
  304. # If byteorder is 0, the output string will always start with the
  305. # Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark
  306. # is prepended.
  307. #
  308. # If Py_UNICODE_WIDE is defined, a single Py_UNICODE value may get
  309. # represented as a surrogate pair. If it is not defined, each
  310. # Py_UNICODE value is interpreted as a UCS-2 character.
  311. bytes PyUnicode_EncodeUTF16(Py_UNICODE *s, Py_ssize_t size, char *errors, int byteorder)
  312. # Return a Python string using the UTF-16 encoding in native byte
  313. # order. The string always starts with a BOM mark. Error handling
  314. # is ``strict''. Return NULL if an exception was raised by the
  315. # codec.
  316. bytes PyUnicode_AsUTF16String(object unicode)
  317. # These are the ``Unicode Escape'' codec APIs:
  318. # Create a Unicode object by decoding size bytes of the
  319. # Unicode-Escape encoded string s. Return NULL if an exception was
  320. # raised by the codec.
  321. object PyUnicode_DecodeUnicodeEscape(char *s, Py_ssize_t size, char *errors)
  322. # Encode the Py_UNICODE buffer of the given size using
  323. # Unicode-Escape and return a Python string object. Return NULL if
  324. # an exception was raised by the codec.
  325. object PyUnicode_EncodeUnicodeEscape(Py_UNICODE *s, Py_ssize_t size)
  326. # Encode a Unicode object using Unicode-Escape and return the
  327. # result as a Python string object. Error handling is
  328. # ``strict''. Return NULL if an exception was raised by the codec.
  329. object PyUnicode_AsUnicodeEscapeString(object unicode)
  330. # These are the ``Raw Unicode Escape'' codec APIs:
  331. # Create a Unicode object by decoding size bytes of the
  332. # Raw-Unicode-Escape encoded string s. Return NULL if an exception
  333. # was raised by the codec.
  334. object PyUnicode_DecodeRawUnicodeEscape(char *s, Py_ssize_t size, char *errors)
  335. # Encode the Py_UNICODE buffer of the given size using
  336. # Raw-Unicode-Escape and return a Python string object. Return
  337. # NULL if an exception was raised by the codec.
  338. object PyUnicode_EncodeRawUnicodeEscape(Py_UNICODE *s, Py_ssize_t size, char *errors)
  339. # Encode a Unicode object using Raw-Unicode-Escape and return the
  340. # result as a Python string object. Error handling is
  341. # ``strict''. Return NULL if an exception was raised by the codec.
  342. object PyUnicode_AsRawUnicodeEscapeString(object unicode)
  343. # These are the Latin-1 codec APIs: Latin-1 corresponds to the first 256 Unicode ordinals and only these are accepted by the codecs during encoding.
  344. # Create a Unicode object by decoding size bytes of the Latin-1
  345. # encoded string s. Return NULL if an exception was raised by the
  346. # codec.
  347. unicode PyUnicode_DecodeLatin1(char *s, Py_ssize_t size, char *errors)
  348. # Encode the Py_UNICODE buffer of the given size using Latin-1 and
  349. # return a Python bytes object. Return NULL if an exception was
  350. # raised by the codec.
  351. bytes PyUnicode_EncodeLatin1(Py_UNICODE *s, Py_ssize_t size, char *errors)
  352. # Encode a Unicode object using Latin-1 and return the result as
  353. # a Python bytes object. Error handling is ``strict''. Return NULL
  354. # if an exception was raised by the codec.
  355. bytes PyUnicode_AsLatin1String(object unicode)
  356. # These are the ASCII codec APIs. Only 7-bit ASCII data is
  357. # accepted. All other codes generate errors.
  358. # Create a Unicode object by decoding size bytes of the ASCII
  359. # encoded string s. Return NULL if an exception was raised by the
  360. # codec.
  361. unicode PyUnicode_DecodeASCII(char *s, Py_ssize_t size, char *errors)
  362. # Encode the Py_UNICODE buffer of the given size using ASCII and
  363. # return a Python bytes object. Return NULL if an exception was
  364. # raised by the codec.
  365. bytes PyUnicode_EncodeASCII(Py_UNICODE *s, Py_ssize_t size, char *errors)
  366. # Encode a Unicode object using ASCII and return the result as
  367. # a Python bytes object. Error handling is ``strict''. Return NULL
  368. # if an exception was raised by the codec.
  369. bytes PyUnicode_AsASCIIString(object o)
  370. # These are the mapping codec APIs:
  371. #
  372. # This codec is special in that it can be used to implement many
  373. # different codecs (and this is in fact what was done to obtain most
  374. # of the standard codecs included in the encodings package). The codec
  375. # uses mapping to encode and decode characters.
  376. #
  377. # Decoding mappings must map single string characters to single
  378. # Unicode characters, integers (which are then interpreted as Unicode
  379. # ordinals) or None (meaning "undefined mapping" and causing an
  380. # error).
  381. #
  382. # Encoding mappings must map single Unicode characters to single
  383. # string characters, integers (which are then interpreted as Latin-1
  384. # ordinals) or None (meaning "undefined mapping" and causing an
  385. # error).
  386. #
  387. # The mapping objects provided must only support the __getitem__
  388. # mapping interface.
  389. #
  390. # If a character lookup fails with a LookupError, the character is
  391. # copied as-is meaning that its ordinal value will be interpreted as
  392. # Unicode or Latin-1 ordinal resp. Because of this, mappings only need
  393. # to contain those mappings which map characters to different code
  394. # points.
  395. # Create a Unicode object by decoding size bytes of the encoded
  396. # string s using the given mapping object. Return NULL if an
  397. # exception was raised by the codec. If mapping is NULL, Latin-1
  398. # decoding will be done. Otherwise it can be a dictionary mapping
  399. # bytes, or a unicode string that is treated as a lookup table. Byte
  400. # values greater than the length of the string and U+FFFE
  401. # "characters" are treated as "undefined mapping". Changed in
  402. # version 2.4: Allowed unicode string as mapping argument.
  403. object PyUnicode_DecodeCharmap(char *s, Py_ssize_t size, object mapping, char *errors)
  404. # Encode the Py_UNICODE buffer of the given size using the given
  405. # mapping object and return a Python string object. Return NULL if
  406. # an exception was raised by the codec.
  407. #
  408. # Deprecated since version 3.3, will be removed in version 4.0.
  409. object PyUnicode_EncodeCharmap(Py_UNICODE *s, Py_ssize_t size, object mapping, char *errors)
  410. # Encode a Unicode object using the given mapping object and
  411. # return the result as a Python string object. Error handling is
  412. # ``strict''. Return NULL if an exception was raised by the codec.
  413. object PyUnicode_AsCharmapString(object o, object mapping)
  414. # The following codec API is special in that it maps Unicode to Unicode.
  415. # Translate a Py_UNICODE buffer of the given length by applying a
  416. # character mapping table to it and return the resulting Unicode
  417. # object. Return NULL when an exception was raised by the codec.
  418. #
  419. # The mapping table must map Unicode ordinal integers to Unicode
  420. # ordinal integers or None (causing deletion of the character).
  421. #
  422. # Mapping tables need only provide the __getitem__() interface;
  423. # dictionaries and sequences work well. Unmapped character
  424. # ordinals (ones which cause a LookupError) are left untouched and
  425. # are copied as-is.
  426. #
  427. # Deprecated since version 3.3, will be removed in version 4.0.
  428. object PyUnicode_TranslateCharmap(Py_UNICODE *s, Py_ssize_t size,
  429. object table, char *errors)
  430. # These are the MBCS codec APIs. They are currently only available on
  431. # Windows and use the Win32 MBCS converters to implement the
  432. # conversions. Note that MBCS (or DBCS) is a class of encodings, not
  433. # just one. The target encoding is defined by the user settings on the
  434. # machine running the codec.
  435. # Create a Unicode object by decoding size bytes of the MBCS
  436. # encoded string s. Return NULL if an exception was raised by the
  437. # codec.
  438. unicode PyUnicode_DecodeMBCS(char *s, Py_ssize_t size, char *errors)
  439. # If consumed is NULL, behave like PyUnicode_DecodeMBCS(). If
  440. # consumed is not NULL, PyUnicode_DecodeMBCSStateful() will not
  441. # decode trailing lead byte and the number of bytes that have been
  442. # decoded will be stored in consumed. New in version 2.5.
  443. # NOTE: Python 2.x uses 'int' values for 'size' and 'consumed' (changed in 3.0)
  444. unicode PyUnicode_DecodeMBCSStateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
  445. # Encode the Py_UNICODE buffer of the given size using MBCS and
  446. # return a Python string object. Return NULL if an exception was
  447. # raised by the codec.
  448. bytes PyUnicode_EncodeMBCS(Py_UNICODE *s, Py_ssize_t size, char *errors)
  449. # Encode a Unicode object using MBCS and return the result as
  450. # a Python bytes object. Error handling is ``strict''. Return NULL
  451. # if an exception was raised by the codec.
  452. bytes PyUnicode_AsMBCSString(object o)
  453. # Encode the Unicode object using the specified code page and return
  454. # a Python bytes object. Return NULL if an exception was raised by the
  455. # codec. Use CP_ACP code page to get the MBCS encoder.
  456. #
  457. # New in version 3.3.
  458. bytes PyUnicode_EncodeCodePage(int code_page, object unicode, const char *errors)
  459. # Py_UCS4 helpers (new in CPython 3.3)
  460. # These utility functions work on strings of Py_UCS4 characters and
  461. # otherwise behave like the C standard library functions with the same name.
  462. size_t Py_UCS4_strlen(const Py_UCS4 *u)
  463. Py_UCS4* Py_UCS4_strcpy(Py_UCS4 *s1, const Py_UCS4 *s2)
  464. Py_UCS4* Py_UCS4_strncpy(Py_UCS4 *s1, const Py_UCS4 *s2, size_t n)
  465. Py_UCS4* Py_UCS4_strcat(Py_UCS4 *s1, const Py_UCS4 *s2)
  466. int Py_UCS4_strcmp(const Py_UCS4 *s1, const Py_UCS4 *s2)
  467. int Py_UCS4_strncmp(const Py_UCS4 *s1, const Py_UCS4 *s2, size_t n)
  468. Py_UCS4* Py_UCS4_strchr(const Py_UCS4 *s, Py_UCS4 c)
  469. Py_UCS4* Py_UCS4_strrchr(const Py_UCS4 *s, Py_UCS4 c)