unistr.h 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763
  1. /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
  2. /* Elementary Unicode string functions.
  3. Copyright (C) 2001-2002, 2005-2024 Free Software Foundation, Inc.
  4. This file is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU Lesser General Public License as
  6. published by the Free Software Foundation; either version 2.1 of the
  7. License, or (at your option) any later version.
  8. This file is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public License
  13. along with this program. If not, see <https://www.gnu.org/licenses/>. */
  14. #ifndef _UNISTR_H
  15. #define _UNISTR_H
  16. #include "unitypes.h"
  17. /* Get bool. */
  18. #include <stdbool.h>
  19. /* Get size_t, ptrdiff_t. */
  20. #include <stddef.h>
  21. /* Get free(). */
  22. #include <stdlib.h>
  23. #ifdef __cplusplus
  24. extern "C" {
  25. #endif
  26. /* Conventions:
  27. All functions prefixed with u8_ operate on UTF-8 encoded strings.
  28. Their unit is a uint8_t (1 byte).
  29. All functions prefixed with u16_ operate on UTF-16 encoded strings.
  30. Their unit is a uint16_t (a 2-byte word).
  31. All functions prefixed with u32_ operate on UCS-4 encoded strings.
  32. Their unit is a uint32_t (a 4-byte word).
  33. All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
  34. n units.
  35. All arguments starting with "str" and the arguments of functions starting
  36. with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string
  37. which terminates at the first NUL unit. This termination unit is
  38. considered part of the string for all memory allocation purposes, but
  39. is not considered part of the string for all other logical purposes.
  40. Functions returning a string result take a (resultbuf, lengthp) argument
  41. pair. If resultbuf is not NULL and the result fits into *lengthp units,
  42. it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly
  43. allocated string is returned. In both cases, *lengthp is set to the
  44. length (number of units) of the returned string. In case of error,
  45. NULL is returned and errno is set. */
  46. /* Elementary string checks. */
  47. /* Check whether a UTF-8 string is well-formed.
  48. Return NULL if valid, or a pointer to the first invalid unit otherwise. */
  49. extern const uint8_t *
  50. u8_check (const uint8_t *s, size_t n)
  51. _UC_ATTRIBUTE_PURE;
  52. /* Check whether a UTF-16 string is well-formed.
  53. Return NULL if valid, or a pointer to the first invalid unit otherwise. */
  54. extern const uint16_t *
  55. u16_check (const uint16_t *s, size_t n)
  56. _UC_ATTRIBUTE_PURE;
  57. /* Check whether a UCS-4 string is well-formed.
  58. Return NULL if valid, or a pointer to the first invalid unit otherwise. */
  59. extern const uint32_t *
  60. u32_check (const uint32_t *s, size_t n)
  61. _UC_ATTRIBUTE_PURE;
  62. /* Elementary string conversions. */
  63. /* Convert a UTF-8 string to a UTF-16 string. */
  64. extern uint16_t *
  65. u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf,
  66. size_t *lengthp);
  67. /* Convert a UTF-8 string to a UCS-4 string. */
  68. extern uint32_t *
  69. u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf,
  70. size_t *lengthp);
  71. /* Convert a UTF-16 string to a UTF-8 string. */
  72. extern uint8_t *
  73. u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf,
  74. size_t *lengthp);
  75. /* Convert a UTF-16 string to a UCS-4 string. */
  76. extern uint32_t *
  77. u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf,
  78. size_t *lengthp);
  79. /* Convert a UCS-4 string to a UTF-8 string. */
  80. extern uint8_t *
  81. u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf,
  82. size_t *lengthp);
  83. /* Convert a UCS-4 string to a UTF-16 string. */
  84. extern uint16_t *
  85. u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf,
  86. size_t *lengthp);
  87. /* Elementary string functions. */
  88. /* Return the length (number of units) of the first character in S, which is
  89. no longer than N. Return 0 if it is the NUL character. Return -1 upon
  90. failure. */
  91. /* Similar to mblen(), except that s must not be NULL. */
  92. extern int
  93. u8_mblen (const uint8_t *s, size_t n)
  94. _UC_ATTRIBUTE_PURE;
  95. extern int
  96. u16_mblen (const uint16_t *s, size_t n)
  97. _UC_ATTRIBUTE_PURE;
  98. extern int
  99. u32_mblen (const uint32_t *s, size_t n)
  100. _UC_ATTRIBUTE_PURE;
  101. /* Return the length (number of units) of the first character in S, putting
  102. its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
  103. and an appropriate number of units is returned.
  104. The number of available units, N, must be > 0. */
  105. /* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0,
  106. and the NUL character is not treated specially. */
  107. /* The variants with _unsafe suffix are for backward compatibility with
  108. libunistring versions < 0.9.7. */
  109. #if GNULIB_UNISTR_U8_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
  110. # if !HAVE_INLINE
  111. extern int
  112. u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n);
  113. # else
  114. extern int
  115. u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n);
  116. static inline int
  117. u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
  118. {
  119. uint8_t c = *s;
  120. if (c < 0x80)
  121. {
  122. *puc = c;
  123. return 1;
  124. }
  125. else
  126. return u8_mbtouc_unsafe_aux (puc, s, n);
  127. }
  128. # endif
  129. #endif
  130. #if GNULIB_UNISTR_U16_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
  131. # if !HAVE_INLINE
  132. extern int
  133. u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n);
  134. # else
  135. extern int
  136. u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n);
  137. static inline int
  138. u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
  139. {
  140. uint16_t c = *s;
  141. if (c < 0xd800 || c >= 0xe000)
  142. {
  143. *puc = c;
  144. return 1;
  145. }
  146. else
  147. return u16_mbtouc_unsafe_aux (puc, s, n);
  148. }
  149. # endif
  150. #endif
  151. #if GNULIB_UNISTR_U32_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
  152. # if !HAVE_INLINE
  153. extern int
  154. u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n);
  155. # else
  156. static inline int
  157. u32_mbtouc_unsafe (ucs4_t *puc,
  158. const uint32_t *s, _GL_ATTRIBUTE_MAYBE_UNUSED size_t n)
  159. {
  160. uint32_t c = *s;
  161. if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
  162. *puc = c;
  163. else
  164. /* invalid multibyte character */
  165. *puc = 0xfffd;
  166. return 1;
  167. }
  168. # endif
  169. #endif
  170. #if GNULIB_UNISTR_U8_MBTOUC || HAVE_LIBUNISTRING
  171. # if !HAVE_INLINE
  172. extern int
  173. u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n);
  174. # else
  175. extern int
  176. u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n);
  177. static inline int
  178. u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
  179. {
  180. uint8_t c = *s;
  181. if (c < 0x80)
  182. {
  183. *puc = c;
  184. return 1;
  185. }
  186. else
  187. return u8_mbtouc_aux (puc, s, n);
  188. }
  189. # endif
  190. #endif
  191. #if GNULIB_UNISTR_U16_MBTOUC || HAVE_LIBUNISTRING
  192. # if !HAVE_INLINE
  193. extern int
  194. u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n);
  195. # else
  196. extern int
  197. u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n);
  198. static inline int
  199. u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
  200. {
  201. uint16_t c = *s;
  202. if (c < 0xd800 || c >= 0xe000)
  203. {
  204. *puc = c;
  205. return 1;
  206. }
  207. else
  208. return u16_mbtouc_aux (puc, s, n);
  209. }
  210. # endif
  211. #endif
  212. #if GNULIB_UNISTR_U32_MBTOUC || HAVE_LIBUNISTRING
  213. # if !HAVE_INLINE
  214. extern int
  215. u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n);
  216. # else
  217. static inline int
  218. u32_mbtouc (ucs4_t *puc, const uint32_t *s,
  219. _GL_ATTRIBUTE_MAYBE_UNUSED size_t n)
  220. {
  221. uint32_t c = *s;
  222. if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
  223. *puc = c;
  224. else
  225. /* invalid multibyte character */
  226. *puc = 0xfffd;
  227. return 1;
  228. }
  229. # endif
  230. #endif
  231. /* Return the length (number of units) of the first character in S, putting
  232. its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
  233. and -1 is returned for an invalid sequence of units, -2 is returned for an
  234. incomplete sequence of units.
  235. The number of available units, N, must be > 0. */
  236. /* Similar to u*_mbtouc(), except that the return value gives more details
  237. about the failure, similar to mbrtowc(). */
  238. #if GNULIB_UNISTR_U8_MBTOUCR || HAVE_LIBUNISTRING
  239. extern int
  240. u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n);
  241. #endif
  242. #if GNULIB_UNISTR_U16_MBTOUCR || HAVE_LIBUNISTRING
  243. extern int
  244. u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n);
  245. #endif
  246. #if GNULIB_UNISTR_U32_MBTOUCR || HAVE_LIBUNISTRING
  247. extern int
  248. u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n);
  249. #endif
  250. /* Put the multibyte character represented by UC in S, returning its
  251. length. Return -1 upon failure, -2 if the number of available units, N,
  252. is too small. The latter case cannot occur if N >= 6 (u8_), N >= 2 (u16_), or N >= 1 (u32_), respectively. */
  253. /* Similar to wctomb(), except that s must not be NULL, and the argument n
  254. must be specified. */
  255. #if GNULIB_UNISTR_U8_UCTOMB || HAVE_LIBUNISTRING
  256. /* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr. */
  257. extern int
  258. u8_uctomb_aux (uint8_t *s, ucs4_t uc, ptrdiff_t n);
  259. # if !HAVE_INLINE
  260. extern int
  261. u8_uctomb (uint8_t *s, ucs4_t uc, ptrdiff_t n);
  262. # else
  263. static inline int
  264. u8_uctomb (uint8_t *s, ucs4_t uc, ptrdiff_t n)
  265. {
  266. if (uc < 0x80 && n > 0)
  267. {
  268. s[0] = uc;
  269. return 1;
  270. }
  271. else
  272. return u8_uctomb_aux (s, uc, n);
  273. }
  274. # endif
  275. #endif
  276. #if GNULIB_UNISTR_U16_UCTOMB || HAVE_LIBUNISTRING
  277. /* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr. */
  278. extern int
  279. u16_uctomb_aux (uint16_t *s, ucs4_t uc, ptrdiff_t n);
  280. # if !HAVE_INLINE
  281. extern int
  282. u16_uctomb (uint16_t *s, ucs4_t uc, ptrdiff_t n);
  283. # else
  284. static inline int
  285. u16_uctomb (uint16_t *s, ucs4_t uc, ptrdiff_t n)
  286. {
  287. if (uc < 0xd800 && n > 0)
  288. {
  289. s[0] = uc;
  290. return 1;
  291. }
  292. else
  293. return u16_uctomb_aux (s, uc, n);
  294. }
  295. # endif
  296. #endif
  297. #if GNULIB_UNISTR_U32_UCTOMB || HAVE_LIBUNISTRING
  298. # if !HAVE_INLINE
  299. extern int
  300. u32_uctomb (uint32_t *s, ucs4_t uc, ptrdiff_t n);
  301. # else
  302. static inline int
  303. u32_uctomb (uint32_t *s, ucs4_t uc, ptrdiff_t n)
  304. {
  305. if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000))
  306. {
  307. if (n > 0)
  308. {
  309. *s = uc;
  310. return 1;
  311. }
  312. else
  313. return -2;
  314. }
  315. else
  316. return -1;
  317. }
  318. # endif
  319. #endif
  320. /* Copy N units from SRC to DEST. */
  321. /* Similar to memcpy(). */
  322. extern uint8_t *
  323. u8_cpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
  324. extern uint16_t *
  325. u16_cpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
  326. extern uint32_t *
  327. u32_cpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
  328. /* Copy N units from SRC to DEST, returning pointer after last written unit. */
  329. /* Similar to mempcpy(). */
  330. extern uint8_t *
  331. u8_pcpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
  332. extern uint16_t *
  333. u16_pcpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
  334. extern uint32_t *
  335. u32_pcpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
  336. /* Copy N units from SRC to DEST, guaranteeing correct behavior for
  337. overlapping memory areas. */
  338. /* Similar to memmove(). */
  339. extern uint8_t *
  340. u8_move (uint8_t *dest, const uint8_t *src, size_t n);
  341. extern uint16_t *
  342. u16_move (uint16_t *dest, const uint16_t *src, size_t n);
  343. extern uint32_t *
  344. u32_move (uint32_t *dest, const uint32_t *src, size_t n);
  345. /* Set the first N characters of S to UC. UC should be a character that
  346. occupies only 1 unit. */
  347. /* Similar to memset(). */
  348. extern uint8_t *
  349. u8_set (uint8_t *s, ucs4_t uc, size_t n);
  350. extern uint16_t *
  351. u16_set (uint16_t *s, ucs4_t uc, size_t n);
  352. extern uint32_t *
  353. u32_set (uint32_t *s, ucs4_t uc, size_t n);
  354. /* Compare S1 and S2, each of length N. */
  355. /* Similar to memcmp(). */
  356. extern int
  357. u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n)
  358. _UC_ATTRIBUTE_PURE;
  359. extern int
  360. u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n)
  361. _UC_ATTRIBUTE_PURE;
  362. extern int
  363. u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n)
  364. _UC_ATTRIBUTE_PURE;
  365. /* Compare S1 and S2. */
  366. /* Similar to the gnulib function memcmp2(). */
  367. extern int
  368. u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2)
  369. _UC_ATTRIBUTE_PURE;
  370. extern int
  371. u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2)
  372. _UC_ATTRIBUTE_PURE;
  373. extern int
  374. u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2)
  375. _UC_ATTRIBUTE_PURE;
  376. /* Search the string at S for UC. */
  377. /* Similar to memchr(). */
  378. extern uint8_t *
  379. u8_chr (const uint8_t *s, size_t n, ucs4_t uc)
  380. _UC_ATTRIBUTE_PURE;
  381. extern uint16_t *
  382. u16_chr (const uint16_t *s, size_t n, ucs4_t uc)
  383. _UC_ATTRIBUTE_PURE;
  384. extern uint32_t *
  385. u32_chr (const uint32_t *s, size_t n, ucs4_t uc)
  386. _UC_ATTRIBUTE_PURE;
  387. /* Count the number of Unicode characters in the N units from S. */
  388. /* Similar to mbsnlen(). */
  389. extern size_t
  390. u8_mbsnlen (const uint8_t *s, size_t n)
  391. _UC_ATTRIBUTE_PURE;
  392. extern size_t
  393. u16_mbsnlen (const uint16_t *s, size_t n)
  394. _UC_ATTRIBUTE_PURE;
  395. extern size_t
  396. u32_mbsnlen (const uint32_t *s, size_t n)
  397. _UC_ATTRIBUTE_PURE;
  398. /* Elementary string functions with memory allocation. */
  399. /* Make a freshly allocated copy of S, of length N. */
  400. extern uint8_t *
  401. u8_cpy_alloc (const uint8_t *s, size_t n);
  402. extern uint16_t *
  403. u16_cpy_alloc (const uint16_t *s, size_t n);
  404. extern uint32_t *
  405. u32_cpy_alloc (const uint32_t *s, size_t n);
  406. /* Elementary string functions on NUL terminated strings. */
  407. /* Return the length (number of units) of the first character in S.
  408. Return 0 if it is the NUL character. Return -1 upon failure. */
  409. extern int
  410. u8_strmblen (const uint8_t *s)
  411. _UC_ATTRIBUTE_PURE;
  412. extern int
  413. u16_strmblen (const uint16_t *s)
  414. _UC_ATTRIBUTE_PURE;
  415. extern int
  416. u32_strmblen (const uint32_t *s)
  417. _UC_ATTRIBUTE_PURE;
  418. /* Return the length (number of units) of the first character in S, putting
  419. its 'ucs4_t' representation in *PUC. Return 0 if it is the NUL
  420. character. Return -1 upon failure. */
  421. extern int
  422. u8_strmbtouc (ucs4_t *puc, const uint8_t *s);
  423. extern int
  424. u16_strmbtouc (ucs4_t *puc, const uint16_t *s);
  425. extern int
  426. u32_strmbtouc (ucs4_t *puc, const uint32_t *s);
  427. /* Forward iteration step. Advances the pointer past the next character,
  428. or returns NULL if the end of the string has been reached. Puts the
  429. character's 'ucs4_t' representation in *PUC. */
  430. extern const uint8_t *
  431. u8_next (ucs4_t *puc, const uint8_t *s);
  432. extern const uint16_t *
  433. u16_next (ucs4_t *puc, const uint16_t *s);
  434. extern const uint32_t *
  435. u32_next (ucs4_t *puc, const uint32_t *s);
  436. /* Backward iteration step. Advances the pointer to point to the previous
  437. character, or returns NULL if the beginning of the string had been reached.
  438. Puts the character's 'ucs4_t' representation in *PUC. */
  439. extern const uint8_t *
  440. u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start);
  441. extern const uint16_t *
  442. u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start);
  443. extern const uint32_t *
  444. u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start);
  445. /* Return the number of units in S. */
  446. /* Similar to strlen(), wcslen(). */
  447. extern size_t
  448. u8_strlen (const uint8_t *s)
  449. _UC_ATTRIBUTE_PURE;
  450. extern size_t
  451. u16_strlen (const uint16_t *s)
  452. _UC_ATTRIBUTE_PURE;
  453. extern size_t
  454. u32_strlen (const uint32_t *s)
  455. _UC_ATTRIBUTE_PURE;
  456. /* Return the number of units in S, but at most MAXLEN. */
  457. /* Similar to strnlen(), wcsnlen(). */
  458. extern size_t
  459. u8_strnlen (const uint8_t *s, size_t maxlen)
  460. _UC_ATTRIBUTE_PURE;
  461. extern size_t
  462. u16_strnlen (const uint16_t *s, size_t maxlen)
  463. _UC_ATTRIBUTE_PURE;
  464. extern size_t
  465. u32_strnlen (const uint32_t *s, size_t maxlen)
  466. _UC_ATTRIBUTE_PURE;
  467. /* Copy SRC to DEST. */
  468. /* Similar to strcpy(), wcscpy(). */
  469. extern uint8_t *
  470. u8_strcpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src);
  471. extern uint16_t *
  472. u16_strcpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src);
  473. extern uint32_t *
  474. u32_strcpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src);
  475. /* Copy SRC to DEST, returning the address of the terminating NUL in DEST. */
  476. /* Similar to stpcpy(). */
  477. extern uint8_t *
  478. u8_stpcpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src);
  479. extern uint16_t *
  480. u16_stpcpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src);
  481. extern uint32_t *
  482. u32_stpcpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src);
  483. /* Copy no more than N units of SRC to DEST. */
  484. /* Similar to strncpy(), wcsncpy(). */
  485. extern uint8_t *
  486. u8_strncpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
  487. extern uint16_t *
  488. u16_strncpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
  489. extern uint32_t *
  490. u32_strncpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
  491. /* Copy no more than N units of SRC to DEST. Return a pointer past the last
  492. non-NUL unit written into DEST. */
  493. /* Similar to stpncpy(). */
  494. extern uint8_t *
  495. u8_stpncpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
  496. extern uint16_t *
  497. u16_stpncpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
  498. extern uint32_t *
  499. u32_stpncpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
  500. /* Append SRC onto DEST. */
  501. /* Similar to strcat(), wcscat(). */
  502. extern uint8_t *
  503. u8_strcat (uint8_t *_UC_RESTRICT dest, const uint8_t *src);
  504. extern uint16_t *
  505. u16_strcat (uint16_t *_UC_RESTRICT dest, const uint16_t *src);
  506. extern uint32_t *
  507. u32_strcat (uint32_t *_UC_RESTRICT dest, const uint32_t *src);
  508. /* Append no more than N units of SRC onto DEST. */
  509. /* Similar to strncat(), wcsncat(). */
  510. extern uint8_t *
  511. u8_strncat (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
  512. extern uint16_t *
  513. u16_strncat (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
  514. extern uint32_t *
  515. u32_strncat (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
  516. /* Compare S1 and S2. */
  517. /* Similar to strcmp(), wcscmp(). */
  518. #ifdef __sun
  519. /* Avoid a collision with the u8_strcmp() function in Solaris 11 libc. */
  520. extern int
  521. u8_strcmp_gnu (const uint8_t *s1, const uint8_t *s2)
  522. _UC_ATTRIBUTE_PURE;
  523. # define u8_strcmp u8_strcmp_gnu
  524. #else
  525. extern int
  526. u8_strcmp (const uint8_t *s1, const uint8_t *s2)
  527. _UC_ATTRIBUTE_PURE;
  528. #endif
  529. extern int
  530. u16_strcmp (const uint16_t *s1, const uint16_t *s2)
  531. _UC_ATTRIBUTE_PURE;
  532. extern int
  533. u32_strcmp (const uint32_t *s1, const uint32_t *s2)
  534. _UC_ATTRIBUTE_PURE;
  535. /* Compare S1 and S2 using the collation rules of the current locale.
  536. Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2.
  537. Upon failure, set errno and return any value. */
  538. /* Similar to strcoll(), wcscoll(). */
  539. extern int
  540. u8_strcoll (const uint8_t *s1, const uint8_t *s2);
  541. extern int
  542. u16_strcoll (const uint16_t *s1, const uint16_t *s2);
  543. extern int
  544. u32_strcoll (const uint32_t *s1, const uint32_t *s2);
  545. /* Compare no more than N units of S1 and S2. */
  546. /* Similar to strncmp(), wcsncmp(). */
  547. extern int
  548. u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n)
  549. _UC_ATTRIBUTE_PURE;
  550. extern int
  551. u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n)
  552. _UC_ATTRIBUTE_PURE;
  553. extern int
  554. u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n)
  555. _UC_ATTRIBUTE_PURE;
  556. /* Duplicate S, returning an identical malloc'd string. */
  557. /* Similar to strdup(), wcsdup(). */
  558. extern uint8_t *
  559. u8_strdup (const uint8_t *s)
  560. _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE;
  561. extern uint16_t *
  562. u16_strdup (const uint16_t *s)
  563. _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE;
  564. extern uint32_t *
  565. u32_strdup (const uint32_t *s)
  566. _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE;
  567. /* Find the first occurrence of UC in STR. */
  568. /* Similar to strchr(), wcschr(). */
  569. extern uint8_t *
  570. u8_strchr (const uint8_t *str, ucs4_t uc)
  571. _UC_ATTRIBUTE_PURE;
  572. extern uint16_t *
  573. u16_strchr (const uint16_t *str, ucs4_t uc)
  574. _UC_ATTRIBUTE_PURE;
  575. extern uint32_t *
  576. u32_strchr (const uint32_t *str, ucs4_t uc)
  577. _UC_ATTRIBUTE_PURE;
  578. /* Find the last occurrence of UC in STR. */
  579. /* Similar to strrchr(), wcsrchr(). */
  580. extern uint8_t *
  581. u8_strrchr (const uint8_t *str, ucs4_t uc)
  582. _UC_ATTRIBUTE_PURE;
  583. extern uint16_t *
  584. u16_strrchr (const uint16_t *str, ucs4_t uc)
  585. _UC_ATTRIBUTE_PURE;
  586. extern uint32_t *
  587. u32_strrchr (const uint32_t *str, ucs4_t uc)
  588. _UC_ATTRIBUTE_PURE;
  589. /* Return the length of the initial segment of STR which consists entirely
  590. of Unicode characters not in REJECT. */
  591. /* Similar to strcspn(), wcscspn(). */
  592. extern size_t
  593. u8_strcspn (const uint8_t *str, const uint8_t *reject)
  594. _UC_ATTRIBUTE_PURE;
  595. extern size_t
  596. u16_strcspn (const uint16_t *str, const uint16_t *reject)
  597. _UC_ATTRIBUTE_PURE;
  598. extern size_t
  599. u32_strcspn (const uint32_t *str, const uint32_t *reject)
  600. _UC_ATTRIBUTE_PURE;
  601. /* Return the length of the initial segment of STR which consists entirely
  602. of Unicode characters in ACCEPT. */
  603. /* Similar to strspn(), wcsspn(). */
  604. extern size_t
  605. u8_strspn (const uint8_t *str, const uint8_t *accept)
  606. _UC_ATTRIBUTE_PURE;
  607. extern size_t
  608. u16_strspn (const uint16_t *str, const uint16_t *accept)
  609. _UC_ATTRIBUTE_PURE;
  610. extern size_t
  611. u32_strspn (const uint32_t *str, const uint32_t *accept)
  612. _UC_ATTRIBUTE_PURE;
  613. /* Find the first occurrence in STR of any character in ACCEPT. */
  614. /* Similar to strpbrk(), wcspbrk(). */
  615. extern uint8_t *
  616. u8_strpbrk (const uint8_t *str, const uint8_t *accept)
  617. _UC_ATTRIBUTE_PURE;
  618. extern uint16_t *
  619. u16_strpbrk (const uint16_t *str, const uint16_t *accept)
  620. _UC_ATTRIBUTE_PURE;
  621. extern uint32_t *
  622. u32_strpbrk (const uint32_t *str, const uint32_t *accept)
  623. _UC_ATTRIBUTE_PURE;
  624. /* Find the first occurrence of NEEDLE in HAYSTACK. */
  625. /* Similar to strstr(), wcsstr(). */
  626. extern uint8_t *
  627. u8_strstr (const uint8_t *haystack, const uint8_t *needle)
  628. _UC_ATTRIBUTE_PURE;
  629. extern uint16_t *
  630. u16_strstr (const uint16_t *haystack, const uint16_t *needle)
  631. _UC_ATTRIBUTE_PURE;
  632. extern uint32_t *
  633. u32_strstr (const uint32_t *haystack, const uint32_t *needle)
  634. _UC_ATTRIBUTE_PURE;
  635. /* Test whether STR starts with PREFIX. */
  636. extern bool
  637. u8_startswith (const uint8_t *str, const uint8_t *prefix)
  638. _UC_ATTRIBUTE_PURE;
  639. extern bool
  640. u16_startswith (const uint16_t *str, const uint16_t *prefix)
  641. _UC_ATTRIBUTE_PURE;
  642. extern bool
  643. u32_startswith (const uint32_t *str, const uint32_t *prefix)
  644. _UC_ATTRIBUTE_PURE;
  645. /* Test whether STR ends with SUFFIX. */
  646. extern bool
  647. u8_endswith (const uint8_t *str, const uint8_t *suffix)
  648. _UC_ATTRIBUTE_PURE;
  649. extern bool
  650. u16_endswith (const uint16_t *str, const uint16_t *suffix)
  651. _UC_ATTRIBUTE_PURE;
  652. extern bool
  653. u32_endswith (const uint32_t *str, const uint32_t *suffix)
  654. _UC_ATTRIBUTE_PURE;
  655. /* Divide STR into tokens separated by characters in DELIM.
  656. This interface is actually more similar to wcstok than to strtok. */
  657. /* Similar to strtok_r(), wcstok(). */
  658. extern uint8_t *
  659. u8_strtok (uint8_t *_UC_RESTRICT str, const uint8_t *delim,
  660. uint8_t **ptr);
  661. extern uint16_t *
  662. u16_strtok (uint16_t *_UC_RESTRICT str, const uint16_t *delim,
  663. uint16_t **ptr);
  664. extern uint32_t *
  665. u32_strtok (uint32_t *_UC_RESTRICT str, const uint32_t *delim,
  666. uint32_t **ptr);
  667. #ifdef __cplusplus
  668. }
  669. #endif
  670. #endif /* _UNISTR_H */