usprep.h 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2003-2014, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: usprep.h
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2003jul2
  16. * created by: Ram Viswanadha
  17. */
  18. #ifndef __USPREP_H__
  19. #define __USPREP_H__
  20. /**
  21. * \file
  22. * \brief C API: Implements the StringPrep algorithm.
  23. */
  24. #include "unicode/utypes.h"
  25. #if U_SHOW_CPLUSPLUS_API
  26. #include "unicode/localpointer.h"
  27. #endif // U_SHOW_CPLUSPLUS_API
  28. /**
  29. *
  30. * StringPrep API implements the StringPrep framework as described by RFC 3454.
  31. * StringPrep prepares Unicode strings for use in network protocols.
  32. * Profiles of StringPrep are sets of rules and data according to which the
  33. * Unicode strings are prepared. Each profile contains tables which describe
  34. * how a code point should be treated. The tables are broadly classified into
  35. * <ul>
  36. * <li> Unassigned Table: Contains code points that are unassigned
  37. * in the Unicode Version supported by StringPrep. Currently
  38. * RFC 3454 supports Unicode 3.2. </li>
  39. * <li> Prohibited Table: Contains code points that are prohibited from
  40. * the output of the StringPrep processing function. </li>
  41. * <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
  42. * </ul>
  43. *
  44. * The procedure for preparing Unicode strings:
  45. * <ol>
  46. * <li> Map: For each character in the input, check if it has a mapping
  47. * and, if so, replace it with its mapping. </li>
  48. * <li> Normalize: Possibly normalize the result of step 1 using Unicode
  49. * normalization. </li>
  50. * <li> Prohibit: Check for any characters that are not allowed in the
  51. * output. If any are found, return an error.</li>
  52. * <li> Check bidi: Possibly check for right-to-left characters, and if
  53. * any are found, make sure that the whole string satisfies the
  54. * requirements for bidirectional strings. If the string does not
  55. * satisfy the requirements for bidirectional strings, return an
  56. * error. </li>
  57. * </ol>
  58. * @author Ram Viswanadha
  59. */
  60. #if !UCONFIG_NO_IDNA
  61. #include "unicode/parseerr.h"
  62. /**
  63. * The StringPrep profile handle. The struct is only forward-declared in this header; a profile is obtained from usprep_open() or usprep_openByType(), passed to usprep_prepare(), and released with usprep_close().
  64. * @stable ICU 2.8
  65. */
  66. typedef struct UStringPrepProfile UStringPrepProfile;
  67. /**
  68. * Default value (no option bits set) for the options parameter of usprep_prepare(): prohibits processing of unassigned code points in the input.
  69. *
  70. * @see usprep_prepare
  71. * @stable ICU 2.8
  72. */
  73. #define USPREP_DEFAULT 0x0000
  74. /**
  75. * Option bit (0x0001) for the options parameter of usprep_prepare(): allows processing of unassigned code points in the input, treating them as normal Unicode code points.
  76. *
  77. * @see usprep_prepare
  78. * @stable ICU 2.8
  79. */
  80. #define USPREP_ALLOW_UNASSIGNED 0x0001
  81. /**
  82. * Enumeration of the standard StringPrep profile types
  83. * supported by usprep_openByType(). No enumerator has an explicit value, so they are numbered consecutively from 0 in declaration order.
  84. * @see usprep_openByType
  85. * @stable ICU 4.2
  86. */
  87. typedef enum UStringPrepProfileType {
  88. /**
  89. * RFC3491 Nameprep
  90. * @stable ICU 4.2
  91. */
  92. USPREP_RFC3491_NAMEPREP,
  93. /**
  94. * RFC3530 nfs4_cs_prep
  95. * @stable ICU 4.2
  96. */
  97. USPREP_RFC3530_NFS4_CS_PREP,
  98. /**
  99. * RFC3530 nfs4_cs_prep with case-insensitive option
  100. * @stable ICU 4.2
  101. */
  102. USPREP_RFC3530_NFS4_CS_PREP_CI,
  103. /**
  104. * RFC3530 nfs4_cis_prep
  105. * @stable ICU 4.2
  106. */
  107. USPREP_RFC3530_NFS4_CIS_PREP,
  108. /**
  109. * RFC3530 nfs4_mixed_prep for prefix
  110. * @stable ICU 4.2
  111. */
  112. USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX,
  113. /**
  114. * RFC3530 nfs4_mixed_prep for suffix
  115. * @stable ICU 4.2
  116. */
  117. USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX,
  118. /**
  119. * RFC3722 iSCSI
  120. * @stable ICU 4.2
  121. */
  122. USPREP_RFC3722_ISCSI,
  123. /**
  124. * RFC3920 XMPP Nodeprep
  125. * @stable ICU 4.2
  126. */
  127. USPREP_RFC3920_NODEPREP,
  128. /**
  129. * RFC3920 XMPP Resourceprep
  130. * @stable ICU 4.2
  131. */
  132. USPREP_RFC3920_RESOURCEPREP,
  133. /**
  134. * RFC4011 Policy MIB Stringprep
  135. * @stable ICU 4.2
  136. */
  137. USPREP_RFC4011_MIB,
  138. /**
  139. * RFC4013 SASLprep
  140. * @stable ICU 4.2
  141. */
  142. USPREP_RFC4013_SASLPREP,
  143. /**
  144. * RFC4505 trace
  145. * @stable ICU 4.2
  146. */
  147. USPREP_RFC4505_TRACE,
  148. /**
  149. * RFC4518 LDAP
  150. * @stable ICU 4.2
  151. */
  152. USPREP_RFC4518_LDAP,
  153. /**
  154. * RFC4518 LDAP for case ignore, numeric and stored prefix
  155. * matching rules
  156. * @stable ICU 4.2
  157. */
  158. USPREP_RFC4518_LDAP_CI
  159. } UStringPrepProfileType;
  160. /**
  161. * Creates a StringPrep profile from the data file.
  162. *
  163. * @param path String containing the full path pointing to the directory
  164. * where the profile resides, followed by the package name,
  165. * e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
  166. * If NULL, ICU default data files will be used.
  167. * @param fileName Name of the profile file to be opened
  168. * @param status ICU error code in/out parameter. Must not be NULL.
  169. * Must fulfill U_SUCCESS before the function call.
  170. * @return Pointer to the UStringPrepProfile that is opened. Should be closed by
  171. * calling usprep_close()
  172. * @see usprep_close()
  173. * @stable ICU 2.8
  174. */
  175. U_CAPI UStringPrepProfile* U_EXPORT2
  176. usprep_open(const char* path,
  177. const char* fileName,
  178. UErrorCode* status);
  179. /**
  180. * Creates a StringPrep profile for the specified profile type.
  181. *
  182. * @param type The profile type; one of the UStringPrepProfileType enumerators
  183. * @param status ICU error code in/out parameter. Must not be NULL.
  184. * Must fulfill U_SUCCESS before the function call.
  185. * @return Pointer to the UStringPrepProfile that is opened. Should be closed by
  186. * calling usprep_close()
  187. * @see usprep_close()
  188. * @stable ICU 4.2
  189. */
  190. U_CAPI UStringPrepProfile* U_EXPORT2
  191. usprep_openByType(UStringPrepProfileType type,
  192. UErrorCode* status);
  193. /**
  194. * Closes a profile that was opened with usprep_open() or usprep_openByType().
  195. * @param profile The profile to close
  196. * @stable ICU 2.8
  197. */
  198. U_CAPI void U_EXPORT2
  199. usprep_close(UStringPrepProfile* profile);
  200. #if U_SHOW_CPLUSPLUS_API
  201. U_NAMESPACE_BEGIN
  202. /**
  203. * \class LocalUStringPrepProfilePointer
  204. * "Smart pointer" class, closes a UStringPrepProfile via usprep_close().
  205. * For most methods see the LocalPointerBase base class. Only declared when U_SHOW_CPLUSPLUS_API is enabled.
  206. *
  207. * @see LocalPointerBase
  208. * @see LocalPointer
  209. * @stable ICU 4.4
  210. */
  211. U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringPrepProfilePointer, UStringPrepProfile, usprep_close);
  212. U_NAMESPACE_END
  213. #endif // U_SHOW_CPLUSPLUS_API
  214. /**
  215. * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes (NFKC),
  216. * and checks for prohibited and BiDi characters in the order defined by RFC 3454,
  217. * depending on the options specified in the profile.
  218. *
  219. * @param prep The profile to use
  220. * @param src Pointer to UChar buffer containing the string to prepare
  221. * @param srcLength Number of characters in the source string
  222. * @param dest Pointer to the destination buffer to receive the output
  223. * @param destCapacity The capacity of the destination array
  224. * @param options A bit set of options:
  225. *
  226. * - USPREP_DEFAULT Prohibit processing of unassigned code points in the input
  227. *
  228. * - USPREP_ALLOW_UNASSIGNED Treat unassigned code points in the input
  229. * as normal Unicode code points.
  230. *
  231. * @param parseError Pointer to UParseError struct to receive information on the position
  232. * of the error if an error is encountered. Can be NULL.
  233. * @param status ICU in/out error code parameter.
  234. * U_INVALID_CHAR_FOUND if src contains
  235. * unmatched single surrogates.
  236. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  237. * too many code points.
  238. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
  239. * @return The number of UChars in the destination buffer
  240. * @stable ICU 2.8
  241. */
  242. U_CAPI int32_t U_EXPORT2
  243. usprep_prepare( const UStringPrepProfile* prep,
  244. const UChar* src, int32_t srcLength,
  245. UChar* dest, int32_t destCapacity,
  246. int32_t options,
  247. UParseError* parseError,
  248. UErrorCode* status );
  249. #endif /* #if !UCONFIG_NO_IDNA */
  250. #endif