utrie_swap.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. // © 2018 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. // utrie_swap.cpp
  4. // created: 2018aug08 Markus W. Scherer
  5. #include "unicode/utypes.h"
  6. #include "cmemory.h"
  7. #include "ucptrie_impl.h"
  8. #include "udataswp.h"
  9. #include "utrie.h"
  10. #include "utrie2_impl.h"
  11. // These functions for swapping different generations of ICU code point tries are here
  12. // so that their implementation files need not depend on swapper code,
  13. // need not depend on each other, and so that other swapper code
  14. // need not depend on other trie code.
namespace {
// 0x80 is one past the last ASCII code point (U+007F).
// ucptrie_swap() rejects a UCPTrie whose data length is below this value.
constexpr int32_t ASCII_LIMIT = 0x80;
} // namespace
  18. U_CAPI int32_t U_EXPORT2
  19. utrie_swap(const UDataSwapper *ds,
  20. const void *inData, int32_t length, void *outData,
  21. UErrorCode *pErrorCode) {
  22. const UTrieHeader *inTrie;
  23. UTrieHeader trie;
  24. int32_t size;
  25. UBool dataIs32;
  26. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  27. return 0;
  28. }
  29. if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
  30. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  31. return 0;
  32. }
  33. /* setup and swapping */
  34. if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
  35. *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  36. return 0;
  37. }
  38. inTrie=(const UTrieHeader *)inData;
  39. trie.signature=ds->readUInt32(inTrie->signature);
  40. trie.options=ds->readUInt32(inTrie->options);
  41. trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
  42. trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
  43. if( trie.signature!=0x54726965 ||
  44. (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
  45. ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
  46. trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
  47. (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
  48. trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
  49. (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
  50. ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
  51. ) {
  52. *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
  53. return 0;
  54. }
  55. dataIs32 = (trie.options & UTRIE_OPTIONS_DATA_IS_32_BIT) != 0;
  56. size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
  57. if(length>=0) {
  58. UTrieHeader *outTrie;
  59. if(length<size) {
  60. *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  61. return 0;
  62. }
  63. outTrie=(UTrieHeader *)outData;
  64. /* swap the header */
  65. ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
  66. /* swap the index and the data */
  67. if(dataIs32) {
  68. ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
  69. ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
  70. (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
  71. } else {
  72. ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
  73. }
  74. }
  75. return size;
  76. }
  77. U_CAPI int32_t U_EXPORT2
  78. utrie2_swap(const UDataSwapper *ds,
  79. const void *inData, int32_t length, void *outData,
  80. UErrorCode *pErrorCode) {
  81. const UTrie2Header *inTrie;
  82. UTrie2Header trie;
  83. int32_t dataLength, size;
  84. UTrie2ValueBits valueBits;
  85. if(U_FAILURE(*pErrorCode)) {
  86. return 0;
  87. }
  88. if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
  89. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  90. return 0;
  91. }
  92. /* setup and swapping */
  93. if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
  94. *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  95. return 0;
  96. }
  97. inTrie=(const UTrie2Header *)inData;
  98. trie.signature=ds->readUInt32(inTrie->signature);
  99. trie.options=ds->readUInt16(inTrie->options);
  100. trie.indexLength=ds->readUInt16(inTrie->indexLength);
  101. trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
  102. valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
  103. dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
  104. if( trie.signature!=UTRIE2_SIG ||
  105. valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
  106. trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
  107. dataLength<UTRIE2_DATA_START_OFFSET
  108. ) {
  109. *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
  110. return 0;
  111. }
  112. size=sizeof(UTrie2Header)+trie.indexLength*2;
  113. switch(valueBits) {
  114. case UTRIE2_16_VALUE_BITS:
  115. size+=dataLength*2;
  116. break;
  117. case UTRIE2_32_VALUE_BITS:
  118. size+=dataLength*4;
  119. break;
  120. default:
  121. *pErrorCode=U_INVALID_FORMAT_ERROR;
  122. return 0;
  123. }
  124. if(length>=0) {
  125. UTrie2Header *outTrie;
  126. if(length<size) {
  127. *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  128. return 0;
  129. }
  130. outTrie=(UTrie2Header *)outData;
  131. /* swap the header */
  132. ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
  133. ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
  134. /* swap the index and the data */
  135. switch(valueBits) {
  136. case UTRIE2_16_VALUE_BITS:
  137. ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
  138. break;
  139. case UTRIE2_32_VALUE_BITS:
  140. ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
  141. ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
  142. (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
  143. break;
  144. default:
  145. *pErrorCode=U_INVALID_FORMAT_ERROR;
  146. return 0;
  147. }
  148. }
  149. return size;
  150. }
  151. U_CAPI int32_t U_EXPORT2
  152. ucptrie_swap(const UDataSwapper *ds,
  153. const void *inData, int32_t length, void *outData,
  154. UErrorCode *pErrorCode) {
  155. const UCPTrieHeader *inTrie;
  156. UCPTrieHeader trie;
  157. int32_t dataLength, size;
  158. UCPTrieValueWidth valueWidth;
  159. if(U_FAILURE(*pErrorCode)) {
  160. return 0;
  161. }
  162. if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
  163. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  164. return 0;
  165. }
  166. /* setup and swapping */
  167. if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) {
  168. *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  169. return 0;
  170. }
  171. inTrie=(const UCPTrieHeader *)inData;
  172. trie.signature=ds->readUInt32(inTrie->signature);
  173. trie.options=ds->readUInt16(inTrie->options);
  174. trie.indexLength=ds->readUInt16(inTrie->indexLength);
  175. trie.dataLength = ds->readUInt16(inTrie->dataLength);
  176. UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3);
  177. valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK);
  178. dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength;
  179. int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ?
  180. UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
  181. if( trie.signature!=UCPTRIE_SIG ||
  182. type > UCPTRIE_TYPE_SMALL ||
  183. (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 ||
  184. valueWidth > UCPTRIE_VALUE_BITS_8 ||
  185. trie.indexLength < minIndexLength ||
  186. dataLength < ASCII_LIMIT
  187. ) {
  188. *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */
  189. return 0;
  190. }
  191. size=sizeof(UCPTrieHeader)+trie.indexLength*2;
  192. switch(valueWidth) {
  193. case UCPTRIE_VALUE_BITS_16:
  194. size+=dataLength*2;
  195. break;
  196. case UCPTRIE_VALUE_BITS_32:
  197. size+=dataLength*4;
  198. break;
  199. case UCPTRIE_VALUE_BITS_8:
  200. size+=dataLength;
  201. break;
  202. default:
  203. *pErrorCode=U_INVALID_FORMAT_ERROR;
  204. return 0;
  205. }
  206. if(length>=0) {
  207. UCPTrieHeader *outTrie;
  208. if(length<size) {
  209. *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  210. return 0;
  211. }
  212. outTrie=(UCPTrieHeader *)outData;
  213. /* swap the header */
  214. ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
  215. ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
  216. /* swap the index */
  217. const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1);
  218. uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1);
  219. ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode);
  220. /* swap the data */
  221. const uint16_t *inData=inIndex+trie.indexLength;
  222. uint16_t *outData=outIndex+trie.indexLength;
  223. switch(valueWidth) {
  224. case UCPTRIE_VALUE_BITS_16:
  225. ds->swapArray16(ds, inData, dataLength*2, outData, pErrorCode);
  226. break;
  227. case UCPTRIE_VALUE_BITS_32:
  228. ds->swapArray32(ds, inData, dataLength*4, outData, pErrorCode);
  229. break;
  230. case UCPTRIE_VALUE_BITS_8:
  231. if(inTrie!=outTrie) {
  232. uprv_memmove(outData, inData, dataLength);
  233. }
  234. break;
  235. default:
  236. *pErrorCode=U_INVALID_FORMAT_ERROR;
  237. return 0;
  238. }
  239. }
  240. return size;
  241. }
  242. namespace {
  243. /**
  244. * Gets the trie version from 32-bit-aligned memory containing the serialized form
  245. * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3).
  246. *
  247. * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie
  248. * @param length the number of bytes available at data;
  249. * can be more than necessary (see return value)
  250. * @param anyEndianOk If false, only platform-endian serialized forms are recognized.
  251. * If true, opposite-endian serialized forms are recognized as well.
  252. * @return the trie version of the serialized form, or 0 if it is not
  253. * recognized as a serialized trie
  254. */
  255. int32_t
  256. getVersion(const void *data, int32_t length, UBool anyEndianOk) {
  257. uint32_t signature;
  258. if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) {
  259. return 0;
  260. }
  261. signature = *static_cast<const uint32_t*>(data);
  262. if(signature==UCPTRIE_SIG) {
  263. return 3;
  264. }
  265. if(anyEndianOk && signature==UCPTRIE_OE_SIG) {
  266. return 3;
  267. }
  268. if(signature==UTRIE2_SIG) {
  269. return 2;
  270. }
  271. if(anyEndianOk && signature==UTRIE2_OE_SIG) {
  272. return 2;
  273. }
  274. if(signature==UTRIE_SIG) {
  275. return 1;
  276. }
  277. if(anyEndianOk && signature==UTRIE_OE_SIG) {
  278. return 1;
  279. }
  280. return 0;
  281. }
  282. } // namespace
  283. U_CAPI int32_t U_EXPORT2
  284. utrie_swapAnyVersion(const UDataSwapper *ds,
  285. const void *inData, int32_t length, void *outData,
  286. UErrorCode *pErrorCode) {
  287. if(U_FAILURE(*pErrorCode)) { return 0; }
  288. switch(getVersion(inData, length, true)) {
  289. case 1:
  290. return utrie_swap(ds, inData, length, outData, pErrorCode);
  291. case 2:
  292. return utrie2_swap(ds, inData, length, outData, pErrorCode);
  293. case 3:
  294. return ucptrie_swap(ds, inData, length, outData, pErrorCode);
  295. default:
  296. *pErrorCode=U_INVALID_FORMAT_ERROR;
  297. return 0;
  298. }
  299. }