string.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. /**
  2. * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
  3. * SPDX-License-Identifier: Apache-2.0.
  4. */
  5. #include <aws/common/string.h>
  6. #ifdef _WIN32
  7. # include <windows.h>
  8. struct aws_wstring *aws_string_convert_to_wstring(
  9. struct aws_allocator *allocator,
  10. const struct aws_string *to_convert) {
  11. AWS_PRECONDITION(to_convert);
  12. struct aws_byte_cursor convert_cur = aws_byte_cursor_from_string(to_convert);
  13. return aws_string_convert_to_wchar_from_byte_cursor(allocator, &convert_cur);
  14. }
  15. struct aws_wstring *aws_string_convert_to_wchar_from_byte_cursor(
  16. struct aws_allocator *allocator,
  17. const struct aws_byte_cursor *to_convert) {
  18. AWS_PRECONDITION(to_convert);
  19. /* if a length is passed for the to_convert string, converted size does not include the null terminator,
  20. * which is a good thing. */
  21. int converted_size = MultiByteToWideChar(CP_UTF8, 0, (const char *)to_convert->ptr, (int)to_convert->len, NULL, 0);
  22. if (!converted_size) {
  23. aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
  24. return NULL;
  25. }
  26. size_t str_len_size = 0;
  27. size_t malloc_size = 0;
  28. /* double the size because the return value above is # of characters, not bytes size. */
  29. if (aws_mul_size_checked(sizeof(wchar_t), converted_size, &str_len_size)) {
  30. return NULL;
  31. }
  32. /* UTF-16, the NULL terminator is two bytes. */
  33. if (aws_add_size_checked(sizeof(struct aws_wstring) + 2, str_len_size, &malloc_size)) {
  34. return NULL;
  35. }
  36. struct aws_wstring *str = aws_mem_acquire(allocator, malloc_size);
  37. if (!str) {
  38. return NULL;
  39. }
  40. /* Fields are declared const, so we need to copy them in like this */
  41. *(struct aws_allocator **)(&str->allocator) = allocator;
  42. *(size_t *)(&str->len) = (size_t)converted_size;
  43. int converted_res = MultiByteToWideChar(
  44. CP_UTF8, 0, (const char *)to_convert->ptr, (int)to_convert->len, (wchar_t *)str->bytes, converted_size);
  45. /* windows had its chance to do its thing, no take backsies. */
  46. AWS_FATAL_ASSERT(converted_res > 0);
  47. *(wchar_t *)&str->bytes[converted_size] = 0;
  48. return str;
  49. }
  50. struct aws_wstring *aws_wstring_new_from_cursor(
  51. struct aws_allocator *allocator,
  52. const struct aws_byte_cursor *w_str_cur) {
  53. AWS_PRECONDITION(allocator && aws_byte_cursor_is_valid(w_str_cur));
  54. return aws_wstring_new_from_array(allocator, (wchar_t *)w_str_cur->ptr, w_str_cur->len / sizeof(wchar_t));
  55. }
  56. struct aws_wstring *aws_wstring_new_from_array(struct aws_allocator *allocator, const wchar_t *w_str, size_t len) {
  57. AWS_PRECONDITION(allocator);
  58. AWS_PRECONDITION(AWS_MEM_IS_READABLE(w_str, len));
  59. size_t str_byte_len = 0;
  60. size_t malloc_size = 0;
  61. /* double the size because the return value above is # of characters, not bytes size. */
  62. if (aws_mul_size_checked(sizeof(wchar_t), len, &str_byte_len)) {
  63. return NULL;
  64. }
  65. /* UTF-16, the NULL terminator is two bytes. */
  66. if (aws_add_size_checked(sizeof(struct aws_wstring) + 2, str_byte_len, &malloc_size)) {
  67. return NULL;
  68. }
  69. struct aws_wstring *str = aws_mem_acquire(allocator, malloc_size);
  70. /* Fields are declared const, so we need to copy them in like this */
  71. *(struct aws_allocator **)(&str->allocator) = allocator;
  72. *(size_t *)(&str->len) = len;
  73. if (len > 0) {
  74. memcpy((void *)str->bytes, w_str, str_byte_len);
  75. }
  76. /* in case this is a utf-16 string in the array, allow that here. */
  77. *(wchar_t *)&str->bytes[len] = 0;
  78. AWS_RETURN_WITH_POSTCONDITION(str, aws_wstring_is_valid(str));
  79. }
  80. bool aws_wstring_is_valid(const struct aws_wstring *str) {
  81. return str && AWS_MEM_IS_READABLE(&str->bytes[0], str->len + 1) && str->bytes[str->len] == 0;
  82. }
  83. void aws_wstring_destroy(struct aws_wstring *str) {
  84. AWS_PRECONDITION(!str || aws_wstring_is_valid(str));
  85. if (str && str->allocator) {
  86. aws_mem_release(str->allocator, str);
  87. }
  88. }
  89. static struct aws_string *s_convert_from_wchar(
  90. struct aws_allocator *allocator,
  91. const wchar_t *to_convert,
  92. int len_chars) {
  93. AWS_FATAL_PRECONDITION(to_convert);
  94. int bytes_size = WideCharToMultiByte(CP_UTF8, 0, to_convert, len_chars, NULL, 0, NULL, NULL);
  95. if (!bytes_size) {
  96. aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
  97. return NULL;
  98. }
  99. size_t malloc_size = 0;
  100. /* bytes_size already contains the space for the null terminator */
  101. if (aws_add_size_checked(sizeof(struct aws_string), bytes_size, &malloc_size)) {
  102. return NULL;
  103. }
  104. struct aws_string *str = aws_mem_acquire(allocator, malloc_size);
  105. if (!str) {
  106. return NULL;
  107. }
  108. /* Fields are declared const, so we need to copy them in like this */
  109. *(struct aws_allocator **)(&str->allocator) = allocator;
  110. *(size_t *)(&str->len) = (size_t)bytes_size - 1;
  111. int converted_res =
  112. WideCharToMultiByte(CP_UTF8, 0, to_convert, len_chars, (char *)str->bytes, bytes_size, NULL, NULL);
  113. /* windows had its chance to do its thing, no take backsies. */
  114. AWS_FATAL_ASSERT(converted_res > 0);
  115. *(uint8_t *)&str->bytes[str->len] = 0;
  116. return str;
  117. }
  /**
   * Converts an aws_wstring into a newly allocated aws_string.
   *
   * The wide characters and their count (narrowed to int) are forwarded to
   * s_convert_from_wchar(), whose result is returned as-is.
   */
  struct aws_string *aws_string_convert_from_wchar_str(
      struct aws_allocator *allocator,
      const struct aws_wstring *to_convert) {
      AWS_FATAL_PRECONDITION(to_convert);

      const wchar_t *wide_chars = aws_wstring_c_str(to_convert);
      const int num_chars = (int)aws_wstring_num_chars(to_convert);
      return s_convert_from_wchar(allocator, wide_chars, num_chars);
  }
  /**
   * Converts a wide-character C string into a newly allocated aws_string.
   *
   * A length of -1 is passed to s_convert_from_wchar(), so the input is
   * expected to be null-terminated.
   */
  struct aws_string *aws_string_convert_from_wchar_c_str(struct aws_allocator *allocator, const wchar_t *to_convert) {
      const int null_terminated_input = -1;
      return s_convert_from_wchar(allocator, to_convert, null_terminated_input);
  }
  127. const wchar_t *aws_wstring_c_str(const struct aws_wstring *str) {
  128. AWS_PRECONDITION(str);
  129. return str->bytes;
  130. }
  131. size_t aws_wstring_num_chars(const struct aws_wstring *str) {
  132. AWS_PRECONDITION(str);
  133. if (str->len == 0) {
  134. return 0;
  135. }
  136. return str->len;
  137. }
  /**
   * Returns the size of the string's characters in bytes: the character count
   * multiplied by sizeof(wchar_t).
   */
  size_t aws_wstring_size_bytes(const struct aws_wstring *str) {
      AWS_PRECONDITION(str);

      const size_t num_chars = aws_wstring_num_chars(str);
      return num_chars * sizeof(wchar_t);
  }
  142. #endif /* _WIN32 */
  /**
   * Creates a new aws_string holding a copy of a null-terminated C string.
   *
   * The length is measured with strlen() and the copy is made by
   * aws_string_new_from_array().
   */
  struct aws_string *aws_string_new_from_c_str(struct aws_allocator *allocator, const char *c_str) {
      AWS_PRECONDITION(allocator && c_str);

      const size_t c_str_len = strlen(c_str);
      const uint8_t *c_str_bytes = (const uint8_t *)c_str;
      return aws_string_new_from_array(allocator, c_str_bytes, c_str_len);
  }
  147. struct aws_string *aws_string_new_from_array(struct aws_allocator *allocator, const uint8_t *bytes, size_t len) {
  148. AWS_PRECONDITION(allocator);
  149. AWS_PRECONDITION(AWS_MEM_IS_READABLE(bytes, len));
  150. size_t malloc_size;
  151. if (aws_add_size_checked(sizeof(struct aws_string) + 1, len, &malloc_size)) {
  152. return NULL;
  153. }
  154. struct aws_string *str = aws_mem_acquire(allocator, malloc_size);
  155. if (!str) {
  156. return NULL;
  157. }
  158. /* Fields are declared const, so we need to copy them in like this */
  159. *(struct aws_allocator **)(&str->allocator) = allocator;
  160. *(size_t *)(&str->len) = len;
  161. if (len > 0) {
  162. memcpy((void *)str->bytes, bytes, len);
  163. }
  164. *(uint8_t *)&str->bytes[len] = 0;
  165. AWS_RETURN_WITH_POSTCONDITION(str, aws_string_is_valid(str));
  166. }
  167. struct aws_string *aws_string_new_from_string(struct aws_allocator *allocator, const struct aws_string *str) {
  168. AWS_PRECONDITION(allocator && aws_string_is_valid(str));
  169. return aws_string_new_from_array(allocator, str->bytes, str->len);
  170. }
  171. struct aws_string *aws_string_new_from_cursor(struct aws_allocator *allocator, const struct aws_byte_cursor *cursor) {
  172. AWS_PRECONDITION(allocator && aws_byte_cursor_is_valid(cursor));
  173. return aws_string_new_from_array(allocator, cursor->ptr, cursor->len);
  174. }
  175. struct aws_string *aws_string_new_from_buf(struct aws_allocator *allocator, const struct aws_byte_buf *buf) {
  176. AWS_PRECONDITION(allocator && aws_byte_buf_is_valid(buf));
  177. return aws_string_new_from_array(allocator, buf->buffer, buf->len);
  178. }
  179. void aws_string_destroy(struct aws_string *str) {
  180. AWS_PRECONDITION(!str || aws_string_is_valid(str));
  181. if (str && str->allocator) {
  182. aws_mem_release(str->allocator, str);
  183. }
  184. }
  185. void aws_string_destroy_secure(struct aws_string *str) {
  186. AWS_PRECONDITION(!str || aws_string_is_valid(str));
  187. if (str) {
  188. aws_secure_zero((void *)aws_string_bytes(str), str->len);
  189. if (str->allocator) {
  190. aws_mem_release(str->allocator, str);
  191. }
  192. }
  193. }
  194. int aws_string_compare(const struct aws_string *a, const struct aws_string *b) {
  195. AWS_PRECONDITION(!a || aws_string_is_valid(a));
  196. AWS_PRECONDITION(!b || aws_string_is_valid(b));
  197. if (a == b) {
  198. return 0; /* strings identical */
  199. }
  200. if (a == NULL) {
  201. return -1;
  202. }
  203. if (b == NULL) {
  204. return 1;
  205. }
  206. size_t len_a = a->len;
  207. size_t len_b = b->len;
  208. size_t min_len = len_a < len_b ? len_a : len_b;
  209. int ret = memcmp(aws_string_bytes(a), aws_string_bytes(b), min_len);
  210. AWS_POSTCONDITION(aws_string_is_valid(a));
  211. AWS_POSTCONDITION(aws_string_is_valid(b));
  212. if (ret) {
  213. return ret; /* overlapping characters differ */
  214. }
  215. if (len_a == len_b) {
  216. return 0; /* strings identical */
  217. }
  218. if (len_a > len_b) {
  219. return 1; /* string b is first n characters of string a */
  220. }
  221. return -1; /* string a is first n characters of string b */
  222. }
  /**
   * Comparator over pointers to `struct aws_string *` elements.
   *
   * `a` and `b` each point at an element holding an aws_string pointer. The
   * element pointers themselves are checked first (identical -> 0, NULL
   * orders first); otherwise the pointed-to strings are compared with
   * aws_string_compare().
   */
  int aws_array_list_comparator_string(const void *a, const void *b) {
      if (a == b) {
          return 0; /* same element */
      }
      if (!a) {
          return -1;
      }
      if (!b) {
          return 1;
      }

      const struct aws_string *const *elem_a = (const struct aws_string **)a;
      const struct aws_string *const *elem_b = (const struct aws_string **)b;
      return aws_string_compare(*elem_a, *elem_b);
  }
  237. /**
  238. * Returns true if bytes of string are the same, false otherwise.
  239. */
  240. bool aws_string_eq(const struct aws_string *a, const struct aws_string *b) {
  241. AWS_PRECONDITION(!a || aws_string_is_valid(a));
  242. AWS_PRECONDITION(!b || aws_string_is_valid(b));
  243. if (a == b) {
  244. return true;
  245. }
  246. if (a == NULL || b == NULL) {
  247. return false;
  248. }
  249. return aws_array_eq(a->bytes, a->len, b->bytes, b->len);
  250. }
  251. /**
  252. * Returns true if bytes of string are equivalent, using a case-insensitive comparison.
  253. */
  254. bool aws_string_eq_ignore_case(const struct aws_string *a, const struct aws_string *b) {
  255. AWS_PRECONDITION(!a || aws_string_is_valid(a));
  256. AWS_PRECONDITION(!b || aws_string_is_valid(b));
  257. if (a == b) {
  258. return true;
  259. }
  260. if (a == NULL || b == NULL) {
  261. return false;
  262. }
  263. return aws_array_eq_ignore_case(a->bytes, a->len, b->bytes, b->len);
  264. }
  265. /**
  266. * Returns true if bytes of string and cursor are the same, false otherwise.
  267. */
  268. bool aws_string_eq_byte_cursor(const struct aws_string *str, const struct aws_byte_cursor *cur) {
  269. AWS_PRECONDITION(!str || aws_string_is_valid(str));
  270. AWS_PRECONDITION(!cur || aws_byte_cursor_is_valid(cur));
  271. if (str == NULL && cur == NULL) {
  272. return true;
  273. }
  274. if (str == NULL || cur == NULL) {
  275. return false;
  276. }
  277. return aws_array_eq(str->bytes, str->len, cur->ptr, cur->len);
  278. }
  279. /**
  280. * Returns true if bytes of string and cursor are equivalent, using a case-insensitive comparison.
  281. */
  282. bool aws_string_eq_byte_cursor_ignore_case(const struct aws_string *str, const struct aws_byte_cursor *cur) {
  283. AWS_PRECONDITION(!str || aws_string_is_valid(str));
  284. AWS_PRECONDITION(!cur || aws_byte_cursor_is_valid(cur));
  285. if (str == NULL && cur == NULL) {
  286. return true;
  287. }
  288. if (str == NULL || cur == NULL) {
  289. return false;
  290. }
  291. return aws_array_eq_ignore_case(str->bytes, str->len, cur->ptr, cur->len);
  292. }
  293. /**
  294. * Returns true if bytes of string and buffer are the same, false otherwise.
  295. */
  296. bool aws_string_eq_byte_buf(const struct aws_string *str, const struct aws_byte_buf *buf) {
  297. AWS_PRECONDITION(!str || aws_string_is_valid(str));
  298. AWS_PRECONDITION(!buf || aws_byte_buf_is_valid(buf));
  299. if (str == NULL && buf == NULL) {
  300. return true;
  301. }
  302. if (str == NULL || buf == NULL) {
  303. return false;
  304. }
  305. return aws_array_eq(str->bytes, str->len, buf->buffer, buf->len);
  306. }
  307. /**
  308. * Returns true if bytes of string and buffer are equivalent, using a case-insensitive comparison.
  309. */
  310. bool aws_string_eq_byte_buf_ignore_case(const struct aws_string *str, const struct aws_byte_buf *buf) {
  311. AWS_PRECONDITION(!str || aws_string_is_valid(str));
  312. AWS_PRECONDITION(!buf || aws_byte_buf_is_valid(buf));
  313. if (str == NULL && buf == NULL) {
  314. return true;
  315. }
  316. if (str == NULL || buf == NULL) {
  317. return false;
  318. }
  319. return aws_array_eq_ignore_case(str->bytes, str->len, buf->buffer, buf->len);
  320. }
  321. bool aws_string_eq_c_str(const struct aws_string *str, const char *c_str) {
  322. AWS_PRECONDITION(!str || aws_string_is_valid(str));
  323. if (str == NULL && c_str == NULL) {
  324. return true;
  325. }
  326. if (str == NULL || c_str == NULL) {
  327. return false;
  328. }
  329. return aws_array_eq_c_str(str->bytes, str->len, c_str);
  330. }
  331. /**
  332. * Returns true if bytes of strings are equivalent, using a case-insensitive comparison.
  333. */
  334. bool aws_string_eq_c_str_ignore_case(const struct aws_string *str, const char *c_str) {
  335. AWS_PRECONDITION(!str || aws_string_is_valid(str));
  336. if (str == NULL && c_str == NULL) {
  337. return true;
  338. }
  339. if (str == NULL || c_str == NULL) {
  340. return false;
  341. }
  342. return aws_array_eq_c_str_ignore_case(str->bytes, str->len, c_str);
  343. }
  344. bool aws_byte_buf_write_from_whole_string(
  345. struct aws_byte_buf *AWS_RESTRICT buf,
  346. const struct aws_string *AWS_RESTRICT src) {
  347. AWS_PRECONDITION(!buf || aws_byte_buf_is_valid(buf));
  348. AWS_PRECONDITION(!src || aws_string_is_valid(src));
  349. if (buf == NULL || src == NULL) {
  350. return false;
  351. }
  352. return aws_byte_buf_write(buf, aws_string_bytes(src), src->len);
  353. }
  354. /**
  355. * Creates an aws_byte_cursor from an existing string.
  356. */
  357. struct aws_byte_cursor aws_byte_cursor_from_string(const struct aws_string *src) {
  358. AWS_PRECONDITION(aws_string_is_valid(src));
  359. return aws_byte_cursor_from_array(aws_string_bytes(src), src->len);
  360. }
  361. struct aws_string *aws_string_clone_or_reuse(struct aws_allocator *allocator, const struct aws_string *str) {
  362. AWS_PRECONDITION(allocator);
  363. AWS_PRECONDITION(aws_string_is_valid(str));
  364. if (str->allocator == NULL) {
  365. /* Since the string cannot be deallocated, we assume that it will remain valid for the lifetime of the
  366. * application */
  367. AWS_POSTCONDITION(aws_string_is_valid(str));
  368. return (struct aws_string *)str;
  369. }
  370. AWS_POSTCONDITION(aws_string_is_valid(str));
  371. return aws_string_new_from_string(allocator, str);
  372. }
  373. int aws_secure_strlen(const char *str, size_t max_read_len, size_t *str_len) {
  374. AWS_ERROR_PRECONDITION(str && str_len, AWS_ERROR_INVALID_ARGUMENT);
  375. /* why not strnlen? It doesn't work everywhere as it wasn't standardized til C11, and is considered
  376. * a GNU extension. This should be faster anyways. This should work for ascii and utf8.
  377. * Any other character sets in use deserve what they get. */
  378. char *null_char_ptr = memchr(str, '\0', max_read_len);
  379. if (null_char_ptr) {
  380. *str_len = null_char_ptr - str;
  381. return AWS_OP_SUCCESS;
  382. }
  383. return aws_raise_error(AWS_ERROR_C_STRING_BUFFER_NOT_NULL_TERMINATED);
  384. }