strutil.c 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. /**
  2. * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
  3. * SPDX-License-Identifier: Apache-2.0.
  4. */
  5. #include <aws/http/private/strutil.h>
  6. static struct aws_byte_cursor s_trim(struct aws_byte_cursor cursor, const bool trim_table[256]) {
  7. /* trim leading whitespace */
  8. size_t i;
  9. for (i = 0; i < cursor.len; ++i) {
  10. const uint8_t c = cursor.ptr[i];
  11. if (!trim_table[c]) {
  12. break;
  13. }
  14. }
  15. cursor.ptr += i;
  16. cursor.len -= i;
  17. /* trim trailing whitespace */
  18. for (; cursor.len; --cursor.len) {
  19. const uint8_t c = cursor.ptr[cursor.len - 1];
  20. if (!trim_table[c]) {
  21. break;
  22. }
  23. }
  24. return cursor;
  25. }
  /* Lookup table, indexed by octet: true only for horizontal tab and space. */
  static const bool s_http_whitespace_table[256] = {
      ['\t'] = true,
      [' '] = true,
  };
  30. struct aws_byte_cursor aws_strutil_trim_http_whitespace(struct aws_byte_cursor cursor) {
  31. return s_trim(cursor, s_http_whitespace_table);
  32. }
  /**
   * Lookup table, indexed by octet: true for every tchar.
   *
   * RFC7230 section 3.2.6:
   *   token = 1*tchar
   *   tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
   *         / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
   *         / DIGIT / ALPHA
   */
  static const bool s_http_token_table[256] = {
      /* DIGIT */
      ['0'] = true, ['1'] = true, ['2'] = true, ['3'] = true, ['4'] = true,
      ['5'] = true, ['6'] = true, ['7'] = true, ['8'] = true, ['9'] = true,

      /* ALPHA (uppercase) */
      ['A'] = true, ['B'] = true, ['C'] = true, ['D'] = true, ['E'] = true, ['F'] = true, ['G'] = true,
      ['H'] = true, ['I'] = true, ['J'] = true, ['K'] = true, ['L'] = true, ['M'] = true, ['N'] = true,
      ['O'] = true, ['P'] = true, ['Q'] = true, ['R'] = true, ['S'] = true, ['T'] = true, ['U'] = true,
      ['V'] = true, ['W'] = true, ['X'] = true, ['Y'] = true, ['Z'] = true,

      /* ALPHA (lowercase) */
      ['a'] = true, ['b'] = true, ['c'] = true, ['d'] = true, ['e'] = true, ['f'] = true, ['g'] = true,
      ['h'] = true, ['i'] = true, ['j'] = true, ['k'] = true, ['l'] = true, ['m'] = true, ['n'] = true,
      ['o'] = true, ['p'] = true, ['q'] = true, ['r'] = true, ['s'] = true, ['t'] = true, ['u'] = true,
      ['v'] = true, ['w'] = true, ['x'] = true, ['y'] = true, ['z'] = true,

      /* Remaining tchar punctuation */
      ['!'] = true, ['#'] = true, ['$'] = true, ['%'] = true, ['&'] = true,
      ['\''] = true, ['*'] = true, ['+'] = true, ['-'] = true, ['.'] = true,
      ['^'] = true, ['_'] = true, ['`'] = true, ['|'] = true, ['~'] = true,
  };
  /* Lookup table, indexed by octet: the RFC7230 tchar set minus the uppercase letters 'A'-'Z'. */
  static const bool s_http_lowercase_token_table[256] = {
      /* DIGIT */
      ['0'] = true, ['1'] = true, ['2'] = true, ['3'] = true, ['4'] = true,
      ['5'] = true, ['6'] = true, ['7'] = true, ['8'] = true, ['9'] = true,

      /* ALPHA (lowercase only) */
      ['a'] = true, ['b'] = true, ['c'] = true, ['d'] = true, ['e'] = true, ['f'] = true, ['g'] = true,
      ['h'] = true, ['i'] = true, ['j'] = true, ['k'] = true, ['l'] = true, ['m'] = true, ['n'] = true,
      ['o'] = true, ['p'] = true, ['q'] = true, ['r'] = true, ['s'] = true, ['t'] = true, ['u'] = true,
      ['v'] = true, ['w'] = true, ['x'] = true, ['y'] = true, ['z'] = true,

      /* Remaining tchar punctuation */
      ['!'] = true, ['#'] = true, ['$'] = true, ['%'] = true, ['&'] = true,
      ['\''] = true, ['*'] = true, ['+'] = true, ['-'] = true, ['.'] = true,
      ['^'] = true, ['_'] = true, ['`'] = true, ['|'] = true, ['~'] = true,
  };
  64. static bool s_is_token(struct aws_byte_cursor token, const bool token_table[256]) {
  65. if (token.len == 0) {
  66. return false;
  67. }
  68. for (size_t i = 0; i < token.len; ++i) {
  69. const uint8_t c = token.ptr[i];
  70. if (token_table[c] == false) {
  71. return false;
  72. }
  73. }
  74. return true;
  75. }
  76. bool aws_strutil_is_http_token(struct aws_byte_cursor token) {
  77. return s_is_token(token, s_http_token_table);
  78. }
  79. bool aws_strutil_is_lowercase_http_token(struct aws_byte_cursor token) {
  80. return s_is_token(token, s_http_lowercase_token_table);
  81. }
  /**
   * Lookup table, indexed by octet: true for every octet permitted in field-content,
   * per RFC7230 sections 3.2 and 3.2.6 and RFC5234 appendix B.1:
   *
   *   field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
   *   field-vchar   = VCHAR / obs-text
   *   VCHAR         = %x21-7E ; visible (printing) characters
   *   obs-text      = %x80-FF
   */
  static const bool s_http_field_content_table[256] = {
      /* clang-format off */

      /* SP / HTAB */
      [' '] = true, ['\t'] = true,

      /* VCHAR = %x21-7E (rows aligned on 8-octet boundaries) */
      [0x21] = true, [0x22] = true, [0x23] = true, [0x24] = true, [0x25] = true, [0x26] = true, [0x27] = true,
      [0x28] = true, [0x29] = true, [0x2A] = true, [0x2B] = true, [0x2C] = true, [0x2D] = true, [0x2E] = true, [0x2F] = true,
      [0x30] = true, [0x31] = true, [0x32] = true, [0x33] = true, [0x34] = true, [0x35] = true, [0x36] = true, [0x37] = true,
      [0x38] = true, [0x39] = true, [0x3A] = true, [0x3B] = true, [0x3C] = true, [0x3D] = true, [0x3E] = true, [0x3F] = true,
      [0x40] = true, [0x41] = true, [0x42] = true, [0x43] = true, [0x44] = true, [0x45] = true, [0x46] = true, [0x47] = true,
      [0x48] = true, [0x49] = true, [0x4A] = true, [0x4B] = true, [0x4C] = true, [0x4D] = true, [0x4E] = true, [0x4F] = true,
      [0x50] = true, [0x51] = true, [0x52] = true, [0x53] = true, [0x54] = true, [0x55] = true, [0x56] = true, [0x57] = true,
      [0x58] = true, [0x59] = true, [0x5A] = true, [0x5B] = true, [0x5C] = true, [0x5D] = true, [0x5E] = true, [0x5F] = true,
      [0x60] = true, [0x61] = true, [0x62] = true, [0x63] = true, [0x64] = true, [0x65] = true, [0x66] = true, [0x67] = true,
      [0x68] = true, [0x69] = true, [0x6A] = true, [0x6B] = true, [0x6C] = true, [0x6D] = true, [0x6E] = true, [0x6F] = true,
      [0x70] = true, [0x71] = true, [0x72] = true, [0x73] = true, [0x74] = true, [0x75] = true, [0x76] = true, [0x77] = true,
      [0x78] = true, [0x79] = true, [0x7A] = true, [0x7B] = true, [0x7C] = true, [0x7D] = true, [0x7E] = true,

      /* obs-text = %x80-FF */
      [0x80] = true, [0x81] = true, [0x82] = true, [0x83] = true, [0x84] = true, [0x85] = true, [0x86] = true, [0x87] = true,
      [0x88] = true, [0x89] = true, [0x8A] = true, [0x8B] = true, [0x8C] = true, [0x8D] = true, [0x8E] = true, [0x8F] = true,
      [0x90] = true, [0x91] = true, [0x92] = true, [0x93] = true, [0x94] = true, [0x95] = true, [0x96] = true, [0x97] = true,
      [0x98] = true, [0x99] = true, [0x9A] = true, [0x9B] = true, [0x9C] = true, [0x9D] = true, [0x9E] = true, [0x9F] = true,
      [0xA0] = true, [0xA1] = true, [0xA2] = true, [0xA3] = true, [0xA4] = true, [0xA5] = true, [0xA6] = true, [0xA7] = true,
      [0xA8] = true, [0xA9] = true, [0xAA] = true, [0xAB] = true, [0xAC] = true, [0xAD] = true, [0xAE] = true, [0xAF] = true,
      [0xB0] = true, [0xB1] = true, [0xB2] = true, [0xB3] = true, [0xB4] = true, [0xB5] = true, [0xB6] = true, [0xB7] = true,
      [0xB8] = true, [0xB9] = true, [0xBA] = true, [0xBB] = true, [0xBC] = true, [0xBD] = true, [0xBE] = true, [0xBF] = true,
      [0xC0] = true, [0xC1] = true, [0xC2] = true, [0xC3] = true, [0xC4] = true, [0xC5] = true, [0xC6] = true, [0xC7] = true,
      [0xC8] = true, [0xC9] = true, [0xCA] = true, [0xCB] = true, [0xCC] = true, [0xCD] = true, [0xCE] = true, [0xCF] = true,
      [0xD0] = true, [0xD1] = true, [0xD2] = true, [0xD3] = true, [0xD4] = true, [0xD5] = true, [0xD6] = true, [0xD7] = true,
      [0xD8] = true, [0xD9] = true, [0xDA] = true, [0xDB] = true, [0xDC] = true, [0xDD] = true, [0xDE] = true, [0xDF] = true,
      [0xE0] = true, [0xE1] = true, [0xE2] = true, [0xE3] = true, [0xE4] = true, [0xE5] = true, [0xE6] = true, [0xE7] = true,
      [0xE8] = true, [0xE9] = true, [0xEA] = true, [0xEB] = true, [0xEC] = true, [0xED] = true, [0xEE] = true, [0xEF] = true,
      [0xF0] = true, [0xF1] = true, [0xF2] = true, [0xF3] = true, [0xF4] = true, [0xF5] = true, [0xF6] = true, [0xF7] = true,
      [0xF8] = true, [0xF9] = true, [0xFA] = true, [0xFB] = true, [0xFC] = true, [0xFD] = true, [0xFE] = true, [0xFF] = true,

      /* clang-format on */
  };
  133. /**
  134. * From RFC7230 section 3.2:
  135. * field-value = *( field-content / obs-fold )
  136. * field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
  137. *
  138. * But we're forbidding obs-fold
  139. */
  140. bool aws_strutil_is_http_field_value(struct aws_byte_cursor cursor) {
  141. if (cursor.len == 0) {
  142. return true;
  143. }
  144. /* first and last char cannot be whitespace */
  145. const uint8_t first_c = cursor.ptr[0];
  146. const uint8_t last_c = cursor.ptr[cursor.len - 1];
  147. if (s_http_whitespace_table[first_c] || s_http_whitespace_table[last_c]) {
  148. return false;
  149. }
  150. /* ensure every char is legal field-content */
  151. size_t i = 0;
  152. do {
  153. const uint8_t c = cursor.ptr[i++];
  154. if (s_http_field_content_table[c] == false) {
  155. return false;
  156. }
  157. } while (i < cursor.len);
  158. return true;
  159. }
  160. /**
  161. * From RFC7230 section 3.1.2:
  162. * reason-phrase = *( HTAB / SP / VCHAR / obs-text )
  163. * VCHAR = %x21-7E ; visible (printing) characters
  164. * obs-text = %x80-FF
  165. */
  166. bool aws_strutil_is_http_reason_phrase(struct aws_byte_cursor cursor) {
  167. for (size_t i = 0; i < cursor.len; ++i) {
  168. const uint8_t c = cursor.ptr[i];
  169. /* the field-content table happens to allow the exact same characters as reason-phrase */
  170. if (s_http_field_content_table[c] == false) {
  171. return false;
  172. }
  173. }
  174. return true;
  175. }
  176. bool aws_strutil_is_http_request_target(struct aws_byte_cursor cursor) {
  177. if (cursor.len == 0) {
  178. return false;
  179. }
  180. /* TODO: Actually check the complete grammar as defined in RFC7230 5.3 and
  181. * RFC3986. Currently this just checks whether the sequence is blatantly illegal */
  182. size_t i = 0;
  183. do {
  184. const uint8_t c = cursor.ptr[i++];
  185. /* everything <= ' ' is non-visible ascii*/
  186. if (c <= ' ') {
  187. return false;
  188. }
  189. } while (i < cursor.len);
  190. return true;
  191. }
  192. bool aws_strutil_is_http_pseudo_header_name(struct aws_byte_cursor cursor) {
  193. if (cursor.len == 0) {
  194. return false;
  195. }
  196. const uint8_t c = cursor.ptr[0];
  197. if (c != ':') {
  198. /* short cut */
  199. return false;
  200. }
  201. return true;
  202. }