yajl_encode.c 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. /*
  2. * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io>
  3. *
  4. * Permission to use, copy, modify, and/or distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include "yajl_encode.h"
  17. #include <assert.h>
  18. #include <stdlib.h>
  19. #include <string.h>
  20. #include <stdio.h>
  21. static void CharToHex(unsigned char c, char * hexBuf)
  22. {
  23. const char * hexchar = "0123456789ABCDEF";
  24. hexBuf[0] = hexchar[c >> 4];
  25. hexBuf[1] = hexchar[c & 0x0F];
  26. }
  27. void
  28. yajl_string_encode(const yajl_print_t print,
  29. void * ctx,
  30. const unsigned char * str,
  31. size_t len,
  32. int escape_solidus)
  33. {
  34. size_t beg = 0;
  35. size_t end = 0;
  36. char hexBuf[7];
  37. hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
  38. hexBuf[6] = 0;
  39. while (end < len) {
  40. const char * escaped = NULL;
  41. switch (str[end]) {
  42. case '\r': escaped = "\\r"; break;
  43. case '\n': escaped = "\\n"; break;
  44. case '\\': escaped = "\\\\"; break;
  45. /* it is not required to escape a solidus in JSON:
  46. * read sec. 2.5: http://www.ietf.org/rfc/rfc4627.txt
  47. * specifically, this production from the grammar:
  48. * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
  49. */
  50. case '/': if (escape_solidus) escaped = "\\/"; break;
  51. case '"': escaped = "\\\""; break;
  52. case '\f': escaped = "\\f"; break;
  53. case '\b': escaped = "\\b"; break;
  54. case '\t': escaped = "\\t"; break;
  55. default:
  56. if ((unsigned char) str[end] < 32) {
  57. CharToHex(str[end], hexBuf + 4);
  58. escaped = hexBuf;
  59. }
  60. break;
  61. }
  62. if (escaped != NULL) {
  63. print(ctx, (const char *) (str + beg), end - beg);
  64. print(ctx, escaped, (unsigned int)strlen(escaped));
  65. beg = ++end;
  66. } else {
  67. ++end;
  68. }
  69. }
  70. print(ctx, (const char *) (str + beg), end - beg);
  71. }
  72. static void hexToDigit(unsigned int * val, const unsigned char * hex)
  73. {
  74. unsigned int i;
  75. for (i=0;i<4;i++) {
  76. unsigned char c = hex[i];
  77. if (c >= 'A') c = (c & ~0x20) - 7;
  78. c -= '0';
  79. assert(!(c & 0xF0));
  80. *val = (*val << 4) | c;
  81. }
  82. }
  83. static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf)
  84. {
  85. if (codepoint < 0x80) {
  86. utf8Buf[0] = (char) codepoint;
  87. utf8Buf[1] = 0;
  88. } else if (codepoint < 0x0800) {
  89. utf8Buf[0] = (char) ((codepoint >> 6) | 0xC0);
  90. utf8Buf[1] = (char) ((codepoint & 0x3F) | 0x80);
  91. utf8Buf[2] = 0;
  92. } else if (codepoint < 0x10000) {
  93. utf8Buf[0] = (char) ((codepoint >> 12) | 0xE0);
  94. utf8Buf[1] = (char) (((codepoint >> 6) & 0x3F) | 0x80);
  95. utf8Buf[2] = (char) ((codepoint & 0x3F) | 0x80);
  96. utf8Buf[3] = 0;
  97. } else if (codepoint < 0x200000) {
  98. utf8Buf[0] =(char)((codepoint >> 18) | 0xF0);
  99. utf8Buf[1] =(char)(((codepoint >> 12) & 0x3F) | 0x80);
  100. utf8Buf[2] =(char)(((codepoint >> 6) & 0x3F) | 0x80);
  101. utf8Buf[3] =(char)((codepoint & 0x3F) | 0x80);
  102. utf8Buf[4] = 0;
  103. } else {
  104. utf8Buf[0] = '?';
  105. utf8Buf[1] = 0;
  106. }
  107. }
  108. void yajl_string_decode(yajl_buf buf, const unsigned char * str,
  109. size_t len)
  110. {
  111. size_t beg = 0;
  112. size_t end = 0;
  113. while (end < len) {
  114. if (str[end] == '\\') {
  115. char utf8Buf[5];
  116. const char * unescaped = "?";
  117. yajl_buf_append(buf, str + beg, end - beg);
  118. switch (str[++end]) {
  119. case 'r': unescaped = "\r"; break;
  120. case 'n': unescaped = "\n"; break;
  121. case '\\': unescaped = "\\"; break;
  122. case '/': unescaped = "/"; break;
  123. case '"': unescaped = "\""; break;
  124. case 'f': unescaped = "\f"; break;
  125. case 'b': unescaped = "\b"; break;
  126. case 't': unescaped = "\t"; break;
  127. case 'u': {
  128. unsigned int codepoint = 0;
  129. hexToDigit(&codepoint, str + ++end);
  130. end+=3;
  131. /* check if this is a surrogate */
  132. if ((codepoint & 0xFC00) == 0xD800) {
  133. end++;
  134. if (str[end] == '\\' && str[end + 1] == 'u') {
  135. unsigned int surrogate = 0;
  136. hexToDigit(&surrogate, str + end + 2);
  137. codepoint =
  138. (((codepoint & 0x3F) << 10) |
  139. ((((codepoint >> 6) & 0xF) + 1) << 16) |
  140. (surrogate & 0x3FF));
  141. end += 5;
  142. } else {
  143. unescaped = "?";
  144. break;
  145. }
  146. }
  147. Utf32toUtf8(codepoint, utf8Buf);
  148. unescaped = utf8Buf;
  149. if (codepoint == 0) {
  150. yajl_buf_append(buf, unescaped, 1);
  151. beg = ++end;
  152. continue;
  153. }
  154. break;
  155. }
  156. default:
  157. assert("this should never happen" == NULL);
  158. }
  159. yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped));
  160. beg = ++end;
  161. } else {
  162. end++;
  163. }
  164. }
  165. yajl_buf_append(buf, str + beg, end - beg);
  166. }
  167. #define ADV_PTR s++; if (!(len--)) return 0;
  168. int yajl_string_validate_utf8(const unsigned char * s, size_t len)
  169. {
  170. if (!len) return 1;
  171. if (!s) return 0;
  172. while (len--) {
  173. /* single byte */
  174. if (*s <= 0x7f) {
  175. /* noop */
  176. }
  177. /* two byte */
  178. else if ((*s >> 5) == 0x6) {
  179. ADV_PTR;
  180. if (!((*s >> 6) == 0x2)) return 0;
  181. }
  182. /* three byte */
  183. else if ((*s >> 4) == 0x0e) {
  184. ADV_PTR;
  185. if (!((*s >> 6) == 0x2)) return 0;
  186. ADV_PTR;
  187. if (!((*s >> 6) == 0x2)) return 0;
  188. }
  189. /* four byte */
  190. else if ((*s >> 3) == 0x1e) {
  191. ADV_PTR;
  192. if (!((*s >> 6) == 0x2)) return 0;
  193. ADV_PTR;
  194. if (!((*s >> 6) == 0x2)) return 0;
  195. ADV_PTR;
  196. if (!((*s >> 6) == 0x2)) return 0;
  197. } else {
  198. return 0;
  199. }
  200. s++;
  201. }
  202. return 1;
  203. }