123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220 |
- /*
- * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- #include "yajl_encode.h"
- #include <assert.h>
- #include <stdlib.h>
- #include <string.h>
- #include <stdio.h>
/*
 * Render one byte as two uppercase hexadecimal digits.
 *
 * c       the byte to render
 * hexBuf  receives exactly two characters, high nibble first; this
 *         function does not write a NUL terminator
 */
static void CharToHex(unsigned char c, char * hexBuf)
{
    static const char digits[] = "0123456789ABCDEF";
    const unsigned int high = (unsigned int) c >> 4;
    const unsigned int low = (unsigned int) c & 0x0Fu;

    *hexBuf++ = digits[high];
    *hexBuf = digits[low];
}
- void
- yajl_string_encode(const yajl_print_t print,
- void * ctx,
- const unsigned char * str,
- size_t len,
- int escape_solidus)
- {
- size_t beg = 0;
- size_t end = 0;
- char hexBuf[7];
- hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
- hexBuf[6] = 0;
- while (end < len) {
- const char * escaped = NULL;
- switch (str[end]) {
- case '\r': escaped = "\\r"; break;
- case '\n': escaped = "\\n"; break;
- case '\\': escaped = "\\\\"; break;
- /* it is not required to escape a solidus in JSON:
- * read sec. 2.5: http://www.ietf.org/rfc/rfc4627.txt
- * specifically, this production from the grammar:
- * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
- */
- case '/': if (escape_solidus) escaped = "\\/"; break;
- case '"': escaped = "\\\""; break;
- case '\f': escaped = "\\f"; break;
- case '\b': escaped = "\\b"; break;
- case '\t': escaped = "\\t"; break;
- default:
- if ((unsigned char) str[end] < 32) {
- CharToHex(str[end], hexBuf + 4);
- escaped = hexBuf;
- }
- break;
- }
- if (escaped != NULL) {
- print(ctx, (const char *) (str + beg), end - beg);
- print(ctx, escaped, (unsigned int)strlen(escaped));
- beg = ++end;
- } else {
- ++end;
- }
- }
- print(ctx, (const char *) (str + beg), end - beg);
- }
/*
 * Fold four hexadecimal characters into *val.
 *
 * For each of hex[0..3], *val is shifted left by four bits and the
 * digit's value is OR-ed in, so the caller decides the starting value
 * (normally 0).  Letters are accepted in either case.  A character
 * whose computed value does not fit in four bits trips the assert.
 */
static void hexToDigit(unsigned int * val, const unsigned char * hex)
{
    const unsigned char * const stop = hex + 4;

    while (hex != stop) {
        unsigned char nibble = *hex++;

        if (nibble >= 'A') {
            /* clear the lowercase bit, then close the gap between
             * '9' and 'A' so that 'A' lands right after '9' */
            nibble = (unsigned char) ((nibble & ~0x20) - 7);
        }
        nibble = (unsigned char) (nibble - '0');
        assert((nibble & 0xF0) == 0);
        *val = (*val << 4) | nibble;
    }
}
/*
 * Encode a code point as a NUL-terminated UTF-8 string.
 *
 * codepoint  value to encode
 * utf8Buf    output buffer; must have room for 5 bytes (up to four
 *            encoded bytes plus the terminator)
 *
 * Values above U+10FFFF lie outside the range UTF-8 can represent
 * (RFC 3629) and are written as "?".  Values in the surrogate range
 * are not filtered here and are encoded like any other three-byte
 * value.  Note that code point 0 yields an empty string, because the
 * encoded byte is itself the terminator.
 */
static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf)
{
    if (codepoint < 0x80) {
        utf8Buf[0] = (char) codepoint;
        utf8Buf[1] = 0;
    } else if (codepoint < 0x0800) {
        utf8Buf[0] = (char) ((codepoint >> 6) | 0xC0);
        utf8Buf[1] = (char) ((codepoint & 0x3F) | 0x80);
        utf8Buf[2] = 0;
    } else if (codepoint < 0x10000) {
        utf8Buf[0] = (char) ((codepoint >> 12) | 0xE0);
        utf8Buf[1] = (char) (((codepoint >> 6) & 0x3F) | 0x80);
        utf8Buf[2] = (char) ((codepoint & 0x3F) | 0x80);
        utf8Buf[3] = 0;
    } else if (codepoint <= 0x10FFFF) {
        /* U+10FFFF is the last code point; anything larger would
         * produce a lead byte above 0xF4, which is never valid */
        utf8Buf[0] = (char) ((codepoint >> 18) | 0xF0);
        utf8Buf[1] = (char) (((codepoint >> 12) & 0x3F) | 0x80);
        utf8Buf[2] = (char) (((codepoint >> 6) & 0x3F) | 0x80);
        utf8Buf[3] = (char) ((codepoint & 0x3F) | 0x80);
        utf8Buf[4] = 0;
    } else {
        utf8Buf[0] = '?';
        utf8Buf[1] = 0;
    }
}
- void yajl_string_decode(yajl_buf buf, const unsigned char * str,
- size_t len)
- {
- size_t beg = 0;
- size_t end = 0;
- while (end < len) {
- if (str[end] == '\\') {
- char utf8Buf[5];
- const char * unescaped = "?";
- yajl_buf_append(buf, str + beg, end - beg);
- switch (str[++end]) {
- case 'r': unescaped = "\r"; break;
- case 'n': unescaped = "\n"; break;
- case '\\': unescaped = "\\"; break;
- case '/': unescaped = "/"; break;
- case '"': unescaped = "\""; break;
- case 'f': unescaped = "\f"; break;
- case 'b': unescaped = "\b"; break;
- case 't': unescaped = "\t"; break;
- case 'u': {
- unsigned int codepoint = 0;
- hexToDigit(&codepoint, str + ++end);
- end+=3;
- /* check if this is a surrogate */
- if ((codepoint & 0xFC00) == 0xD800) {
- end++;
- if (str[end] == '\\' && str[end + 1] == 'u') {
- unsigned int surrogate = 0;
- hexToDigit(&surrogate, str + end + 2);
- codepoint =
- (((codepoint & 0x3F) << 10) |
- ((((codepoint >> 6) & 0xF) + 1) << 16) |
- (surrogate & 0x3FF));
- end += 5;
- } else {
- unescaped = "?";
- break;
- }
- }
-
- Utf32toUtf8(codepoint, utf8Buf);
- unescaped = utf8Buf;
- if (codepoint == 0) {
- yajl_buf_append(buf, unescaped, 1);
- beg = ++end;
- continue;
- }
- break;
- }
- default:
- assert("this should never happen" == NULL);
- }
- yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped));
- beg = ++end;
- } else {
- end++;
- }
- }
- yajl_buf_append(buf, str + beg, end - beg);
- }
/*
 * Check whether the len bytes at s form well-formed UTF-8.
 *
 * Returns 1 when the input is valid (an empty input is always valid,
 * even if s is NULL) and 0 otherwise.  Besides checking the lead /
 * continuation byte structure, the first continuation byte is range
 * checked so that the forms excluded by RFC 3629 are rejected:
 *   - overlong encodings (lead bytes C0/C1, E0 80..9F, F0 80..8F)
 *   - encoded surrogates U+D800..U+DFFF (ED A0..BF)
 *   - values above U+10FFFF (F4 90..BF and lead bytes F5 and up)
 * A sequence that is cut off by the end of the input is invalid.
 */
int yajl_string_validate_utf8(const unsigned char * s, size_t len)
{
    size_t i = 0;

    if (!len) return 1;
    if (!s) return 0;

    while (i < len) {
        const unsigned char lead = s[i];
        size_t trail;        /* continuation bytes that must follow */
        unsigned char lo;    /* allowed range of the first of them */
        unsigned char hi;
        size_t k;

        /* single byte */
        if (lead <= 0x7F) {
            i++;
            continue;
        }

        if (lead >= 0xC2 && lead <= 0xDF) {
            /* two byte; C0 and C1 could only encode overlong ASCII */
            trail = 1;
            lo = 0x80;
            hi = 0xBF;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            /* three byte; E0 must not be overlong, ED must stay
             * below the surrogate block */
            trail = 2;
            lo = (lead == 0xE0) ? 0xA0 : 0x80;
            hi = (lead == 0xED) ? 0x9F : 0xBF;
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            /* four byte; F0 must not be overlong, F4 must not
             * exceed U+10FFFF */
            trail = 3;
            lo = (lead == 0xF0) ? 0x90 : 0x80;
            hi = (lead == 0xF4) ? 0x8F : 0xBF;
        } else {
            /* stray continuation byte or a lead byte that can never
             * start a valid sequence */
            return 0;
        }

        /* the whole sequence must lie inside the input */
        if (len - i <= trail) return 0;

        if (s[i + 1] < lo || s[i + 1] > hi) return 0;
        for (k = 2; k <= trail; k++) {
            if ((s[i + k] & 0xC0) != 0x80) return 0;
        }

        i += trail + 1;
    }

    return 1;
}
|