ucx/json.c

changeset 854
1c8401ece69e
parent 852
83fdf679df99
equal deleted inserted replaced
853:2ad93ebdc8d9 854:1c8401ece69e
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE. 26 * POSSIBILITY OF SUCH DAMAGE.
27 */ 27 */
28 28
29 #include "cx/json.h" 29 #include "cx/json.h"
30 #include "cx/compare.h"
31 30
32 #include <string.h> 31 #include <string.h>
33 #include <ctype.h>
34 #include <assert.h> 32 #include <assert.h>
35 #include <stdio.h> 33 #include <stdio.h>
36 #include <errno.h>
37 #include <inttypes.h> 34 #include <inttypes.h>
38 35
39 /* 36 /*
40 * RFC 8259 37 * RFC 8259
41 * https://tools.ietf.org/html/rfc8259 38 * https://tools.ietf.org/html/rfc8259
133 if (token->allocated) { 130 if (token->allocated) {
134 cx_strfree(&token->content); 131 cx_strfree(&token->content);
135 } 132 }
136 } 133 }
137 134
// Locale-independent check for an ASCII decimal digit.
// Returns true exactly for the characters '0' through '9'.
static bool json_isdigit(char c) {
    // TODO: remove once UCX has public API for this
    if (c < '0') {
        return false;
    }
    return c <= '9';
}
139
// Locale-independent whitespace check.
// Returns true for space, tab, carriage return, line feed, vertical tab
// and form feed; false for every other character.
// NOTE(review): RFC 8259 lists only space, \t, \n and \r as insignificant
// whitespace - \v and \f are accepted here in addition.
static bool json_isspace(char c) {
    // TODO: remove once UCX has public API for this
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\v':
        case '\f':
        case '\r':
            return true;
        default:
            return false;
    }
}
144
138 static int num_isexp(const char *content, size_t length, size_t pos) { 145 static int num_isexp(const char *content, size_t length, size_t pos) {
139 if (pos >= length) { 146 if (pos >= length) {
140 return 0; 147 return 0;
141 } 148 }
142 149
143 int ok = 0; 150 int ok = 0;
144 for (size_t i = pos; i < length; i++) { 151 for (size_t i = pos; i < length; i++) {
145 char c = content[i]; 152 char c = content[i];
146 if (isdigit(c)) { 153 if (json_isdigit(c)) {
147 ok = 1; 154 ok = 1;
148 } else if (i == pos) { 155 } else if (i == pos) {
149 if (!(c == '+' || c == '-')) { 156 if (!(c == '+' || c == '-')) {
150 return 0; 157 return 0;
151 } 158 }
158 } 165 }
159 166
160 static CxJsonTokenType token_numbertype(const char *content, size_t length) { 167 static CxJsonTokenType token_numbertype(const char *content, size_t length) {
161 if (length == 0) return CX_JSON_TOKEN_ERROR; 168 if (length == 0) return CX_JSON_TOKEN_ERROR;
162 169
163 if (content[0] != '-' && !isdigit(content[0])) { 170 if (content[0] != '-' && !json_isdigit(content[0])) {
164 return CX_JSON_TOKEN_ERROR; 171 return CX_JSON_TOKEN_ERROR;
165 } 172 }
166 173
167 CxJsonTokenType type = CX_JSON_TOKEN_INTEGER; 174 CxJsonTokenType type = CX_JSON_TOKEN_INTEGER;
168 for (size_t i = 1; i < length; i++) { 175 for (size_t i = 1; i < length; i++) {
171 return CX_JSON_TOKEN_ERROR; // more than one decimal separator 178 return CX_JSON_TOKEN_ERROR; // more than one decimal separator
172 } 179 }
173 type = CX_JSON_TOKEN_NUMBER; 180 type = CX_JSON_TOKEN_NUMBER;
174 } else if (content[i] == 'e' || content[i] == 'E') { 181 } else if (content[i] == 'e' || content[i] == 'E') {
175 return num_isexp(content, length, i + 1) ? CX_JSON_TOKEN_NUMBER : CX_JSON_TOKEN_ERROR; 182 return num_isexp(content, length, i + 1) ? CX_JSON_TOKEN_NUMBER : CX_JSON_TOKEN_ERROR;
176 } else if (!isdigit(content[i])) { 183 } else if (!json_isdigit(content[i])) {
177 return CX_JSON_TOKEN_ERROR; // char is not a digit, decimal separator or exponent sep 184 return CX_JSON_TOKEN_ERROR; // char is not a digit, decimal separator or exponent sep
178 } 185 }
179 } 186 }
180 187
181 return type; 188 return type;
235 } 242 }
236 case '"': { 243 case '"': {
237 return CX_JSON_TOKEN_STRING; 244 return CX_JSON_TOKEN_STRING;
238 } 245 }
239 default: { 246 default: {
240 if (isspace(c)) { 247 if (json_isspace(c)) {
241 return CX_JSON_TOKEN_SPACE; 248 return CX_JSON_TOKEN_SPACE;
242 } 249 }
243 } 250 }
244 } 251 }
245 return CX_JSON_NO_TOKEN; 252 return CX_JSON_NO_TOKEN;
252 CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA; 259 CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA;
253 } 260 }
254 261
255 // current token type and start index 262 // current token type and start index
256 CxJsonTokenType ttype = json->uncompleted.tokentype; 263 CxJsonTokenType ttype = json->uncompleted.tokentype;
257 size_t token_start = json->buffer.pos; 264 size_t token_part_start = json->buffer.pos;
265
266 bool escape_end_of_string = ttype == CX_JSON_TOKEN_STRING
267 && json->uncompleted.content.ptr[json->uncompleted.content.length-1] == '\\';
258 268
259 for (size_t i = json->buffer.pos; i < json->buffer.size; i++) { 269 for (size_t i = json->buffer.pos; i < json->buffer.size; i++) {
260 char c = json->buffer.space[i]; 270 char c = json->buffer.space[i];
261 if (ttype != CX_JSON_TOKEN_STRING) { 271 if (ttype != CX_JSON_TOKEN_STRING) {
262 // currently non-string token 272 // currently non-string token
266 json->buffer.pos++; 276 json->buffer.pos++;
267 continue; 277 continue;
268 } else if (ctype == CX_JSON_TOKEN_STRING) { 278 } else if (ctype == CX_JSON_TOKEN_STRING) {
269 // begin string 279 // begin string
270 ttype = CX_JSON_TOKEN_STRING; 280 ttype = CX_JSON_TOKEN_STRING;
271 token_start = i; 281 token_part_start = i;
272 } else if (ctype != CX_JSON_NO_TOKEN) { 282 } else if (ctype != CX_JSON_NO_TOKEN) {
273 // single-char token 283 // single-char token
274 json->buffer.pos = i + 1; 284 json->buffer.pos = i + 1;
275 *result = (CxJsonToken){ctype, false, {NULL, 0}}; 285 *result = (CxJsonToken){ctype, false, {NULL, 0}};
276 return CX_JSON_NO_ERROR; 286 return CX_JSON_NO_ERROR;
277 } else { 287 } else {
278 ttype = CX_JSON_TOKEN_LITERAL; // number or literal 288 ttype = CX_JSON_TOKEN_LITERAL; // number or literal
279 token_start = i; 289 token_part_start = i;
280 } 290 }
281 } else { 291 } else {
282 // finish token 292 // finish token
283 if (ctype != CX_JSON_NO_TOKEN) { 293 if (ctype != CX_JSON_NO_TOKEN) {
284 *result = token_create(json, false, token_start, i); 294 *result = token_create(json, false, token_part_start, i);
285 if (result->tokentype == CX_JSON_NO_TOKEN) { 295 if (result->tokentype == CX_JSON_NO_TOKEN) {
286 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE 296 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
287 } 297 }
288 if (result->tokentype == CX_JSON_TOKEN_ERROR) { 298 if (result->tokentype == CX_JSON_TOKEN_ERROR) {
289 return CX_JSON_FORMAT_ERROR_NUMBER; 299 return CX_JSON_FORMAT_ERROR_NUMBER;
292 return CX_JSON_NO_ERROR; 302 return CX_JSON_NO_ERROR;
293 } 303 }
294 } 304 }
295 } else { 305 } else {
296 // currently inside a string 306 // currently inside a string
297 if (json->tokenizer_escape) { 307 if (escape_end_of_string) {
298 json->tokenizer_escape = false; 308 escape_end_of_string = false;
299 } else { 309 } else {
300 if (c == '"') { 310 if (c == '"') {
301 *result = token_create(json, true, token_start, i + 1); 311 *result = token_create(json, true, token_part_start, i + 1);
302 if (result->tokentype == CX_JSON_NO_TOKEN) { 312 if (result->tokentype == CX_JSON_NO_TOKEN) {
303 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE 313 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
304 } 314 }
305 json->buffer.pos = i + 1; 315 json->buffer.pos = i + 1;
306 return CX_JSON_NO_ERROR; 316 return CX_JSON_NO_ERROR;
307 } else if (c == '\\') { 317 } else if (c == '\\') {
308 json->tokenizer_escape = true; 318 escape_end_of_string = true;
309 } 319 }
310 } 320 }
311 } 321 }
312 } 322 }
313 323
314 if (ttype != CX_JSON_NO_TOKEN) { 324 if (ttype != CX_JSON_NO_TOKEN) {
315 // uncompleted token 325 // uncompleted token
316 size_t uncompleted_len = json->buffer.size - token_start; 326 size_t uncompleted_len = json->buffer.size - token_part_start;
317 if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { 327 if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
318 // current token is uncompleted 328 // current token is uncompleted
319 // save current token content 329 // save current token content
320 CxJsonToken uncompleted = { 330 CxJsonToken uncompleted = {
321 ttype, true, 331 ttype, true,
322 cx_strdup(cx_strn(json->buffer.space + token_start, uncompleted_len)) 332 cx_strdup(cx_strn(json->buffer.space + token_part_start, uncompleted_len))
323 }; 333 };
324 if (uncompleted.content.ptr == NULL) { 334 if (uncompleted.content.ptr == NULL) {
325 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE 335 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
326 } 336 }
327 json->uncompleted = uncompleted; 337 json->uncompleted = uncompleted;
328 } else { 338 } else {
329 // previously we also had an uncompleted token 339 // previously we also had an uncompleted token
330 // combine the uncompleted token with the current token 340 // combine the uncompleted token with the current token
331 assert(json->uncompleted.allocated); 341 assert(json->uncompleted.allocated);
332 cxmutstr str = cx_strcat_m(json->uncompleted.content, 1, 342 cxmutstr str = cx_strcat_m(json->uncompleted.content, 1,
333 cx_strn(json->buffer.space + token_start, uncompleted_len)); 343 cx_strn(json->buffer.space + token_part_start, uncompleted_len));
334 if (str.ptr == NULL) { 344 if (str.ptr == NULL) {
335 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE 345 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
336 } 346 }
337 json->uncompleted.content = str; 347 json->uncompleted.content = str;
338 } 348 }
341 } 351 }
342 352
343 return CX_JSON_INCOMPLETE_DATA; 353 return CX_JSON_INCOMPLETE_DATA;
344 } 354 }
345 355
// Encodes a Unicode codepoint as UTF-8.
//
// The encoded bytes are stored in output_buf, which must have room for
// up to 4 bytes; no terminator is written.
// Returns the number of bytes stored (1 to 4), or 0 - without touching
// output_buf - when the codepoint is larger than 0x10FFFF.
static unsigned codepoint_to_utf8(uint32_t codepoint, char *output_buf) {
    if (codepoint > 0x10FFFF) {
        return 0; // LCOV_EXCL_LINE
    }
    if (codepoint < 0x80) {
        // plain ASCII is stored verbatim
        output_buf[0] = (char)codepoint;
        return 1;
    }

    // marker bits of the leading byte, indexed by sequence length
    static const unsigned char lead_marker[] = {0, 0, 0xC0, 0xE0, 0xF0};
    const unsigned nbytes = codepoint < 0x800 ? 2 : (codepoint < 0x10000 ? 3 : 4);

    // fill the continuation bytes back to front, 6 payload bits each
    uint32_t remaining = codepoint;
    for (unsigned k = nbytes - 1; k > 0; k--) {
        output_buf[k] = (char)(0x80 | (remaining & 0x3F));
        remaining >>= 6;
    }
    // whatever is left fits into the payload bits of the leading byte
    output_buf[0] = (char)(lead_marker[nbytes] | remaining);
    return nbytes;
}
380
// Combines a utf16 surrogate pair into the Unicode codepoint it encodes.
// c0 is expected to be the high surrogate (0xD800..0xDBFF) and c1 the low
// surrogate (0xDC00..0xDFFF); the function itself does not validate this.
static inline uint32_t utf16pair_to_codepoint(uint16_t c0, uint16_t c1) {
    const uint32_t high_bits = (uint32_t)c0 - 0xD800u;
    const uint32_t low_bits = (uint32_t)c1 - 0xDC00u;
    return 0x10000u + (high_bits << 10) + low_bits;
}
385
386 static unsigned unescape_unicode_string(cxstring str, char *utf8buf) {
387 // str is supposed to start with "\uXXXX" or "\uXXXX\uXXXX"
388 // remaining bytes in the string are ignored (str may be larger!)
389
390 if (str.length < 6 || str.ptr[0] != '\\' || str.ptr[1] != 'u') {
391 return 0;
392 }
393
394 unsigned utf8len = 0;
395 cxstring ustr1 = { str.ptr + 2, 4};
396 uint16_t utf16a, utf16b;
397 if (!cx_strtou16_lc(ustr1, &utf16a, 16, "")) {
398 uint32_t codepoint;
399 if (utf16a < 0xD800 || utf16a > 0xE000) {
400 // character is in the Basic Multilingual Plane
401 // and encoded as a single utf16 char
402 codepoint = utf16a;
403 utf8len = codepoint_to_utf8(codepoint, utf8buf);
404 } else if (utf16a >= 0xD800 && utf16a <= 0xDBFF) {
405 // character is encoded as a surrogate pair
406 // get next 6 bytes
407 if (str.length >= 12) {
408 if (str.ptr[6] == '\\' && str.ptr[7] == 'u') {
409 cxstring ustr2 = { str.ptr+8, 4 };
410 if (!cx_strtou16_lc(ustr2, &utf16b, 16, "")
411 && utf16b >= 0xDC00 && utf16b <= 0xDFFF) {
412 codepoint = utf16pair_to_codepoint(utf16a, utf16b);
413 utf8len = codepoint_to_utf8(codepoint, utf8buf);
414 }
415 }
416 }
417 }
418 }
419 return utf8len;
420 }
421
346 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { 422 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) {
347 // TODO: support more escape sequences 423 // note: this function expects that str contains the enclosing quotes!
348 // we know that the unescaped string will be shorter by at least 2 chars 424
349 cxmutstr result; 425 cxmutstr result;
350 result.length = 0; 426 result.length = 0;
351 result.ptr = cxMalloc(a, str.length - 1); 427 result.ptr = cxMalloc(a, str.length - 1);
352 if (result.ptr == NULL) return result; // LCOV_EXCL_LINE 428 if (result.ptr == NULL) return result; // LCOV_EXCL_LINE
353 429
356 char c = str.ptr[i]; 432 char c = str.ptr[i];
357 if (u) { 433 if (u) {
358 u = false; 434 u = false;
359 if (c == 'n') { 435 if (c == 'n') {
360 c = '\n'; 436 c = '\n';
437 } else if (c == '"') {
438 c = '"';
361 } else if (c == 't') { 439 } else if (c == 't') {
362 c = '\t'; 440 c = '\t';
363 } 441 } else if (c == 'r') {
442 c = '\r';
443 } else if (c == '\\') {
444 c = '\\';
445 } else if (c == '/') {
446 c = '/'; // always unescape, we don't need settings here
447 } else if (c == 'f') {
448 c = '\f';
449 } else if (c == 'b') {
450 c = '\b';
451 } else if (c == 'u') {
452 char utf8buf[4];
453 unsigned utf8len = unescape_unicode_string(
454 cx_strn(str.ptr + i - 1, str.length + 1 - i),
455 utf8buf
456 );
457 if(utf8len > 0) {
458 i += utf8len < 4 ? 4 : 10;
459 // add all bytes from utf8buf except the last char
460 // to the result (last char will be added below)
461 utf8len--;
462 c = utf8buf[utf8len];
463 for (unsigned x = 0; x < utf8len; x++) {
464 result.ptr[result.length++] = utf8buf[x];
465 }
466 } else {
467 // decoding failed, ignore the entire sequence
468 result.ptr[result.length++] = '\\';
469 }
470 } else {
471 // TODO: discuss the behavior for unrecognized escape sequences
472 // most parsers throw an error here - we just ignore it
473 result.ptr[result.length++] = '\\';
474 }
475
364 result.ptr[result.length++] = c; 476 result.ptr[result.length++] = c;
365 } else { 477 } else {
366 if (c == '\\') { 478 if (c == '\\') {
367 u = true; 479 u = true;
368 } else { 480 } else {
373 result.ptr[result.length] = 0; 485 result.ptr[result.length] = 0;
374 486
375 return result; 487 return result;
376 } 488 }
377 489
378 static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) { 490 static cxmutstr escape_string(cxmutstr str, bool escape_slash) {
491 // note: this function produces the string without enclosing quotes
492 // the reason is that we don't want to allocate memory just for that
493 CxBuffer buf = {0};
494
495 bool all_printable = true;
496 for (size_t i = 0; i < str.length; i++) {
497 unsigned char c = str.ptr[i];
498 bool escape = c < 0x20 || c == '\\' || c == '"'
499 || (escape_slash && c == '/');
500
501 if (all_printable && escape) {
502 size_t capa = str.length + 32;
503 char *space = malloc(capa);
504 if (space == NULL) return cx_mutstrn(NULL, 0);
505 cxBufferInit(&buf, space, capa, NULL, CX_BUFFER_AUTO_EXTEND);
506 cxBufferWrite(str.ptr, 1, i, &buf);
507 all_printable = false;
508 }
509 if (escape) {
510 cxBufferPut(&buf, '\\');
511 if (c == '\"') {
512 cxBufferPut(&buf, '\"');
513 } else if (c == '\n') {
514 cxBufferPut(&buf, 'n');
515 } else if (c == '\t') {
516 cxBufferPut(&buf, 't');
517 } else if (c == '\r') {
518 cxBufferPut(&buf, 'r');
519 } else if (c == '\\') {
520 cxBufferPut(&buf, '\\');
521 } else if (c == '/') {
522 cxBufferPut(&buf, '/');
523 } else if (c == '\f') {
524 cxBufferPut(&buf, 'f');
525 } else if (c == '\b') {
526 cxBufferPut(&buf, 'b');
527 } else {
528 char code[6];
529 snprintf(code, sizeof(code), "u%04x", (unsigned int) c);
530 cxBufferPutString(&buf, code);
531 }
532 } else if (!all_printable) {
533 cxBufferPut(&buf, c);
534 }
535 }
536 if (!all_printable) {
537 str = cx_mutstrn(buf.space, buf.size);
538 }
539 cxBufferDestroy(&buf);
540 return str;
541 }
542
543 static CxJsonValue* json_create_value(CxJson *json, CxJsonValueType type) {
379 CxJsonValue *v = cxCalloc(json->allocator, 1, sizeof(CxJsonValue)); 544 CxJsonValue *v = cxCalloc(json->allocator, 1, sizeof(CxJsonValue));
380 if (v == NULL) return NULL; // LCOV_EXCL_LINE 545 if (v == NULL) return NULL; // LCOV_EXCL_LINE
381 546
382 // initialize the value 547 // initialize the value
383 v->type = type; 548 v->type = type;
539 if (state < 3) { 704 if (state < 3) {
540 // push expected end state to the stack 705 // push expected end state to the stack
541 json_add_state(json, 10 + state); 706 json_add_state(json, 10 + state);
542 switch (token.tokentype) { 707 switch (token.tokentype) {
543 case CX_JSON_TOKEN_BEGIN_ARRAY: { 708 case CX_JSON_TOKEN_BEGIN_ARRAY: {
544 if (create_json_value(json, CX_JSON_ARRAY) == NULL) { 709 if (json_create_value(json, CX_JSON_ARRAY) == NULL) {
545 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE 710 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
546 } 711 }
547 json_add_state(json, JP_STATE_VALUE_BEGIN_AR); 712 json_add_state(json, JP_STATE_VALUE_BEGIN_AR);
548 return_rec(CX_JSON_NO_ERROR); 713 return_rec(CX_JSON_NO_ERROR);
549 } 714 }
550 case CX_JSON_TOKEN_BEGIN_OBJECT: { 715 case CX_JSON_TOKEN_BEGIN_OBJECT: {
551 if (create_json_value(json, CX_JSON_OBJECT) == NULL) { 716 if (json_create_value(json, CX_JSON_OBJECT) == NULL) {
552 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE 717 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
553 } 718 }
554 json_add_state(json, JP_STATE_OBJ_NAME_OR_CLOSE); 719 json_add_state(json, JP_STATE_OBJ_NAME_OR_CLOSE);
555 return_rec(CX_JSON_NO_ERROR); 720 return_rec(CX_JSON_NO_ERROR);
556 } 721 }
557 case CX_JSON_TOKEN_STRING: { 722 case CX_JSON_TOKEN_STRING: {
558 if ((vbuf = create_json_value(json, CX_JSON_STRING)) == NULL) { 723 if ((vbuf = json_create_value(json, CX_JSON_STRING)) == NULL) {
559 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE 724 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
560 } 725 }
561 cxmutstr str = unescape_string(json->allocator, token.content); 726 cxmutstr str = unescape_string(json->allocator, token.content);
562 if (str.ptr == NULL) { 727 if (str.ptr == NULL) {
563 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE 728 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
566 return_rec(CX_JSON_NO_ERROR); 731 return_rec(CX_JSON_NO_ERROR);
567 } 732 }
568 case CX_JSON_TOKEN_INTEGER: 733 case CX_JSON_TOKEN_INTEGER:
569 case CX_JSON_TOKEN_NUMBER: { 734 case CX_JSON_TOKEN_NUMBER: {
570 int type = token.tokentype == CX_JSON_TOKEN_INTEGER ? CX_JSON_INTEGER : CX_JSON_NUMBER; 735 int type = token.tokentype == CX_JSON_TOKEN_INTEGER ? CX_JSON_INTEGER : CX_JSON_NUMBER;
571 if (NULL == (vbuf = create_json_value(json, type))) { 736 if (NULL == (vbuf = json_create_value(json, type))) {
572 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE 737 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
573 } 738 }
574 if (type == CX_JSON_INTEGER) { 739 if (type == CX_JSON_INTEGER) {
575 if (cx_strtoi64(token.content, &vbuf->value.integer, 10)) { 740 if (cx_strtoi64(token.content, &vbuf->value.integer, 10)) {
576 return_rec(CX_JSON_FORMAT_ERROR_NUMBER); 741 return_rec(CX_JSON_FORMAT_ERROR_NUMBER);
581 } 746 }
582 } 747 }
583 return_rec(CX_JSON_NO_ERROR); 748 return_rec(CX_JSON_NO_ERROR);
584 } 749 }
585 case CX_JSON_TOKEN_LITERAL: { 750 case CX_JSON_TOKEN_LITERAL: {
586 if ((vbuf = create_json_value(json, CX_JSON_LITERAL)) == NULL) { 751 if ((vbuf = json_create_value(json, CX_JSON_LITERAL)) == NULL) {
587 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE 752 return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
588 } 753 }
589 if (0 == cx_strcmp(cx_strcast(token.content), cx_str("true"))) { 754 if (0 == cx_strcmp(cx_strcast(token.content), cx_str("true"))) {
590 vbuf->value.literal = CX_JSON_TRUE; 755 vbuf->value.literal = CX_JSON_TRUE;
591 } else if (0 == cx_strcmp(cx_strcast(token.content), cx_str("false"))) { 756 } else if (0 == cx_strcmp(cx_strcast(token.content), cx_str("false"))) {
732 } 897 }
733 cxFree(value->allocator, value); 898 cxFree(value->allocator, value);
734 } 899 }
735 900
736 CxJsonValue* cxJsonCreateObj(const CxAllocator* allocator) { 901 CxJsonValue* cxJsonCreateObj(const CxAllocator* allocator) {
902 if (allocator == NULL) allocator = cxDefaultAllocator;
737 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue)); 903 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
738 if (v == NULL) return NULL; 904 if (v == NULL) return NULL;
739 v->allocator = allocator; 905 v->allocator = allocator;
740 v->type = CX_JSON_OBJECT; 906 v->type = CX_JSON_OBJECT;
741 cx_array_initialize_a(allocator, v->value.object.values, 16); 907 cx_array_initialize_a(allocator, v->value.object.values, 16);
753 } 919 }
754 return v; 920 return v;
755 } 921 }
756 922
757 CxJsonValue* cxJsonCreateArr(const CxAllocator* allocator) { 923 CxJsonValue* cxJsonCreateArr(const CxAllocator* allocator) {
924 if (allocator == NULL) allocator = cxDefaultAllocator;
758 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue)); 925 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
759 if (v == NULL) return NULL; 926 if (v == NULL) return NULL;
760 v->allocator = allocator; 927 v->allocator = allocator;
761 v->type = CX_JSON_ARRAY; 928 v->type = CX_JSON_ARRAY;
762 cx_array_initialize_a(allocator, v->value.array.array, 16); 929 cx_array_initialize_a(allocator, v->value.array.array, 16);
763 if (v->value.array.array == NULL) { cxFree(allocator, v); return NULL; } 930 if (v->value.array.array == NULL) { cxFree(allocator, v); return NULL; }
764 return v; 931 return v;
765 } 932 }
766 933
767 CxJsonValue* cxJsonCreateNumber(const CxAllocator* allocator, double num) { 934 CxJsonValue* cxJsonCreateNumber(const CxAllocator* allocator, double num) {
935 if (allocator == NULL) allocator = cxDefaultAllocator;
768 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue)); 936 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
769 if (v == NULL) return NULL; 937 if (v == NULL) return NULL;
770 v->allocator = allocator; 938 v->allocator = allocator;
771 v->type = CX_JSON_NUMBER; 939 v->type = CX_JSON_NUMBER;
772 v->value.number = num; 940 v->value.number = num;
773 return v; 941 return v;
774 } 942 }
775 943
776 CxJsonValue* cxJsonCreateInteger(const CxAllocator* allocator, int64_t num) { 944 CxJsonValue* cxJsonCreateInteger(const CxAllocator* allocator, int64_t num) {
945 if (allocator == NULL) allocator = cxDefaultAllocator;
777 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue)); 946 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
778 if (v == NULL) return NULL; 947 if (v == NULL) return NULL;
779 v->allocator = allocator; 948 v->allocator = allocator;
780 v->type = CX_JSON_INTEGER; 949 v->type = CX_JSON_INTEGER;
781 v->value.integer = num; 950 v->value.integer = num;
785 CxJsonValue* cxJsonCreateString(const CxAllocator* allocator, const char* str) { 954 CxJsonValue* cxJsonCreateString(const CxAllocator* allocator, const char* str) {
786 return cxJsonCreateCxString(allocator, cx_str(str)); 955 return cxJsonCreateCxString(allocator, cx_str(str));
787 } 956 }
788 957
789 CxJsonValue* cxJsonCreateCxString(const CxAllocator* allocator, cxstring str) { 958 CxJsonValue* cxJsonCreateCxString(const CxAllocator* allocator, cxstring str) {
959 if (allocator == NULL) allocator = cxDefaultAllocator;
790 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue)); 960 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
791 if (v == NULL) return NULL; 961 if (v == NULL) return NULL;
792 v->allocator = allocator; 962 v->allocator = allocator;
793 v->type = CX_JSON_STRING; 963 v->type = CX_JSON_STRING;
794 cxmutstr s = cx_strdup_a(allocator, str); 964 cxmutstr s = cx_strdup_a(allocator, str);
796 v->value.string = s; 966 v->value.string = s;
797 return v; 967 return v;
798 } 968 }
799 969
800 CxJsonValue* cxJsonCreateLiteral(const CxAllocator* allocator, CxJsonLiteral lit) { 970 CxJsonValue* cxJsonCreateLiteral(const CxAllocator* allocator, CxJsonLiteral lit) {
971 if (allocator == NULL) allocator = cxDefaultAllocator;
801 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue)); 972 CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
802 if (v == NULL) return NULL; 973 if (v == NULL) return NULL;
803 v->allocator = allocator; 974 v->allocator = allocator;
804 v->type = CX_JSON_LITERAL; 975 v->type = CX_JSON_LITERAL;
805 v->value.literal = lit; 976 v->value.literal = lit;
806 return v; 977 return v;
807 } 978 }
808 979
809 // LCOV_EXCL_START 980 // LCOV_EXCL_START
810 // never called as long as malloc() does not return NULL 981 // never called as long as malloc() does not return NULL
811 static void cx_json_arr_free_temp(CxJsonValue** values, size_t count) { 982 static void json_arr_free_temp(CxJsonValue** values, size_t count) {
812 for (size_t i = 0; i < count; i++) { 983 for (size_t i = 0; i < count; i++) {
813 if (values[i] == NULL) break; 984 if (values[i] == NULL) break;
814 cxJsonValueFree(values[i]); 985 cxJsonValueFree(values[i]);
815 } 986 }
816 free(values); 987 free(values);
820 int cxJsonArrAddNumbers(CxJsonValue* arr, const double* num, size_t count) { 991 int cxJsonArrAddNumbers(CxJsonValue* arr, const double* num, size_t count) {
821 CxJsonValue** values = calloc(count, sizeof(CxJsonValue*)); 992 CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
822 if (values == NULL) return -1; 993 if (values == NULL) return -1;
823 for (size_t i = 0; i < count; i++) { 994 for (size_t i = 0; i < count; i++) {
824 values[i] = cxJsonCreateNumber(arr->allocator, num[i]); 995 values[i] = cxJsonCreateNumber(arr->allocator, num[i]);
825 if (values[i] == NULL) { cx_json_arr_free_temp(values, count); return -1; } 996 if (values[i] == NULL) { json_arr_free_temp(values, count); return -1; }
826 } 997 }
827 int ret = cxJsonArrAddValues(arr, values, count); 998 int ret = cxJsonArrAddValues(arr, values, count);
828 free(values); 999 free(values);
829 return ret; 1000 return ret;
830 } 1001 }
832 int cxJsonArrAddIntegers(CxJsonValue* arr, const int64_t* num, size_t count) { 1003 int cxJsonArrAddIntegers(CxJsonValue* arr, const int64_t* num, size_t count) {
833 CxJsonValue** values = calloc(count, sizeof(CxJsonValue*)); 1004 CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
834 if (values == NULL) return -1; 1005 if (values == NULL) return -1;
835 for (size_t i = 0; i < count; i++) { 1006 for (size_t i = 0; i < count; i++) {
836 values[i] = cxJsonCreateInteger(arr->allocator, num[i]); 1007 values[i] = cxJsonCreateInteger(arr->allocator, num[i]);
837 if (values[i] == NULL) { cx_json_arr_free_temp(values, count); return -1; } 1008 if (values[i] == NULL) { json_arr_free_temp(values, count); return -1; }
838 } 1009 }
839 int ret = cxJsonArrAddValues(arr, values, count); 1010 int ret = cxJsonArrAddValues(arr, values, count);
840 free(values); 1011 free(values);
841 return ret; 1012 return ret;
842 } 1013 }
844 int cxJsonArrAddStrings(CxJsonValue* arr, const char* const* str, size_t count) { 1015 int cxJsonArrAddStrings(CxJsonValue* arr, const char* const* str, size_t count) {
845 CxJsonValue** values = calloc(count, sizeof(CxJsonValue*)); 1016 CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
846 if (values == NULL) return -1; 1017 if (values == NULL) return -1;
847 for (size_t i = 0; i < count; i++) { 1018 for (size_t i = 0; i < count; i++) {
848 values[i] = cxJsonCreateString(arr->allocator, str[i]); 1019 values[i] = cxJsonCreateString(arr->allocator, str[i]);
849 if (values[i] == NULL) { cx_json_arr_free_temp(values, count); return -1; } 1020 if (values[i] == NULL) { json_arr_free_temp(values, count); return -1; }
850 } 1021 }
851 int ret = cxJsonArrAddValues(arr, values, count); 1022 int ret = cxJsonArrAddValues(arr, values, count);
852 free(values); 1023 free(values);
853 return ret; 1024 return ret;
854 } 1025 }
856 int cxJsonArrAddCxStrings(CxJsonValue* arr, const cxstring* str, size_t count) { 1027 int cxJsonArrAddCxStrings(CxJsonValue* arr, const cxstring* str, size_t count) {
857 CxJsonValue** values = calloc(count, sizeof(CxJsonValue*)); 1028 CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
858 if (values == NULL) return -1; 1029 if (values == NULL) return -1;
859 for (size_t i = 0; i < count; i++) { 1030 for (size_t i = 0; i < count; i++) {
860 values[i] = cxJsonCreateCxString(arr->allocator, str[i]); 1031 values[i] = cxJsonCreateCxString(arr->allocator, str[i]);
861 if (values[i] == NULL) { cx_json_arr_free_temp(values, count); return -1; } 1032 if (values[i] == NULL) { json_arr_free_temp(values, count); return -1; }
862 } 1033 }
863 int ret = cxJsonArrAddValues(arr, values, count); 1034 int ret = cxJsonArrAddValues(arr, values, count);
864 free(values); 1035 free(values);
865 return ret; 1036 return ret;
866 } 1037 }
868 int cxJsonArrAddLiterals(CxJsonValue* arr, const CxJsonLiteral* lit, size_t count) { 1039 int cxJsonArrAddLiterals(CxJsonValue* arr, const CxJsonLiteral* lit, size_t count) {
869 CxJsonValue** values = calloc(count, sizeof(CxJsonValue*)); 1040 CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
870 if (values == NULL) return -1; 1041 if (values == NULL) return -1;
871 for (size_t i = 0; i < count; i++) { 1042 for (size_t i = 0; i < count; i++) {
872 values[i] = cxJsonCreateLiteral(arr->allocator, lit[i]); 1043 values[i] = cxJsonCreateLiteral(arr->allocator, lit[i]);
873 if (values[i] == NULL) { cx_json_arr_free_temp(values, count); return -1; } 1044 if (values[i] == NULL) { json_arr_free_temp(values, count); return -1; }
874 } 1045 }
875 int ret = cxJsonArrAddValues(arr, values, count); 1046 int ret = cxJsonArrAddValues(arr, values, count);
876 free(values); 1047 free(values);
877 return ret; 1048 return ret;
878 } 1049 }
977 } else { 1148 } else {
978 return member->value; 1149 return member->value;
979 } 1150 }
980 } 1151 }
981 1152
982 static const CxJsonWriter cx_json_writer_default = {
983 false,
984 true,
985 255,
986 false,
987 4
988 };
989
990 CxJsonWriter cxJsonWriterCompact(void) { 1153 CxJsonWriter cxJsonWriterCompact(void) {
991 return cx_json_writer_default; 1154 return (CxJsonWriter) {
1155 false,
1156 true,
1157 6,
1158 false,
1159 4,
1160 false
1161 };
992 } 1162 }
993 1163
994 CxJsonWriter cxJsonWriterPretty(bool use_spaces) { 1164 CxJsonWriter cxJsonWriterPretty(bool use_spaces) {
995 return (CxJsonWriter) { 1165 return (CxJsonWriter) {
996 true, 1166 true,
997 true, 1167 true,
998 255, 1168 6,
999 use_spaces, 1169 use_spaces,
1000 4 1170 4,
1171 false
1001 }; 1172 };
1002 } 1173 }
1003 1174
1004 static int cx_json_writer_indent( 1175 static int cx_json_writer_indent(
1005 void *target, 1176 void *target,
1042 // keep track of written items 1213 // keep track of written items
1043 // the idea is to reduce the number of jumps for error checking 1214 // the idea is to reduce the number of jumps for error checking
1044 size_t actual = 0, expected = 0; 1215 size_t actual = 0, expected = 0;
1045 1216
1046 // small buffer for number to string conversions 1217 // small buffer for number to string conversions
1047 char numbuf[32]; 1218 char numbuf[40];
1048 1219
1049 // recursively write the values 1220 // recursively write the values
1050 switch (value->type) { 1221 switch (value->type) {
1051 case CX_JSON_OBJECT: { 1222 case CX_JSON_OBJECT: {
1052 const char *begin_obj = "{\n"; 1223 const char *begin_obj = "{\n";
1076 } 1247 }
1077 } 1248 }
1078 1249
1079 // the name 1250 // the name
1080 actual += wfunc("\"", 1, 1, target); 1251 actual += wfunc("\"", 1, 1, target);
1081 // TODO: escape the string 1252 cxmutstr name = escape_string(member->name, settings->escape_slash);
1082 actual += wfunc(member->name.ptr, 1, 1253 actual += wfunc(name.ptr, 1, name.length, target);
1083 member->name.length, target); 1254 if (name.ptr != member->name.ptr) {
1255 cx_strfree(&name);
1256 }
1084 actual += wfunc("\"", 1, 1, target); 1257 actual += wfunc("\"", 1, 1, target);
1085 const char *obj_name_sep = ": "; 1258 const char *obj_name_sep = ": ";
1086 if (settings->pretty) { 1259 if (settings->pretty) {
1087 actual += wfunc(obj_name_sep, 1, 2, target); 1260 actual += wfunc(obj_name_sep, 1, 2, target);
1088 expected += 4 + member->name.length; 1261 expected += 4 + member->name.length;
1144 expected++; 1317 expected++;
1145 break; 1318 break;
1146 } 1319 }
1147 case CX_JSON_STRING: { 1320 case CX_JSON_STRING: {
1148 actual += wfunc("\"", 1, 1, target); 1321 actual += wfunc("\"", 1, 1, target);
1149 // TODO: escape the string 1322 cxmutstr str = escape_string(value->value.string, settings->escape_slash);
1150 actual += wfunc(value->value.string.ptr, 1, 1323 actual += wfunc(str.ptr, 1, str.length, target);
1151 value->value.string.length, target); 1324 if (str.ptr != value->value.string.ptr) {
1325 cx_strfree(&str);
1326 }
1152 actual += wfunc("\"", 1, 1, target); 1327 actual += wfunc("\"", 1, 1, target);
1153 expected += 2 + value->value.string.length; 1328 expected += 2 + value->value.string.length;
1154 break; 1329 break;
1155 } 1330 }
1156 case CX_JSON_NUMBER: { 1331 case CX_JSON_NUMBER: {
1157 // TODO: locale bullshit 1332 int precision = settings->frac_max_digits;
1158 // TODO: formatting settings 1333 // because of the way how %g is defined, we need to
1159 snprintf(numbuf, 32, "%g", value->value.number); 1334 // double the precision and truncate ourselves
1160 size_t len = strlen(numbuf); 1335 precision = 1 + (precision > 15 ? 30 : 2 * precision);
1161 actual += wfunc(numbuf, 1, len, target); 1336 snprintf(numbuf, 40, "%.*g", precision, value->value.number);
1162 expected += len; 1337 char *dot, *exp;
1338 unsigned char max_digits;
1339 // find the decimal separator and hope that it's one of . or ,
1340 dot = strchr(numbuf, '.');
1341 if (dot == NULL) {
1342 dot = strchr(numbuf, ',');
1343 }
1344 if (dot == NULL) {
1345 // no decimal separator found
1346 // output everything until a possible exponent
1347 max_digits = 30;
1348 dot = numbuf;
1349 } else {
1350 // found a decimal separator
1351 // output everything until the separator
1352 // and set max digits to what the settings say
1353 size_t len = dot - numbuf;
1354 actual += wfunc(numbuf, 1, len, target);
1355 expected += len;
1356 max_digits = settings->frac_max_digits;
1357 if (max_digits > 15) {
1358 max_digits = 15;
1359 }
1360 // locale independent separator
1361 if (max_digits > 0) {
1362 actual += wfunc(".", 1, 1, target);
1363 expected++;
1364 }
1365 dot++;
1366 }
1367 // find the exponent
1368 exp = strchr(dot, 'e');
1369 if (exp == NULL) {
1370 // no exponent - output the rest
1371 if (max_digits > 0) {
1372 size_t len = strlen(dot);
1373 if (len > max_digits) {
1374 len = max_digits;
1375 }
1376 actual += wfunc(dot, 1, len, target);
1377 expected += len;
1378 }
1379 } else {
1380 // exponent found - truncate the frac digits
1381 // and then output the rest
1382 if (max_digits > 0) {
1383 size_t len = exp - dot - 1;
1384 if (len > max_digits) {
1385 len = max_digits;
1386 }
1387 actual += wfunc(dot, 1, len, target);
1388 expected += len;
1389 }
1390 actual += wfunc("e", 1, 1, target);
1391 expected++;
1392 exp++;
1393 size_t len = strlen(exp);
1394 actual += wfunc(exp, 1, len, target);
1395 expected += len;
1396 }
1163 break; 1397 break;
1164 } 1398 }
1165 case CX_JSON_INTEGER: { 1399 case CX_JSON_INTEGER: {
1166 snprintf(numbuf, 32, "%" PRIi64, value->value.integer); 1400 snprintf(numbuf, 32, "%" PRIi64, value->value.integer);
1167 size_t len = strlen(numbuf); 1401 size_t len = strlen(numbuf);
1199 void *target, 1433 void *target,
1200 const CxJsonValue *value, 1434 const CxJsonValue *value,
1201 cx_write_func wfunc, 1435 cx_write_func wfunc,
1202 const CxJsonWriter *settings 1436 const CxJsonWriter *settings
1203 ) { 1437 ) {
1204 if (settings == NULL) {
1205 settings = &cx_json_writer_default;
1206 }
1207 assert(target != NULL); 1438 assert(target != NULL);
1208 assert(value != NULL); 1439 assert(value != NULL);
1209 assert(wfunc != NULL); 1440 assert(wfunc != NULL);
1210 1441
1442 CxJsonWriter writer_default = cxJsonWriterCompact();
1443 if (settings == NULL) {
1444 settings = &writer_default;
1445 }
1211 return cx_json_write_rec(target, value, wfunc, settings, 0); 1446 return cx_json_write_rec(target, value, wfunc, settings, 0);
1212 } 1447 }

mercurial