libidav/davqlparser.c

changeset 106
9cec78f23cbf
parent 103
b29692d5f7a7
child 107
a0903d2d8e3e
equal deleted inserted replaced
105:ee0de2b1872e 106:9cec78f23cbf
47 } 47 }
48 } 48 }
49 49
50 static const char* _map_exprtype(davqlexprtype_t type) { 50 static const char* _map_exprtype(davqlexprtype_t type) {
51 switch(type) { 51 switch(type) {
52 case DAVQL_UNDEFINED_TYP: return "undefined"; 52 case DAVQL_UNDEFINED_TYPE: return "undefined";
53 case DAVQL_NUMBER: return "NUMBER"; 53 case DAVQL_NUMBER: return "NUMBER";
54 case DAVQL_STRING: return "STRING"; 54 case DAVQL_STRING: return "STRING";
55 case DAVQL_TIMESTAMP: return "TIMESTAMP"; 55 case DAVQL_TIMESTAMP: return "TIMESTAMP";
56 case DAVQL_IDENTIFIER: return "IDENTIFIER"; 56 case DAVQL_IDENTIFIER: return "IDENTIFIER";
57 case DAVQL_UNARY: return "UNARY"; 57 case DAVQL_UNARY: return "UNARY";
88 default: return "unknown"; 88 default: return "unknown";
89 } 89 }
90 } 90 }
91 91
92 static void dav_debug_ql_fnames_print(DavQLStatement *stmt) { 92 static void dav_debug_ql_fnames_print(DavQLStatement *stmt) {
93 printf("Field names: "); 93 if (stmt->fields) {
94 UCX_FOREACH(field, stmt->fields) { 94 printf("Field names: ");
95 DavQLField *f = field->data; 95 UCX_FOREACH(field, stmt->fields) {
96 printf("%.*s, ", sfmtarg(f->name)); 96 DavQLField *f = field->data;
97 } 97 printf("%.*s, ", sfmtarg(f->name));
98 printf("\b\b \b\b\n"); 98 }
99 printf("\b\b \b\b\n");
100 }
99 } 101 }
100 102
101 static void dav_debug_ql_stmt_print(DavQLStatement *stmt) { 103 static void dav_debug_ql_stmt_print(DavQLStatement *stmt) {
102 // Basic information 104 // Basic information
103 size_t fieldcount = ucx_list_size(stmt->fields); 105 size_t fieldcount = ucx_list_size(stmt->fields);
347 349
348 // ------------------------------------------------------------------------ 350 // ------------------------------------------------------------------------
349 // P A R S E R 351 // P A R S E R
350 // ------------------------------------------------------------------------ 352 // ------------------------------------------------------------------------
351 353
352 #define _unexpected_end_msg "unexpected end of statement" 354 #define _error_context "(%.*s [->]%.*s %.*s)"
353 #define _invalid_msg "invalid statement" 355 #define _error_invalid "invalid statement"
354 #define _unexpected_token "unexpected token (%.*s [->]%.*s %.*s)" 356 #define _error_unhandled "unhandled error " _error_context
355 #define _expected_token "expected token '%s' before '%.*s'" 357 #define _error_unexpected_token "unexpected token " _error_context
356 #define _expected_by "expected 'by' after 'order' (order [->]%.*s)" 358 #define _error_invalid_token "invalid token " _error_context
357 #define _missing_fmtspec "format specifier missing (%.*s [->]%.*s %.*s)" 359 #define _error_missing_from "missing FROM keyword " _error_context
358 #define _invalid_fmtspec "invalid format specifier (%.*s [->]%.*s %.*s)" 360 #define _error_missing_by "missing BY keyword " _error_context
359 #define _unknown_fmtspec "unknown format specifier (%.*s [->]%.*s %.*s)" 361 #define _error_invalid_depth "invalid depth " _error_context
360 #define _missing_quote "missing closing quote symbol (%.*s)" 362 #define _error_missing_expr "missing expression " _error_context
361 #define _parser_state "parser reached invalid state" 363 #define _error_invalid_unary_op "invalid unary operator " _error_context
362 #define _unknown_attribute "unknown attribute '%.*s'" 364
363 #define _duplicated_attribute "duplicated attribute '%.*s'" 365 #define token_sstr(token) (((DavQLToken*)(token)->data)->value)
364 #define _invalid_depth "invalid depth"
365 #define _invalid_path "invalid path"
366
367 #define _identifier_expected "identifier expected (%.*s [->]%.*s %.*s)"
368 #define _idornum_expected "identifier or number expected (%.*s [->]%.*s %.*s)"
369 #define _idorstr_expected "identifier or string expected (%.*s [->]%.*s %.*s)"
370 #define _idorts_expected "identifier or timestamp expected (%.*s [->]%.*s %.*s)"
371
372 #define token_sstr(listelem) ((sstr_t*)(listelem)->data)
373 366
374 static void dav_error_in_context(int errorcode, const char *errormsg, 367 static void dav_error_in_context(int errorcode, const char *errormsg,
375 DavQLStatement *stmt, UcxList *token) { 368 DavQLStatement *stmt, UcxList *token) {
376 sstr_t emptystring = ST(""); 369 sstr_t emptystring = ST("");
377 stmt->errorcode = errorcode; 370 stmt->errorcode = errorcode;
378 stmt->errormessage = ucx_sprintf(errormsg, 371 stmt->errormessage = ucx_sprintf(errormsg,
379 sfmtarg(token->prev?*token_sstr(token->prev):emptystring), 372 sfmtarg(token->prev?token_sstr(token->prev):emptystring),
380 sfmtarg(*token_sstr(token)), 373 sfmtarg(token_sstr(token)),
381 sfmtarg(token->next?*token_sstr(token->next):emptystring)).ptr; 374 sfmtarg(token->next?token_sstr(token->next):emptystring)).ptr;
382 } 375 }
383 376
384 // special symbols are single tokens - the % sign MUST NOT be a special symbol 377 // special symbols are single tokens - the % sign MUST NOT be a special symbol
385 static const char *special_token_symbols = ",()+-*/&|^~=!<>"; 378 static const char *special_token_symbols = ",()+-*/&|^~=!<>";
386 379
380 static _Bool iskeyword(DavQLToken *token) {
381 sstr_t keywords[] = {ST("get"), ST("set"), ST("from"), ST("at"), ST("as"),
382 ST("where"), ST("with"), ST("order"), ST("by"), ST("asc"), ST("desc")
383 };
384 for (int i = 0 ; i < sizeof(keywords)/sizeof(char*) ; i++) {
385 if (!sstrcasecmp(token->value, keywords[i])) {
386 return 1;
387 }
388 }
389 return 0;
390 }
391
392 static UcxList* dav_parse_add_token(UcxList *tokenlist, DavQLToken *token) {
393
394 // determine token class (order of if-statements is very important!)
395 char firstchar = token->value.ptr[0];
396
397 if (isdigit(firstchar)) {
398 token->tokenclass = DAVQL_TOKEN_NUMBER;
399 } else if (firstchar == '%') {
400 token->tokenclass = DAVQL_TOKEN_FMTSPEC;
401 } else if (token->value.length == 1) {
402 switch (firstchar) {
403 case '(': token->tokenclass = DAVQL_TOKEN_OPENP; break;
404 case ')': token->tokenclass = DAVQL_TOKEN_CLOSEP; break;
405 case ',': token->tokenclass = DAVQL_TOKEN_COMMA; break;
406 case '=': token->tokenclass = DAVQL_TOKEN_EQ; break;
407 case '<': token->tokenclass = DAVQL_TOKEN_LT; break;
408 case '>': token->tokenclass = DAVQL_TOKEN_GT; break;
409 case '!': token->tokenclass = DAVQL_TOKEN_EXCLAIM; break;
410 default:
411 token->tokenclass = strchr(special_token_symbols, firstchar) ?
412 DAVQL_TOKEN_OPERATOR : DAVQL_TOKEN_IDENTIFIER;
413 }
414 } else if (firstchar == '\'') {
415 token->tokenclass = DAVQL_TOKEN_STRING;
416 } else if (firstchar == '`') {
417 token->tokenclass = DAVQL_TOKEN_IDENTIFIER;
418 } else if (iskeyword(token)) {
419 token->tokenclass = DAVQL_TOKEN_KEYWORD;
420 } else {
421 token->tokenclass = DAVQL_TOKEN_IDENTIFIER;
422 }
423
424 // remove quotes (extreme cool feature)
425 if (token->tokenclass == DAVQL_TOKEN_STRING ||
426 (token->tokenclass == DAVQL_TOKEN_IDENTIFIER && firstchar == '`')) {
427
428 char lastchar = token->value.ptr[token->value.length-1];
429 if (firstchar == lastchar) {
430 token->value.ptr++;
431 token->value.length -= 2;
432 } else {
433 token->tokenclass = DAVQL_TOKEN_INVALID;
434 }
435 }
436
437
438 return ucx_list_append(tokenlist, token);
439 }
440
387 static UcxList* dav_parse_tokenize(sstr_t src) { 441 static UcxList* dav_parse_tokenize(sstr_t src) {
388 UcxList *tokens = NULL; 442 UcxList *tokens = NULL;
389 443
390 sstr_t *token = NULL; 444 DavQLToken *token = NULL;
391 char insequence = '\0'; 445 char insequence = '\0';
392 for (size_t i = 0 ; i < src.length ; i++) { 446 for (size_t i = 0 ; i < src.length ; i++) {
393 // quoted strings / identifiers are a single token 447 // quoted strings / identifiers are a single token
394 if (src.ptr[i] == '\'' || src.ptr[i] == '`') { 448 if (src.ptr[i] == '\'' || src.ptr[i] == '`') {
395 if (src.ptr[i] == insequence) { 449 if (src.ptr[i] == insequence) {
396 // add quoted token to list 450 // add quoted token to list
397 token->length++; 451 token->value.length++;
398 tokens = ucx_list_append(tokens, token); 452 tokens = dav_parse_add_token(tokens, token);
399 token = NULL; 453 token = NULL;
400 insequence = '\0'; 454 insequence = '\0';
401 } else if (insequence == '\0') { 455 } else if (insequence == '\0') {
402 insequence = src.ptr[i]; 456 insequence = src.ptr[i];
403 // always create new token for quoted strings 457 // always create new token for quoted strings
404 if (token) { 458 if (token) {
405 tokens = ucx_list_append(tokens, token); 459 tokens = dav_parse_add_token(tokens, token);
406 } 460 }
407 token = malloc(sizeof(sstr_t)); 461 token = malloc(sizeof(DavQLToken));
408 token->ptr = src.ptr + i; 462 token->value.ptr = src.ptr + i;
409 token->length = 1; 463 token->value.length = 1;
410 } else { 464 } else {
411 // add other kind of quotes to token 465 // add other kind of quotes to token
412 token->length++; 466 token->value.length++;
413 } 467 }
414 } else if (insequence) { 468 } else if (insequence) {
415 token->length++; 469 token->value.length++;
416 } else if (isspace(src.ptr[i])) { 470 } else if (isspace(src.ptr[i])) {
417 // add token before spaces to list (if any) 471 // add token before spaces to list (if any)
418 if (token) { 472 if (token) {
419 tokens = ucx_list_append(tokens, token); 473 tokens = dav_parse_add_token(tokens, token);
420 token = NULL; 474 token = NULL;
421 } 475 }
422 } else if (strchr(special_token_symbols, src.ptr[i])) { 476 } else if (strchr(special_token_symbols, src.ptr[i])) {
423 // add token before special symbol to list (if any) 477 // add token before special symbol to list (if any)
424 if (token) { 478 if (token) {
425 tokens = ucx_list_append(tokens, token); 479 tokens = dav_parse_add_token(tokens, token);
426 token = NULL; 480 token = NULL;
427 } 481 }
428 // add special symbol as single token to list 482 // add special symbol as single token to list
429 token = malloc(sizeof(sstr_t)); 483 token = malloc(sizeof(DavQLToken));
430 token->ptr = src.ptr + i; 484 token->value.ptr = src.ptr + i;
431 token->length = 1; 485 token->value.length = 1;
432 tokens = ucx_list_append(tokens, token); 486 tokens = dav_parse_add_token(tokens, token);
433 // set tokenizer ready to read more tokens 487 // set tokenizer ready to read more tokens
434 token = NULL; 488 token = NULL;
435 } else { 489 } else {
436 // if this is a new token, create memory for it 490 // if this is a new token, create memory for it
437 if (!token) { 491 if (!token) {
438 token = malloc(sizeof(sstr_t)); 492 token = malloc(sizeof(DavQLToken));
439 token->ptr = src.ptr + i; 493 token->value.ptr = src.ptr + i;
440 token->length = 0; 494 token->value.length = 0;
441 } 495 }
442 // extend token length when reading more bytes 496 // extend token length when reading more bytes
443 token->length++; 497 token->value.length++;
444 } 498 }
445 } 499 }
446 500
447 if (token) { 501 if (token) {
448 tokens = ucx_list_append(tokens, token); 502 tokens = dav_parse_add_token(tokens, token);
449 } 503 }
450 504
451 return tokens; 505 return tokens;
452 }
453
454 static DavQLExpression* dav_parse_expression(
455 DavQLStatement* stmt, UcxList* starttoken, size_t n) {
456 if (n == 0) {
457 return NULL;
458 }
459
460 DavQLExpression *expr = calloc(1, sizeof(DavQLExpression));
461
462 // set pointer for source text
463 expr->srctext.ptr = token_sstr(starttoken)->ptr;
464
465 // special case - only one token
466 if (n == 1) {
467 expr->srctext.length = token_sstr(starttoken)->length;
468 char firstchar = expr->srctext.ptr[0];
469 char lastchar = expr->srctext.ptr[expr->srctext.length-1];
470 if (firstchar == '\'') {
471 expr->type = DAVQL_STRING;
472 } else if (isdigit(firstchar)) {
473 expr->type = DAVQL_NUMBER;
474 } else if (firstchar == '%') {
475 if (expr->srctext.length == 1) {
476 dav_error_in_context(DAVQL_ERROR_MISSING_FMTSPEC,
477 _missing_fmtspec, stmt, starttoken);
478 } else if (expr->srctext.length == 2) {
479 switch (expr->srctext.ptr[1]) {
480 case 'd': expr->type = DAVQL_NUMBER; break;
481 case 's': expr->type = DAVQL_STRING; break;
482 case 't': expr->type = DAVQL_TIMESTAMP; break;
483 default:
484 dav_error_in_context(DAVQL_ERROR_UNKNOWN_FMTSPEC,
485 _unknown_fmtspec, stmt, starttoken);
486 }
487 } else {
488 dav_error_in_context(DAVQL_ERROR_INVALID_FMTSPEC,
489 _invalid_fmtspec, stmt, starttoken);
490 }
491 } else {
492 expr->type = DAVQL_IDENTIFIER;
493 }
494 // remove quotes (if any)
495 if (firstchar == '\'' || firstchar == '`') {
496 if (lastchar != firstchar) {
497 stmt->errorcode = DAVQL_ERROR_MISSING_QUOTE;
498 stmt->errormessage =
499 ucx_sprintf(_missing_quote, sfmtarg(expr->srctext)).ptr;
500 }
501 expr->srctext.ptr++;
502 if (expr->srctext.length > 2) {
503 expr->srctext.length -= 2;
504 } else {
505 expr->srctext.length = 0;
506 }
507 }
508 } else {
509 UcxList* token = starttoken;
510
511 // check, if first token is (
512 // if so, verify that last token is ) and throw both away
513 if (!sstrcmp(*token_sstr(token), S("("))) {
514 if (!sstrcmp(*token_sstr(ucx_list_get(token, n-1)), S(")"))) {
515 token = token->next;
516 n -= 2;
517 } else {
518 // TODO: throw syntax error
519 }
520 }
521
522 // process tokens
523 for (size_t i = 0 ; i < n ; i++) {
524 sstr_t tokendata = *token_sstr(token);
525
526 // TODO: make it so
527
528 // go to next token (if this is not the last token)
529 if (i < n-1) {
530 token = token->next;
531 }
532 }
533
534 // compute length of source text (including delimiters)
535 expr->srctext.length = token_sstr(token)->ptr +
536 token_sstr(token)->length - expr->srctext.ptr;
537 }
538
539 return expr;
540 } 506 }
541 507
542 static void dav_free_expression(DavQLExpression *expr) { 508 static void dav_free_expression(DavQLExpression *expr) {
543 if (expr->left) { 509 if (expr->left) {
544 dav_free_expression(expr->left); 510 dav_free_expression(expr->left);
546 if (expr->right) { 512 if (expr->right) {
547 dav_free_expression(expr->right); 513 dav_free_expression(expr->right);
548 } 514 }
549 free(expr); 515 free(expr);
550 } 516 }
551
552 #define _step_fieldlist_ 10 // field list
553 #define _step_FROM_ 20 // FROM clause
554 #define _step_WITH_ 30 // WITH clause
555 #define _step_WITHopt_ 530 // expecting more WITH details or end
556 #define _step_WHERE_ 40 // WHERE clause
557 #define _step_ORDER_BY_ 50 // ORDER BY clause
558 #define _step_ORDER_BYopt_ 550 // expecting more ORDER BY details or end
559 #define _step_end_ 500 // expect end
560
561 struct fieldlist_parser_state {
562 UcxList *expr_firsttoken;
563 DavQLField *currentfield;
564 size_t expr_len;
565 /*
566 * 0: begin of field list - may encounter "*" or "-" special fields
567 * 1: collect expression token
568 * switch to step 2 on keyword "as"
569 * expect "," or "from" only if expr_len is 1 (add to list and continue)
570 * 2: expect one token (identifier) for as clause
571 * 3: expect a ",": continue with step 1
572 * or a "from": leave field list parser
573 * 4: expect end of field list (i.e. a "from" keyword)
574 */
575 int step;
576 };
577 517
578 static void dav_free_field(DavQLField *field) { 518 static void dav_free_field(DavQLField *field) {
579 dav_free_expression(field->expr); 519 dav_free_expression(field->expr);
580 free(field); 520 free(field);
581 } 521 }
582 522
583 static int dav_parse_fieldlist(DavQLStatement *stmt, UcxList *token, 523 static void dav_free_order_criterion(DavQLOrderCriterion *crit) {
584 struct fieldlist_parser_state *state) { 524 if (crit->column) { // do it null-safe though column is expected to be set
585 sstr_t tokendata = *token_sstr(token); 525 dav_free_expression(crit->column);
586 526 }
587 _Bool fromkeyword = !sstrcasecmp(tokendata, S("from")); 527 free(crit);
588 _Bool comma = !sstrcmp(tokendata, S(",")); 528 }
589 529
590 switch (state->step) { 530 #define token_is(token, expectedclass) (token && \
591 case 0: 531 (((DavQLToken*)(token)->data)->tokenclass == expectedclass))
592 if (!sstrcmp(tokendata, S("*")) || !sstrcmp(tokendata, S("-"))) { 532
593 DavQLField *field = malloc(sizeof(DavQLField)); 533 #define tokenvalue_is(token, expectedvalue) (token && \
594 field->name = tokendata; 534 !sstrcasecmp(((DavQLToken*)(token)->data)->value, S(expectedvalue)))
595 field->expr = calloc(1, sizeof(DavQLExpression)); 535
596 field->expr->type = DAVQL_IDENTIFIER; 536 typedef int(*exprparser_f)(DavQLStatement*,UcxList*,DavQLExpression*);
597 field->expr->srctext = tokendata; 537
598 stmt->fields = ucx_list_append(stmt->fields, field); 538 static int dav_parse_binary_expr(DavQLStatement* stmt, UcxList* token,
539 DavQLExpression* expr, exprparser_f parseL, char* opc, int* opv,
540 exprparser_f parseR) {
541
542 int total_consumed = 0, consumed;
543
544 // save temporarily on stack (copy to heap later on)
545 DavQLExpression left, right;
546
547 // RULE: LEFT, [Operator, RIGHT]
548 memset(&left, 0, sizeof(DavQLExpression));
549 consumed = parseL(stmt, token, &left);
550 if (!consumed) {
551 return 0;
552 }
553 total_consumed += consumed;
554 token = ucx_list_get(token, consumed);
555
556 char *op = strchr(opc, token_sstr(token).ptr[0]); // locate operator
557 if (token_is(token, DAVQL_TOKEN_OPERATOR) && op) {
558 expr->op = opv[op-opc];
559 total_consumed++;
560 token = token->next;
561 memset(&right, 0, sizeof(DavQLExpression));
562 consumed = parseR(stmt, token, &right);
563 if (!consumed) {
564 dav_error_in_context(DAVQL_ERROR_MISSING_EXPR,
565 _error_missing_expr, stmt, token);
566 return 0;
567 }
568 total_consumed += consumed;
569 }
570
571 if (expr->op == DAVQL_NOOP) {
572 memcpy(expr, &left, sizeof(DavQLExpression));
573 } else {
574 expr->left = malloc(sizeof(DavQLExpression));
575 memcpy(expr->left, &left, sizeof(DavQLExpression));
576 expr->right = malloc(sizeof(DavQLExpression));
577 memcpy(expr->right, &right, sizeof(DavQLExpression));
578 }
579
580 return total_consumed;
581 }
582
583
584 static int dav_parse_unary_expr(DavQLStatement* stmt, UcxList* token,
585 DavQLExpression* expr) {
586
587 int total_consumed = 0;
588 DavQLExpression *litexpr = expr;
589
590 // optional unary operator
591 if (token_is(token, DAVQL_TOKEN_OPERATOR)) {
592 char *op = strchr("+-~", token_sstr(token).ptr[0]);
593 if (op) {
594 expr->type = DAVQL_UNARY;
595 switch (*op) {
596 case '+': expr->op = DAVQL_ADD; break;
597 case '-': expr->op = DAVQL_SUB; break;
598 case '~': expr->op = DAVQL_NEG; break;
599 }
600 expr->left = calloc(sizeof(DavQLExpression), 1);
601 litexpr = expr->left;
602 total_consumed++;
603 token = token->next;
604 } else {
605 dav_error_in_context(DAVQL_ERROR_INVALID_UNARY_OP,
606 _error_invalid_unary_op, stmt, token);
607 return 0;
608 }
609 }
610
611 // RULE: (ParExpression | AtomicExpression)
612 if (token_is(token, DAVQL_TOKEN_OPENP)) {
613 // TODO: make it so (and don't forget CLOSEP)
614 } else {
615 // RULE: FunctionCall
616 // TODO: make it so
617
618 // RULE: Identifier
619 /*else*/ if (token_is(token, DAVQL_TOKEN_IDENTIFIER)) {
620 total_consumed++;
621 litexpr->type = DAVQL_IDENTIFIER;
622 litexpr->srctext = token_sstr(token);
623 }
624
625 // RULE: Literal
626 // TODO: make it so
627 }
628
629
630 return total_consumed;
631 }
632
633 static int dav_parse_bitexpr(DavQLStatement* stmt, UcxList* token,
634 DavQLExpression* expr) {
635
636 return dav_parse_binary_expr(stmt, token, expr,
637 dav_parse_unary_expr,
638 "&|^", (int[]){DAVQL_AND, DAVQL_OR, DAVQL_XOR},
639 dav_parse_bitexpr);
640 }
641
642 static int dav_parse_multexpr(DavQLStatement* stmt, UcxList* token,
643 DavQLExpression* expr) {
644
645 return dav_parse_binary_expr(stmt, token, expr,
646 dav_parse_bitexpr,
647 "*/", (int[]){DAVQL_MUL, DAVQL_DIV},
648 dav_parse_multexpr);
649 }
650
651 static int dav_parse_expression(DavQLStatement* stmt, UcxList* token,
652 DavQLExpression* expr) {
653
654 // TODO: save source text
655
656 return dav_parse_binary_expr(stmt, token, expr,
657 dav_parse_multexpr,
658 "+-", (int[]){DAVQL_ADD, DAVQL_SUB},
659 dav_parse_expression);
660 }
661
662 static int dav_parse_format_spec(DavQLStatement* stmt, UcxList* token) {
663
664 return 0;
665 }
666
667 static int dav_parse_fieldlist(DavQLStatement *stmt, UcxList *token) {
668
669 // RULE: "-"
670 if (token_is(token, DAVQL_TOKEN_OPERATOR) && tokenvalue_is(token, "-")) {
671 DavQLField *field = malloc(sizeof(DavQLField));
672 field->expr = calloc(sizeof(DavQLExpression), 1);
673 field->expr->type = DAVQL_IDENTIFIER;
674 field->expr->srctext = field->name = token_sstr(token);
675 stmt->fields = ucx_list_append(stmt->fields, field);
676 return 1;
677 }
678
679 // RULE: "*", {",", Expression, " as ", Identifier}
680 if (token_is(token, DAVQL_TOKEN_OPERATOR) && tokenvalue_is(token, "*")) {
681 DavQLField *field = malloc(sizeof(DavQLField));
682 field->expr = calloc(sizeof(DavQLExpression), 1);
683 field->expr->type = DAVQL_IDENTIFIER;
684 field->expr->srctext = field->name = token_sstr(token);
685 stmt->fields = ucx_list_append(stmt->fields, field);
686
687 int total_consumed = 0;
688 int consumed = 1;
689
690 do {
691 token = ucx_list_get(token, consumed);
692 total_consumed += consumed;
599 693
600 if (tokendata.ptr[0] == '-') { 694 if (token_is(token, DAVQL_TOKEN_COMMA)) {
601 // no further fields may follow, if dash symbol has been found 695 total_consumed++; token = token->next;
602 state->step = 4; 696 DavQLExpression * expr = calloc(sizeof(DavQLExpression), 1);
603 } else { 697 consumed = dav_parse_expression(stmt, token, expr);
604 state->step = 3; 698 if (expr->type == DAVQL_UNDEFINED_TYPE) {
605 } 699 dav_free_expression(expr);
606 return _step_fieldlist_; 700 } else {
607 } 701 DavQLField *field = malloc(sizeof(DavQLField));
608 // did not encounter special field, fall through to step 1 702 field->expr = expr;
609 state->step = 1; 703 field->name = expr->srctext;
610 case 1: 704 stmt->fields = ucx_list_append(stmt->fields, field);
611 if (fromkeyword || comma) {
612 // add possible identifier to list
613 if (state->expr_firsttoken) {
614 // TODO: skip comma in function call)
615 if (state->expr_len > 1) {
616 stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
617 stmt->errormessage = ucx_sprintf(_expected_token,
618 "AS", sfmtarg(tokendata)).ptr;
619 return 0;
620 } 705 }
621 706
622 DavQLExpression *expr = dav_parse_expression( 707 // TODO: parse "as"
623 stmt, state->expr_firsttoken, state->expr_len); 708 } else {
709 consumed = 0;
710 }
711 } while (consumed > 0);
712
713 return total_consumed;
714 }
715
716 // RULE: FieldExpression, {",", FieldExpression}
717 // TODO: make it so
718
719 return 0;
720 }
721
722 static int dav_parse_where_clause(DavQLStatement *stmt, UcxList *token) {
723 return 0;
724 }
725
726 static int dav_parse_with_clause(DavQLStatement *stmt, UcxList *token) {
727
728 int total_consumed = 0;
729
730 // RULE: "depth", "=", (Number | "infinity")
731 if (tokenvalue_is(token, "depth")) {
732 token = token->next; total_consumed++;
733 if (token_is(token, DAVQL_TOKEN_EQ)) {
734 token = token->next; total_consumed++;
735 if (tokenvalue_is(token, "infinity")) {
736 stmt->depth = DAV_DEPTH_INFINITY;
737 token = token->next; total_consumed++;
738 } else {
739 DavQLExpression *depthexpr = calloc(sizeof(DavQLExpression), 1);
624 740
625 if (expr->type != DAVQL_IDENTIFIER) { 741 int consumed = dav_parse_expression(stmt, token, depthexpr);
626 dav_free_expression(expr); 742
627 stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN; 743 if (consumed) {
628 stmt->errormessage = ucx_sprintf(_expected_token, 744 if (depthexpr->type == DAVQL_NUMBER) {
629 "AS", sfmtarg(tokendata)).ptr; 745 if (depthexpr->srctext.ptr[0] == '%') {
630 return 0; 746 stmt->depth = DAV_DEPTH_PLACEHOLDER;
631 } // TODO: do not allow identifier when wildcard is present 747 } else {
632 748 sstr_t depthstr = depthexpr->srctext;
633 DavQLField *field = malloc(sizeof(DavQLField)); 749 char *conv = malloc(depthstr.length+1);
634 field->expr = expr; 750 char *chk;
635 field->name = field->expr->srctext; 751 memcpy(conv, depthstr.ptr, depthstr.length);
636 stmt->fields = ucx_list_append(stmt->fields, field); 752 conv[depthstr.length] = '\0';
637 753 stmt->depth = strtol(conv, &chk, 10);
638 state->expr_firsttoken = NULL; 754 if (*chk || stmt->depth < -1) {
639 state->expr_len = 0; 755 dav_error_in_context(DAVQL_ERROR_INVALID_DEPTH,
640 756 _error_invalid_depth, stmt, token);
641 if (fromkeyword) { 757 }
642 return _step_FROM_; 758 free(conv);
643 } 759 }
644 } else { 760 total_consumed += consumed;
645 dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
646 _unexpected_token, stmt, token);
647 return 0;
648 }
649 } else if (!sstrcasecmp(tokendata, S("as"))) {
650 // TODO: return error, if expr_first_token is NULL
651 state->currentfield = malloc(sizeof(DavQLField));
652 state->currentfield->expr = dav_parse_expression(
653 stmt, state->expr_firsttoken, state->expr_len);
654
655 state->expr_firsttoken = NULL;
656 state->expr_len = 0;
657
658 state->step = 2;
659 } else {
660 // collect tokens for field expression
661 if (state->expr_firsttoken) {
662 state->expr_len++;
663 } else {
664 state->expr_firsttoken = token;
665 state->expr_len = 1;
666 }
667 }
668
669 return _step_fieldlist_;
670 case 2: {
671 DavQLExpression *expr = dav_parse_expression(stmt, token, 1);
672 if (expr->type == DAVQL_IDENTIFIER) {
673 state->currentfield->name = expr->srctext;
674 stmt->fields = ucx_list_append(stmt->fields, state->currentfield);
675 state->currentfield = NULL;
676 } else {
677 dav_free_field(state->currentfield);
678 dav_error_in_context(DAVQL_ERROR_IDENTIFIER_EXPECTED,
679 _identifier_expected, stmt, token);
680
681 }
682 dav_free_expression(expr);
683 state->step = 3;
684
685 return _step_fieldlist_;
686 }
687 case 3:
688 if (fromkeyword) {
689 return _step_FROM_;
690 } else if (comma) {
691 state->step = 1;
692 return _step_fieldlist_;
693 } else {
694 dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
695 _unexpected_token, stmt, token);
696 return 0;
697 }
698 case 4:
699 if (fromkeyword) {
700 return _step_FROM_;
701 } else {
702 stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
703 stmt->errormessage = ucx_sprintf(_expected_token,
704 "FROM", sfmtarg(tokendata)).ptr;
705 return 0;
706 }
707 default:
708 stmt->errorcode = DAVQL_ERROR_INVALID;
709 stmt->errormessage = strdup(_parser_state);
710 return 0;
711 }
712 }
713
714 static int dav_parse_from(DavQLStatement *stmt, UcxList *token) {
715 sstr_t tokendata = *token_sstr(token);
716
717 if (!sstrcasecmp(tokendata, S("with"))) {
718 return _step_WITH_;
719 } else if (!sstrcasecmp(tokendata, S("where"))) {
720 return _step_WHERE_;
721 } else if (!sstrcasecmp(tokendata, S("order"))) {
722 return _step_ORDER_BY_;
723 } else {
724 if (stmt->path.ptr) {
725 if (stmt->path.ptr[0] == '/') {
726 char *end = tokendata.ptr+tokendata.length;
727 stmt->path.length = end - stmt->path.ptr;
728 } else {
729 stmt->errorcode = DAVQL_ERROR_INVALID_PATH;
730 stmt->errormessage = strdup(_invalid_path);
731 }
732 } else {
733 if (tokendata.ptr[0] == '/' || !sstrcmp(tokendata, S("%s"))) {
734 stmt->path = tokendata;
735 } else {
736 stmt->errorcode = DAVQL_ERROR_INVALID_PATH;
737 stmt->errormessage = strdup(_invalid_path);
738 }
739 }
740 return _step_FROM_;
741 }
742 }
743
744 struct with_parser_state {
745 /*
746 * 0: key
747 * 1: =
748 * 2: value
749 * 3: comma or new clause or end
750 */
751 int step;
752 /*
753 * 1: depth
754 */
755 int key;
756 int keymask;
757 };
758
759 static int dav_parse_with_clause(DavQLStatement *stmt, UcxList *token,
760 struct with_parser_state *state) {
761 sstr_t tokendata = *token_sstr(token);
762
763 switch (state->step) {
764 case 0:
765 if (!sstrcasecmp(tokendata, S("depth"))) {
766 state->key = 1;
767 state->step = 1;
768 if (state->keymask & state->key) {
769 stmt->errorcode = DAVQL_ERROR_DUPLICATED_ATTRIBUTE;
770 stmt->errormessage = ucx_sprintf(_duplicated_attribute,
771 sfmtarg(tokendata)).ptr;
772 } else {
773 state->keymask |= state->key;
774 }
775 } else {
776 stmt->errorcode = DAVQL_ERROR_UNKNOWN_ATTRIBUTE;
777 stmt->errormessage = ucx_sprintf(_unknown_attribute,
778 sfmtarg(tokendata)).ptr;
779 }
780 return _step_WITH_; // continue parsing WITH clause
781 case 1:
782 if (sstrcmp(tokendata, S("="))) {
783 stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
784 stmt->errormessage = ucx_sprintf(_expected_token,
785 "=", sfmtarg(tokendata)).ptr;
786 } else {
787 state->step = 2;
788 }
789 return _step_WITH_; // continue parsing WITH clause
790 case 2:
791 switch (state->key) {
792 case 1: /* depth */
793 if (!sstrcasecmp(tokendata, S("infinity"))) {
794 stmt->depth = DAV_DEPTH_INFINITY;
795 } else {
796 DavQLExpression *depthexpr =
797 dav_parse_expression(stmt, token, 1);
798
799 if (depthexpr->type == DAVQL_NUMBER) {
800 if (depthexpr->srctext.ptr[0] == '%') {
801 stmt->depth = DAV_DEPTH_PLACEHOLDER;
802 } else { 761 } else {
803 sstr_t depthstr = depthexpr->srctext; 762 dav_error_in_context(DAVQL_ERROR_INVALID_DEPTH,
804 char *conv = malloc(depthstr.length+1); 763 _error_invalid_depth, stmt, token);
805 char *chk;
806 memcpy(conv, depthstr.ptr, depthstr.length);
807 conv[depthstr.length] = '\0';
808 stmt->depth = strtol(conv, &chk, 10);
809 if (*chk || stmt->depth < -1) {
810 stmt->errorcode = DAVQL_ERROR_INVALID_DEPTH;
811 stmt->errormessage = strdup(_invalid_depth);
812 }
813 free(conv);
814 } 764 }
815 } else {
816 stmt->errorcode = DAVQL_ERROR_INVALID_DEPTH;
817 stmt->errormessage = strdup(_invalid_depth);
818 } 765 }
819 766
820 dav_free_expression(depthexpr); 767 dav_free_expression(depthexpr);
821 } 768 }
822 break; 769 }
823 } 770 }
824 state->step = 3; 771
825 return _step_WITHopt_; // continue parsing WITH clause 772 return total_consumed;
826 case 3: 773 }
827 // a with clause may be continued with a comma 774
828 // or another clause may follow 775 static int dav_parse_orderby_clause(DavQLStatement *stmt, UcxList *token) {
829 if (!sstrcmp(tokendata, S(","))) { 776 return 0;
830 state->step = 0; // reset clause parser
831 return _step_WITH_;
832 } else if (!sstrcasecmp(tokendata, S("where"))) {
833 return _step_WHERE_;
834 } else if (!sstrcasecmp(tokendata, S("order"))) {
835 return _step_ORDER_BY_;
836 } else {
837 dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
838 _unexpected_token, stmt, token);
839 return 0;
840 }
841 default:
842 stmt->errorcode = DAVQL_ERROR_INVALID;
843 stmt->errormessage = strdup(_parser_state);
844 return 0;
845 }
846 }
847
848 struct orderby_parser_state {
849 /*
850 * 0: expect by keyword
851 * 1: expect identifier / number
852 * 2: expect asc / desc or comma
853 * 3: expect comma
854 */
855 int step;
856 DavQLOrderCriterion *crit;
857 };
858
859 static int dav_parse_orderby_clause(DavQLStatement *stmt, UcxList *token,
860 struct orderby_parser_state *state) {
861
862 sstr_t tokendata = *token_sstr(token);
863
864 switch (state->step) {
865 case 0:
866 if (!sstrcasecmp(tokendata, S("by"))) {
867 state->step++;
868 } else {
869 stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
870 stmt->errormessage = ucx_sprintf(_expected_by,
871 sfmtarg(tokendata)).ptr;
872 }
873 return _step_ORDER_BY_;
874 case 1:
875 state->crit = malloc(sizeof(DavQLOrderCriterion));
876 state->crit->column = dav_parse_expression(stmt, token, 1);
877 state->crit->descending = 0;
878
879 if (!state->crit->column || (
880 state->crit->column->type != DAVQL_NUMBER &&
881 state->crit->column->type != DAVQL_IDENTIFIER)) {
882 free(state->crit);
883 dav_error_in_context(DAVQL_ERROR_IDORNUM_EXPECTED,
884 _idornum_expected, stmt, token);
885 } else {
886 stmt->orderby = ucx_list_append(stmt->orderby, state->crit);
887 }
888
889 // continue parsing clause, if more tokens available
890 state->step++;
891 return _step_ORDER_BYopt_;
892 case 2:
893 if (!sstrcasecmp(tokendata, S("desc"))) {
894 state->crit->descending = 1;
895 } else if (!sstrcasecmp(tokendata, S("asc"))) {
896 state->crit->descending = 0;
897 } else if (!sstrcmp(tokendata, S(","))) {
898 state->step = 1; // reset clause parser
899 return _step_ORDER_BY_; // statement must not end now
900 } else {
901 dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
902 _unexpected_token, stmt, token);
903 return 0;
904 }
905 // continue parsing clause, if more tokens available
906 state++;
907 return _step_ORDER_BYopt_;
908 case 3:
909 if (!sstrcmp(tokendata, S(","))) {
910 state->step = 1; // reset clause parser
911 return _step_ORDER_BY_; // statement must not end now
912 } else {
913 dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
914 _unexpected_token, stmt, token);
915 return 0;
916 }
917 }
918
919 return _step_end_;
920 }
921
922 static void dav_free_order_criterion(DavQLOrderCriterion *crit) {
923 if (crit->column) { // do it null-safe though column is expected to be set
924 dav_free_expression(crit->column);
925 }
926 free(crit);
927 } 777 }
928 778
929 /** 779 /**
930 * Semantic analysis of a get statement. 780 * Semantic analysis of a get statement.
931 * @param stmt the statement to analyze. 781 * @param stmt the statement to analyze.
941 * @param tokens the token list 791 * @param tokens the token list
942 */ 792 */
943 static void dav_parse_get_statement(DavQLStatement *stmt, UcxList *tokens) { 793 static void dav_parse_get_statement(DavQLStatement *stmt, UcxList *tokens) {
944 stmt->type = DAVQL_GET; 794 stmt->type = DAVQL_GET;
945 795
946 int step = _step_fieldlist_; 796 // Consume field list
947 797 tokens = ucx_list_get(tokens, dav_parse_fieldlist(stmt, tokens));
948 struct with_parser_state state_with; 798
949 memset(&state_with, 0, sizeof(struct with_parser_state)); 799 // Consume from keyword
950 struct orderby_parser_state state_orderby; 800 if (token_is(tokens, DAVQL_TOKEN_KEYWORD)
951 memset(&state_orderby, 0, sizeof(struct orderby_parser_state)); 801 && tokenvalue_is(tokens, "from")) {
952 struct fieldlist_parser_state state_fieldlist; 802 tokens = tokens->next;
953 memset(&state_fieldlist, 0, sizeof(struct fieldlist_parser_state)); 803 } else {
954 804 dav_error_in_context(DAVQL_ERROR_MISSING_TOKEN,
955 // Process tokens 805 _error_missing_from, stmt, tokens);
956 UCX_FOREACH(token, tokens) { 806 return;
957 switch (step) { 807 }
958 // too much input data 808
959 case _step_end_: 809 // Consume path
810 if (token_is(tokens, DAVQL_TOKEN_STRING)) {
811 stmt->path = token_sstr(tokens);
812 tokens = tokens->next;
813 } else if (token_is(tokens, DAVQL_TOKEN_OPERATOR)
814 && tokenvalue_is(tokens, "/")) {
815 stmt->path.ptr = token_sstr(tokens).ptr;
816 tokens = tokens->next;
817 while (!token_is(tokens, DAVQL_TOKEN_KEYWORD)) {
818 sstr_t toksstr = token_sstr(tokens);
819 stmt->path.length = toksstr.ptr-stmt->path.ptr+toksstr.length;
820 tokens = tokens->next;
821 }
822 } else if (token_is(tokens, DAVQL_TOKEN_FMTSPEC)) {
823 // TODO: make it so
824 }
825
826 // Consume with clause (if any)
827 if (token_is(tokens, DAVQL_TOKEN_KEYWORD)
828 && tokenvalue_is(tokens, "with")) {
829 tokens = tokens->next;
830 tokens = ucx_list_get(tokens,
831 dav_parse_with_clause(stmt, tokens));
832 }
833 if (stmt->errorcode) {
834 return;
835 }
836
837 // Consume where clause (if any)
838 if (token_is(tokens, DAVQL_TOKEN_KEYWORD)
839 && tokenvalue_is(tokens, "where")) {
840 tokens = tokens->next;
841 tokens = ucx_list_get(tokens,
842 dav_parse_where_clause(stmt, tokens));
843 }
844 if (stmt->errorcode) {
845 return;
846 }
847
848 // Consume order by clause (if any)
849 if (token_is(tokens, DAVQL_TOKEN_KEYWORD)
850 && tokenvalue_is(tokens, "order")) {
851 tokens = tokens->next;
852 if (token_is(tokens, DAVQL_TOKEN_KEYWORD)
853 && tokenvalue_is(tokens, "by")) {
854 tokens = tokens->next;
855 tokens = ucx_list_get(tokens,
856 dav_parse_orderby_clause(stmt, tokens));
857 } else {
858 dav_error_in_context(DAVQL_ERROR_MISSING_TOKEN,
859 _error_missing_by, stmt, tokens);
860 return;
861 }
862 }
863 if (stmt->errorcode) {
864 return;
865 }
866
867
868 if (tokens) {
869 if (token_is(tokens, DAVQL_TOKEN_INVALID)) {
870 dav_error_in_context(DAVQL_ERROR_INVALID_TOKEN,
871 _error_invalid_token, stmt, tokens);
872 } else {
960 dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN, 873 dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
961 _unexpected_token, stmt, token); 874 _error_unexpected_token, stmt, tokens);
962 break; 875 }
963 // field list 876 } else {
964 case _step_fieldlist_: { 877 dav_analyze_get_statement(stmt);
965 step = dav_parse_fieldlist(stmt, token, &state_fieldlist);
966 break;
967 }
968 // from clause
969 case _step_FROM_: {
970 step = dav_parse_from(stmt, token);
971 break;
972 }
973 // with clause
974 case _step_WITH_:
975 case _step_WITHopt_: {
976 step = dav_parse_with_clause(stmt, token, &state_with);
977 break;
978 }
979 // where clause
980 case _step_WHERE_:
981 // TODO: implement
982 step = _step_end_;
983 break;
984 // order by clause
985 case _step_ORDER_BY_:
986 case _step_ORDER_BYopt_:
987 step = dav_parse_orderby_clause(stmt, token, &state_orderby);
988 break;
989 default:
990 stmt->errorcode = DAVQL_ERROR_INVALID;
991 stmt->errormessage = strdup(_parser_state);
992 }
993
994 // cancel processing, when an error has been detected
995 if (stmt->errorcode) {
996 break;
997 }
998 }
999
1000 if (!stmt->errorcode) {
1001 if (step < _step_end_) {
1002 stmt->errorcode = DAVQL_ERROR_UNEXPECTED_END;
1003 stmt->errormessage = strdup(_unexpected_end_msg);
1004 } else {
1005 dav_analyze_get_statement(stmt);
1006 }
1007 } 878 }
1008 } 879 }
1009 880
1010 static void dav_parse_set_statement(DavQLStatement *stmt, UcxList *tokens) { 881 static void dav_parse_set_statement(DavQLStatement *stmt, UcxList *tokens) {
1011 stmt->type = DAVQL_SET; 882 stmt->type = DAVQL_SET;
1012 883
1013 UCX_FOREACH(token, tokens) { 884 // TODO: make it so
1014 sstr_t tokendata = *token_sstr(token);
1015
1016 }
1017 } 885 }
1018 886
1019 DavQLStatement* dav_parse_statement(sstr_t srctext) { 887 DavQLStatement* dav_parse_statement(sstr_t srctext) {
1020 DavQLStatement *stmt = calloc(1, sizeof(DavQLStatement)); 888 DavQLStatement *stmt = calloc(1, sizeof(DavQLStatement));
1021 889
1029 // tokenization 897 // tokenization
1030 UcxList* tokens = dav_parse_tokenize(stmt->srctext); 898 UcxList* tokens = dav_parse_tokenize(stmt->srctext);
1031 899
1032 if (tokens) { 900 if (tokens) {
1033 // use first token to determine query type 901 // use first token to determine query type
1034 sstr_t token = *token_sstr(tokens);
1035 free(tokens->data);
1036 tokens = ucx_list_remove(tokens, tokens);
1037 902
1038 if (!sstrcasecmp(token, S("get"))) { 903 if (tokenvalue_is(tokens, "get")) {
1039 dav_parse_get_statement(stmt, tokens); 904 dav_parse_get_statement(stmt, tokens->next);
1040 } else if (!sstrcasecmp(token, S("set"))) { 905 } else if (tokenvalue_is(tokens, "set")) {
1041 dav_parse_set_statement(stmt, tokens); 906 dav_parse_set_statement(stmt, tokens->next);
1042 } else { 907 } else {
1043 stmt->type = DAVQL_ERROR; 908 stmt->type = DAVQL_ERROR;
1044 stmt->errorcode = DAVQL_ERROR_INVALID; 909 stmt->errorcode = DAVQL_ERROR_INVALID;
1045 stmt->errormessage = strdup(_invalid_msg); 910 stmt->errormessage = strdup(_error_invalid);
1046 } 911 }
1047 912
1048 // free token data 913 // free token data
1049 UCX_FOREACH(token, tokens) { 914 UCX_FOREACH(token, tokens) {
1050 free(token->data); 915 free(token->data);
1051 } 916 }
1052 ucx_list_free(tokens); 917 ucx_list_free(tokens);
1053 } else { 918 } else {
1054 stmt->type = DAVQL_ERROR; 919 stmt->type = DAVQL_ERROR;
1055 stmt->errorcode = DAVQL_ERROR_INVALID; 920 stmt->errorcode = DAVQL_ERROR_INVALID;
1056 stmt->errormessage = strdup(_invalid_msg); 921 stmt->errormessage = strdup(_error_invalid);
1057 } 922 }
1058 923
1059 return stmt; 924 return stmt;
1060 } 925 }
1061 926

mercurial