# HG changeset patch
# User Mike Becker
# Date 1427881358 -7200
# Node ID 7d20ce5d235b957829a8795d5998e9b22c272482
# Parent  0567444f2d76c32f7de9279eacc0a3ecf51f111f
improved expression parser (can now detect literals and identifiers) - TODO: tokenizer must not split strings and "backstick-identifiers"

diff -r 0567444f2d76 -r 7d20ce5d235b libidav/davqlparser.c
--- a/libidav/davqlparser.c	Tue Mar 31 13:00:17 2015 +0200
+++ b/libidav/davqlparser.c	Wed Apr 01 11:42:38 2015 +0200
@@ -61,13 +61,17 @@
 static const char* _map_operator(davqloperator_t op) {
     // don't use string array, because enum values may change
     switch(op) {
-    case ADD: return "+"; case SUB: return "-"; case MUL: return "*";
-    case DIV: return "/"; case AND: return "&"; case OR: return "|";
-    case XOR: return "^"; case NEG: return "~"; case NOT: return "NOT";
-    case LAND: return "AND"; case LOR: return "OR"; case LXOR: return "XOR";
-    case EQ: return "="; case NEQ: return "!="; case LT: return "<";
-    case GT: return ">"; case LE: return "<="; case GE: return ">=";
-    case LIKE: return "LIKE"; case UNLIKE: return "UNLIKE";
+    case DAVQL_NOOP: return "no operator";
+    case DAVQL_ADD: return "+"; case DAVQL_SUB: return "-";
+    case DAVQL_MUL: return "*"; case DAVQL_DIV: return "/";
+    case DAVQL_AND: return "&"; case DAVQL_OR: return "|";
+    case DAVQL_XOR: return "^"; case DAVQL_NEG: return "~";
+    case DAVQL_NOT: return "NOT"; case DAVQL_LAND: return "AND";
+    case DAVQL_LOR: return "OR"; case DAVQL_LXOR: return "XOR";
+    case DAVQL_EQ: return "="; case DAVQL_NEQ: return "!=";
+    case DAVQL_LT: return "<"; case DAVQL_GT: return ">";
+    case DAVQL_LE: return "<="; case DAVQL_GE: return ">=";
+    case DAVQL_LIKE: return "LIKE"; case DAVQL_UNLIKE: return "UNLIKE";
     default: return "unknown";
     }
 }
@@ -86,7 +90,7 @@
     UCX_FOREACH(elm, stmt->fields) {
         DavQLExpression* expr = (DavQLExpression*)elm->data;
         if (expr->type == DAVQL_IDENTIFIER &&
-            expr->srctext.length == 1 && *(expr->srctext.ptr) == '*') {
+            expr->srctext.length == 1 && expr->srctext.ptr[0] == '*') {
             wildcard = 1;
         }
     }
@@ -265,12 +269,61 @@
         tokens = ucx_list_append(tokens, token);
     }
 
+    // now find quotes and backticks and merge enclosed tokens
+    // TODO: make it so or disable tokenization in such cases in above code
+
     return tokens;
 }
 
-static DavQLExpression* dav_parse_expression(sstr_t src) {
+#define token_sstr(listelem) ((sstr_t*)(listelem)->data)
+static DavQLExpression* dav_parse_expression(UcxList* starttoken, size_t n) {
+    if (n == 0) {
+        return NULL;
+    }
+
     DavQLExpression *expr = calloc(1, sizeof(DavQLExpression));
-    expr->srctext = src;
+
+    // set pointer for source text
+    expr->srctext.ptr = token_sstr(starttoken)->ptr;
+
+    // special case - only one token
+    if (n == 1) {
+        expr->srctext.length = token_sstr(starttoken)->length;
+        char firstchar = expr->srctext.ptr[0];
+        if (firstchar == '\'' || isdigit((unsigned char)firstchar)) {
+            expr->type = DAVQL_LITERAL;
+        } else {
+            expr->type = DAVQL_IDENTIFIER;
+        }
+    } else {
+        UcxList* token = starttoken;
+
+        // check, if first token is (
+        // if so, verify that last token is ) and throw both away
+        if (!sstrcmp(*token_sstr(token), S("("))) {
+            if (!sstrcmp(*token_sstr(ucx_list_get(token, n-1)), S(")"))) {
+                token = token->next;
+                n -= 2;
+            } else {
+                // TODO: throw syntax error
+            }
+        }
+
+        // process tokens
+        for (size_t i = 0 ; i < n ; i++) {
+
+            // TODO: make it so
+
+            // go to next token (if this is not the last token)
+            if (i < n-1) {
+                token = token->next;
+            }
+        }
+
+        // compute length of source text (including delimiters)
+        expr->srctext.length = token_sstr(token)->ptr +
+            token_sstr(token)->length - expr->srctext.ptr;
+    }
 
     return expr;
 }
@@ -290,8 +343,13 @@
      */
     int step = 10;
 
+    // Variables for token sublists for expressions
+    UcxList *exprstart = NULL;
+    size_t exprlen = 0;
+
+    // Process tokens
     UCX_FOREACH(token, tokens) {
-        sstr_t tokendata = *(sstr_t*)token->data;
+        sstr_t tokendata = *token_sstr(token);
 
         switch (step) {
         // optional clauses
@@ -306,17 +364,35 @@
             }
             break;
         // field list
-        case 10:
-            if (!sstrcasecmp(tokendata, S("from"))) {
-                step = 20;
+        case 10: {
+            _Bool fromkeyword = !sstrcasecmp(tokendata, S("from"));
+            if (fromkeyword || !sstrcmp(tokendata, S(","))) {
+                if (exprstart) {
+                    stmt->fields = ucx_list_append(stmt->fields,
+                        dav_parse_expression(exprstart, exprlen));
+                    exprstart = NULL;
+                    exprlen = 0;
+                } else {
+                    // TODO: throw syntax error
+                }
+
+                if (fromkeyword) {
+                    step = 20;
+                }
             } else {
-                stmt->fields = ucx_list_append(stmt->fields,
-                    dav_parse_expression(tokendata));
+                // collect tokens for field expression
+                if (exprstart) {
+                    exprlen++;
+                } else {
+                    exprstart = token;
+                    exprlen = 1;
+                }
             }
             break;
+        }
         // from clause
         case 20:
-            stmt->path = dav_parse_expression(tokendata);
+            stmt->path = dav_parse_expression(token, 1);
             step = 520;
             break;
         // where clause
@@ -328,8 +404,6 @@
             step = 500;
             break;
         }
-
-        free(token->data);
     }
 
     if (step < 500) {
@@ -342,11 +416,8 @@
     stmt->type = DAVQL_SET;
 
     UCX_FOREACH(token, tokens) {
-        sstr_t tokendata = *(sstr_t*)token->data;
+        sstr_t tokendata = *token_sstr(token);
 
-        // just free the tokens, until the function is implemented
-
-        free(token->data);
     }
 }
 
@@ -363,11 +434,12 @@
     // tokenization
     UcxList* tokens = dav_parse_tokenize(stmt->srctext);
 
-    // use first token to determine query type
     if (tokens) {
-        sstr_t token = *(sstr_t*)tokens->data;
+        // use first token to determine query type
+        sstr_t token = *token_sstr(tokens);
         free(tokens->data);
         tokens = ucx_list_remove(tokens, tokens);
+
         if (!sstrcasecmp(token, S("get"))) {
             dav_parse_get_statement(stmt, tokens);
         } else if (!sstrcasecmp(token, S("set"))) {
@@ -375,6 +447,11 @@
         } else {
             stmt->type = DAVQL_ERROR;
         }
+
+        // free token data
+        UCX_FOREACH(token, tokens) {
+            free(token->data);
+        }
         ucx_list_free(tokens);
     } else {
         stmt->type = DAVQL_ERROR;
diff -r 0567444f2d76 -r 7d20ce5d235b libidav/davqlparser.h
--- a/libidav/davqlparser.h	Tue Mar 31 13:00:17 2015 +0200
+++ b/libidav/davqlparser.h	Wed Apr 01 11:42:38 2015 +0200
@@ -54,11 +54,12 @@
  * Enumeration of possible expression operators.
  */
 typedef enum {
-    ADD, SUB, MUL, DIV,
-    AND, OR, XOR, NEG,
-    NOT, LAND, LOR, LXOR,
-    EQ, NEQ, LT, GT, LE, GE,
-    LIKE, UNLIKE
+    DAVQL_NOOP,
+    DAVQL_ADD, DAVQL_SUB, DAVQL_MUL, DAVQL_DIV,
+    DAVQL_AND, DAVQL_OR, DAVQL_XOR, DAVQL_NEG,
+    DAVQL_NOT, DAVQL_LAND, DAVQL_LOR, DAVQL_LXOR,
+    DAVQL_EQ, DAVQL_NEQ, DAVQL_LT, DAVQL_GT, DAVQL_LE, DAVQL_GE,
+    DAVQL_LIKE, DAVQL_UNLIKE
 } davqloperator_t;
 
 /**
@@ -110,7 +111,7 @@
  * | "(", Expression, ")";
  * 
  * FunctionCall = Identifier, "(", Expression, ")";
- * Identifier = IdentifierChar, {IdentifierChar}
+ * Identifier = IdentifierChar - ?Digit?, {IdentifierChar}
  * | "`", ?Character?, {?Character?}, "`";
  * IdentifierChar = ?Character - (" "|",")?;
  * Literal = ?Digit?, {?Digit?} | String;