# HG changeset patch # User Mike Becker # Date 1428172623 -7200 # Node ID 4d6b03bd703400f5c0ff31452a67cc7133750389 # Parent ed21d95984bb3a86639cd2615fc569faccd69279 tokenizer now correctly handles quoted tokens diff -r ed21d95984bb -r 4d6b03bd7034 libidav/davqlparser.c --- a/libidav/davqlparser.c Sat Apr 04 19:45:58 2015 +0200 +++ b/libidav/davqlparser.c Sat Apr 04 20:37:03 2015 +0200 @@ -245,14 +245,39 @@ #define _unexpected_end_msg "unexpected end of statement" #define _invalid_msg "invalid statement" #define _unexpected_token "unexpected token (%.*s [->]%.*s %.*s)" +#define _missing_quote "missing closing quote symbol (%.*s)" static UcxList* dav_parse_tokenize(sstr_t src) { UcxList *tokens = NULL; // Delimiters: whitespace and dead whitespace around commas sstr_t *token = NULL; + char insequence = '\0'; for (size_t i = 0 ; i < src.length ; i++) { - if (isspace(src.ptr[i])) { + // quoted strings / identifiers are a single token + if (src.ptr[i] == '\'' || src.ptr[i] == '`') { + if (src.ptr[i] == insequence) { + // add quoted token to list + token->length++; + tokens = ucx_list_append(tokens, token); + token = NULL; + insequence = '\0'; + } else if (insequence == '\0') { + insequence = src.ptr[i]; + // always create new token for quoted strings + if (token) { + tokens = ucx_list_append(tokens, token); + } + token = malloc(sizeof(sstr_t)); + token->ptr = src.ptr + i; + token->length = 1; + } else { + // add other kind of quotes to token + token->length++; + } + } else if (insequence) { + token->length++; + } else if (isspace(src.ptr[i])) { // add token before spaces to list (if any) if (token) { tokens = ucx_list_append(tokens, token); @@ -287,14 +312,12 @@ tokens = ucx_list_append(tokens, token); } - // now find quotes and backsticks and merge enclosed tokens - // TODO: make it so or disable tokenization in such cases in above code - return tokens; } #define token_sstr(listelem) ((sstr_t*)(listelem)->data) -static DavQLExpression* dav_parse_expression(UcxList* starttoken, size_t n) { +static DavQLExpression* dav_parse_expression( + DavQLStatement* stmt, UcxList* starttoken, size_t n) { if (n == 0) { return NULL; } @@ -308,11 +331,26 @@ if (n == 1) { expr->srctext.length = token_sstr(starttoken)->length; char firstchar = expr->srctext.ptr[0]; + char lastchar = expr->srctext.ptr[expr->srctext.length-1]; if (firstchar == '\'' || isdigit(firstchar)) { expr->type = DAVQL_LITERAL; } else { expr->type = DAVQL_IDENTIFIER; } + // remove quotes (if any) + if (firstchar == '\'' || firstchar == '`') { + if (lastchar != firstchar) { + stmt->errorcode = DAVQL_ERROR_MISSING_QUOTE; + stmt->errormessage = + ucx_sprintf(_missing_quote, sfmtarg(expr->srctext)).ptr; + } + expr->srctext.ptr++; + if (expr->srctext.length > 2) { + expr->srctext.length -= 2; + } else { + expr->srctext.length = 0; + } + } } else { UcxList* token = starttoken; @@ -388,6 +426,10 @@ // Process tokens UCX_FOREACH(token, tokens) { + if (stmt->errorcode) { + ultrabreak: break; + } + sstr_t tokendata = *token_sstr(token); switch (step) { @@ -402,7 +444,7 @@ step = 40; } else { dav_parse_unexpected_token(stmt, token); - step = 999; + goto ultrabreak; } break; // field list @@ -411,7 +453,7 @@ if (fromkeyword || !sstrcmp(tokendata, S(","))) { if (exprstart) { stmt->fields = ucx_list_append(stmt->fields, - dav_parse_expression(exprstart, exprlen)); + dav_parse_expression(stmt, exprstart, exprlen)); exprstart = NULL; exprlen = 0; } else { @@ -434,7 +476,7 @@ } // from clause case 20: { - DavQLExpression *expr = dav_parse_expression(token, 1); + DavQLExpression *expr = dav_parse_expression(stmt, token, 1); stmt->path = expr->srctext; dav_free_expression(expr); step = 520; diff -r ed21d95984bb -r 4d6b03bd7034 libidav/davqlparser.h --- a/libidav/davqlparser.h Sat Apr 04 19:45:58 2015 +0200 +++ b/libidav/davqlparser.h Sat Apr 04 20:37:03 2015 +0200 @@ -207,11 +207,14 @@ /** Infinity recursion depth for a DavQLStatement. */ #define DAV_DEPTH_INFINITY -1 +/** A quote symbol (' or `) is missing. */ +#define DAVQL_ERROR_MISSING_QUOTE 50 + /** No more tokens to parse, but the parser expected more. */ -#define DAVQL_ERROR_UNEXPECTED_END 1000 +#define DAVQL_ERROR_UNEXPECTED_END 100 /** A token was found, which has not been expected. */ -#define DAVQL_ERROR_UNEXPECTED_TOKEN 1010 +#define DAVQL_ERROR_UNEXPECTED_TOKEN 101 /** Nothing about the statement seems legit. */ #define DAVQL_ERROR_INVALID -1