2015-04-17
special path parser + typed literals + order by parser + grammar changes
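TODO: refactor static variables (e.g. the function-local static `state` and `crit` in dav_parse_orderby_clause, which keep their values between parser calls) ...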
libidav/davqlparser.c | file | annotate | diff | comparison | revisions | |
libidav/davqlparser.h | file | annotate | diff | comparison | revisions |
--- a/libidav/davqlparser.c Fri Apr 17 13:11:58 2015 +0200 +++ b/libidav/davqlparser.c Fri Apr 17 16:09:43 2015 +0200 @@ -49,7 +49,9 @@ static const char* _map_exprtype(davqlexprtype_t type) { switch(type) { - case DAVQL_LITERAL: return "LITERAL"; + case DAVQL_NUMBER: return "NUMBER"; + case DAVQL_STRING: return "STRING"; + case DAVQL_TIMESTAMP: return "TIMESTAMP"; case DAVQL_IDENTIFIER: return "IDENTIFIER"; case DAVQL_UNARY: return "UNARY"; case DAVQL_BINARY: return "BINARY"; @@ -118,15 +120,13 @@ } // WITH attributes - if (stmt->depth < 0) { + if (stmt->depth == DAV_DEPTH_INFINITY) { printf("Depth: infinity\n"); + } else if (stmt->depth == DAV_DEPTH_PLACEHOLDER) { + printf("Depth: placeholder\n"); } else { printf("Depth: %d\n", stmt->depth); } - if (stmt->errorcode) { - printf("\nError code: %d\nError: %s\n", - stmt->errorcode, stmt->errormessage); - } // order by clause printf("Order by: "); @@ -134,12 +134,18 @@ UCX_FOREACH(crit, stmt->orderby) { DavQLOrderCriterion *critdata = crit->data; printf("%.*s %s%s", sfmtarg(critdata->column->srctext), - critdata->ascending ? "asc" : "desc", + critdata->descending ? "desc" : "asc", crit->next ? 
", " : "\n"); } } else { printf("nothing\n"); } + + // error messages + if (stmt->errorcode) { + printf("\nError code: %d\nError: %s\n", + stmt->errorcode, stmt->errormessage); + } } static int dav_debug_ql_expr_selected(DavQLExpression *expr) { @@ -260,13 +266,34 @@ #define _unexpected_token "unexpected token (%.*s [->]%.*s %.*s)" #define _expected_token "expected token '%s' before '%.*s'" #define _expected_by "expected 'by' after 'order' (order [->]%.*s)" +#define _missing_fmtspec "format specifier missing (%.*s [->]%.*s %.*s)" +#define _invalid_fmtspec "invalid format specifier (%.*s [->]%.*s %.*s)" +#define _unknown_fmtspec "unknown format specifier (%.*s [->]%.*s %.*s)" #define _missing_quote "missing closing quote symbol (%.*s)" #define _parser_state "parser reached invalid state" #define _unknown_attribute "unknown attribute '%.*s'" #define _duplicated_attribute "duplicated attribute '%.*s'" #define _invalid_depth "invalid depth" -#define _identifier_expected "identifier expected, but found: %.*s" +#define _invalid_path "invalid path" + +#define _identifier_expected "identifier expected (%.*s [->]%.*s %.*s)" +#define _idornum_expected "identifier or number expected (%.*s [->]%.*s %.*s)" +#define _idorstr_expected "identifier or string expected (%.*s [->]%.*s %.*s)" +#define _idorts_expected "identifier or timestamp expected (%.*s [->]%.*s %.*s)" + +#define token_sstr(listelem) ((sstr_t*)(listelem)->data) +static void dav_error_in_context(int errorcode, const char *errormsg, + DavQLStatement *stmt, UcxList *token) { + sstr_t emptystring = ST(""); + stmt->errorcode = errorcode; + stmt->errormessage = ucx_sprintf(errormsg, + sfmtarg(token->prev?*token_sstr(token->prev):emptystring), + sfmtarg(*token_sstr(token)), + sfmtarg(token->next?*token_sstr(token->next):emptystring)).ptr; +} + +// special symbols are single tokens - the % sign MUST NOT be a special symbol static const char *special_token_symbols = ",()+-*/&|^~=!<>"; static UcxList* 
dav_parse_tokenize(sstr_t src) { @@ -336,7 +363,6 @@ return tokens; } -#define token_sstr(listelem) ((sstr_t*)(listelem)->data) static DavQLExpression* dav_parse_expression( DavQLStatement* stmt, UcxList* starttoken, size_t n) { if (n == 0) { @@ -353,8 +379,27 @@ expr->srctext.length = token_sstr(starttoken)->length; char firstchar = expr->srctext.ptr[0]; char lastchar = expr->srctext.ptr[expr->srctext.length-1]; - if (firstchar == '\'' || isdigit(firstchar)) { - expr->type = DAVQL_LITERAL; + if (firstchar == '\'') { + expr->type = DAVQL_STRING; + } else if (isdigit(firstchar)) { + expr->type = DAVQL_NUMBER; + } else if (firstchar == '%') { + if (expr->srctext.length == 1) { + dav_error_in_context(DAVQL_ERROR_MISSING_FMTSPEC, + _missing_fmtspec, stmt, starttoken); + } else if (expr->srctext.length == 2) { + switch (expr->srctext.ptr[1]) { + case 'd': expr->type = DAVQL_NUMBER; break; + case 's': expr->type = DAVQL_STRING; break; + case 't': expr->type = DAVQL_TIMESTAMP; break; + default: + dav_error_in_context(DAVQL_ERROR_UNKNOWN_FMTSPEC, + _unknown_fmtspec, stmt, starttoken); + } + } else { + dav_error_in_context(DAVQL_ERROR_INVALID_FMTSPEC, + _invalid_fmtspec, stmt, starttoken); + } } else { expr->type = DAVQL_IDENTIFIER; } @@ -414,29 +459,45 @@ } free(expr); } - -static void dav_parse_unexpected_token(DavQLStatement *stmt, UcxList *token) { - sstr_t emptystring = ST(""); - stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN; - sstr_t errormsg = ucx_sprintf(_unexpected_token, - sfmtarg(token->prev?*token_sstr(token->prev):emptystring), - sfmtarg(*token_sstr(token)), - sfmtarg(token->next?*token_sstr(token->next):emptystring)); - stmt->errormessage = errormsg.ptr; -} - #define _step_fieldlist_ 10 // field list #define _step_FROM_ 20 // FROM clause -#define _step_expect_WWO_ 530 // expecting WITH, WHERE or ORDER BY clause #define _step_WITH_ 30 // WITH clause -#define _step_expect_WO 540 // expecting WHERE or ORDER BY clause #define _step_WHERE_ 40 // WHERE clause 
-#define _step_expect_O 550 // expecting ORDER BY clause -#define _step_expect_BY 551 // expecting the BY token for the ORDER BY clause +#define _step_ORDER_BYopt_ 552 // expecting more ORDER BY details or end #define _step_ORDER_BY_ 50 // ORDER BY clause #define _step_end_ 500 // expect end +static int dav_parse_from(DavQLStatement *stmt, UcxList *token) { + sstr_t tokendata = *token_sstr(token); + + if (!sstrcasecmp(tokendata, S("with"))) { + return _step_WITH_; + } else if (!sstrcasecmp(tokendata, S("where"))) { + return _step_WHERE_; + } else if (!sstrcasecmp(tokendata, S("order"))) { + return _step_ORDER_BY_; + } else { + if (stmt->path.ptr) { + if (stmt->path.ptr[0] == '/') { + char *end = tokendata.ptr+tokendata.length; + stmt->path.length = end - stmt->path.ptr; + } else { + stmt->errorcode = DAVQL_ERROR_INVALID_PATH; + stmt->errormessage = strdup(_invalid_path); + } + } else { + if (tokendata.ptr[0] == '/' || !sstrcmp(tokendata, S("%s"))) { + stmt->path = tokendata; + } else { + stmt->errorcode = DAVQL_ERROR_INVALID_PATH; + stmt->errormessage = strdup(_invalid_path); + } + } + return _step_FROM_; + } +} + static int dav_parse_with_clause(DavQLStatement *stmt, UcxList *token) { sstr_t tokendata = *token_sstr(token); @@ -488,16 +549,31 @@ if (!sstrcasecmp(tokendata, S("infinity"))) { stmt->depth = DAV_DEPTH_INFINITY; } else { - char *conv = malloc(tokendata.length+1); - char *chk; - memcpy(conv, tokendata.ptr, tokendata.length); - conv[tokendata.length] = '\0'; - stmt->depth = strtol(conv, &chk, 10); - if (*chk || stmt->depth < -1) { + DavQLExpression *depthexpr = + dav_parse_expression(stmt, token, 1); + + if (depthexpr->type == DAVQL_NUMBER) { + if (depthexpr->srctext.ptr[0] == '%') { + stmt->depth = DAV_DEPTH_PLACEHOLDER; + } else { + sstr_t depthstr = depthexpr->srctext; + char *conv = malloc(depthstr.length+1); + char *chk; + memcpy(conv, depthstr.ptr, depthstr.length); + conv[depthstr.length] = '\0'; + stmt->depth = strtol(conv, &chk, 10); + if (*chk 
|| stmt->depth < -1) { + stmt->errorcode = DAVQL_ERROR_INVALID_DEPTH; + stmt->errormessage = strdup(_invalid_depth); + } + free(conv); + } + } else { stmt->errorcode = DAVQL_ERROR_INVALID_DEPTH; stmt->errormessage = strdup(_invalid_depth); } - free(conv); + + dav_free_expression(depthexpr); } break; } @@ -512,9 +588,10 @@ } else if (!sstrcasecmp(tokendata, S("where"))) { return _step_WHERE_; } else if (!sstrcasecmp(tokendata, S("order"))) { - return _step_expect_BY; + return _step_ORDER_BY_; } else { - dav_parse_unexpected_token(stmt, token); + dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN, + _unexpected_token, stmt, token); return 0; } default: @@ -525,7 +602,71 @@ } static int dav_parse_orderby_clause(DavQLStatement *stmt, UcxList *token) { - // TODO: implement + + sstr_t tokendata = *token_sstr(token); + /* + * 0: expect by keyword + * 1: expect identifier / number + * 2: expect asc / desc or comma + * 3: expect comma + */ + static int state = 0; + static DavQLOrderCriterion *crit = NULL; + + switch (state) { + case 0: + if (!sstrcasecmp(tokendata, S("by"))) { + state++; + } else { + stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN; + stmt->errormessage = ucx_sprintf(_expected_by, + sfmtarg(tokendata)).ptr; + } + return _step_ORDER_BY_; + case 1: + crit = malloc(sizeof(DavQLOrderCriterion)); + crit->column = dav_parse_expression(stmt, token, 1); + crit->descending = 0; + + if (!crit->column || ( + crit->column->type != DAVQL_NUMBER && + crit->column->type != DAVQL_IDENTIFIER)) { + free(crit); + dav_error_in_context(DAVQL_ERROR_IDORNUM_EXPECTED, + _idornum_expected, stmt, token); + } else { + stmt->orderby = ucx_list_append(stmt->orderby, crit); + } + + // continue parsing clause, if more tokens available + state++; + return _step_ORDER_BYopt_; + case 2: + if (!sstrcasecmp(tokendata, S("desc"))) { + crit->descending = 1; + } else if (!sstrcasecmp(tokendata, S("asc"))) { + crit->descending = 0; + } else if (!sstrcmp(tokendata, S(","))) { + state = 1; // reset 
clause parser + return _step_ORDER_BY_; // statement must not end now + } else { + dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN, + _unexpected_token, stmt, token); + return 0; + } + // continue parsing clause, if more tokens available + state++; + return _step_ORDER_BYopt_; + case 3: + if (!sstrcmp(tokendata, S(","))) { + state = 1; // reset clause parser + return _step_ORDER_BY_; // statement must not end now + } else { + dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN, + _unexpected_token, stmt, token); + return 0; + } + } return _step_end_; } @@ -543,6 +684,7 @@ int step = _step_fieldlist_; // Variables for token sublists for expressions + // TODO: this is deprecated and won't work with function calls UcxList *exprstart = NULL; size_t exprlen = 0; @@ -553,37 +695,8 @@ switch (step) { // too much input data case _step_end_: - dav_parse_unexpected_token(stmt, token); - break; - // optional clauses - case _step_expect_WWO_: - if (!sstrcasecmp(tokendata, S("with"))) { - step = _step_WITH_; - continue; - } - /* no break and no else*/ - case _step_expect_WO: - if (!sstrcasecmp(tokendata, S("where"))) { - step = _step_WHERE_; - continue; - } - /* no break and no else*/ - case _step_expect_O: - if (!sstrcasecmp(tokendata, S("order"))) { - step = _step_expect_BY; - continue; - } else { // last possible clause checked and not present - dav_parse_unexpected_token(stmt, token); - } - break; - case _step_expect_BY: - if (!sstrcasecmp(tokendata, S("by"))) { - step = _step_ORDER_BY_; - } else { - stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN; - stmt->errormessage = ucx_sprintf(_expected_by, - sfmtarg(tokendata)).ptr; - } + dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN, + _unexpected_token, stmt, token); break; // field list case _step_fieldlist_: { @@ -599,7 +712,8 @@ step = _step_FROM_; } } else { - dav_parse_unexpected_token(stmt, token); + dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN, + _unexpected_token, stmt, token); } } else { // collect tokens for field 
expression @@ -614,16 +728,7 @@ } // from clause case _step_FROM_: { - DavQLExpression *expr = dav_parse_expression(stmt, token, 1); - stmt->path = expr->srctext; - int exprtype = expr->type; - dav_free_expression(expr); - if (exprtype != DAVQL_IDENTIFIER) { - stmt->errorcode = DAVQL_ERROR_IDENTIFIER_EXPECTED; - stmt->errormessage = ucx_sprintf(_identifier_expected, - sfmtarg(tokendata)).ptr; - } - step = _step_expect_WWO_; + step = dav_parse_from(stmt, token); break; } // with clause @@ -638,6 +743,7 @@ break; // order by clause case _step_ORDER_BY_: + case _step_ORDER_BYopt_: step = dav_parse_orderby_clause(stmt, token); break; default:
--- a/libidav/davqlparser.h Fri Apr 17 13:11:58 2015 +0200 +++ b/libidav/davqlparser.h Fri Apr 17 16:09:43 2015 +0200 @@ -46,7 +46,7 @@ * Enumeration of possible expression types. */ typedef enum { - DAVQL_LITERAL, DAVQL_IDENTIFIER, + DAVQL_NUMBER, DAVQL_STRING, DAVQL_TIMESTAMP, DAVQL_IDENTIFIER, DAVQL_UNARY, DAVQL_BINARY, DAVQL_LOGICAL, DAVQL_FUNCCALL } davqlexprtype_t; @@ -105,9 +105,10 @@ */ DavQLExpression *column; /** - * True, if the result shall be sorted ascending, false otherwise. + * True, if the result shall be sorted descending, false otherwise. + * Default is false (ascending). */ - _Bool ascending; + _Bool descending; } DavQLOrderCriterion; @@ -126,11 +127,12 @@ * FunctionCall = Identifier, "(", ArgumentList, ")"; * ArgumentList = Expression, {",", Expression}; * Identifier = IdentifierChar - ?Digit?, {IdentifierChar} - * | "`", ?Character?, {?Character?}, "`"; - * IdentifierChar = ?Character - (" "|",")?; - * Literal = Number | String; + * | "`", ?Character? - "`", {?Character? - "`"}, "`"; + * IdentifierChar = ?Character? - (" "|","); + * Literal = Number | String | Timestamp; * Number = ?Digit?, {?Digit?} | "%d"; - * String = "'", {?Character - "'"? | "'''"} , "'"; + * String = "'", {?Character? - "'" | "'''"} , "'" | "%s"; + * Timestamp = "%t"; // TODO: maybe introduce a real literal * * LogicalExpression = LogicalExpression, LogicalOperator, LogicalExpression * | "not ", LogicalExpression @@ -151,6 +153,10 @@ * SetExpressions = SetExpression, {",", SetExpressions}; * SetExpression = Identifier, "=", Expression; * + * Path = "%s" + * | "/", {?Character? 
- " "} + * | "'/", {?Character?}, "'"; + * * WithClause = "depth", "=", (Number | "infinity"); * * OrderByClause = OrderByCriterion, {",", OrderByCriterion}; @@ -165,7 +171,7 @@ * <b>GET:</b> * <pre> * GetStatement = "get ", FieldExpressions, - * " from ", Identifier, + * " from ", Path, * [" with ", WithClause], * [" where ", LogicalExpression], * [" order by ", OrderByClause]; @@ -174,7 +180,7 @@ * <b>SET:</b> * <pre> * "set ",SetExpressions, - * " at ", Identifier, + * " at ", Path, * [" with ", WithClause], * (" where ", LogicalExpression) | " anywhere"; * </pre> @@ -224,6 +230,8 @@ /** * The recursion depth for the statement. * Defaults to 1. + * Magic numbers are DAV_DEPTH_INFINITY for infinity and + * DAV_DEPTH_PLACEHOLDER for a placeholder. */ int depth; } DavQLStatement; @@ -231,9 +239,27 @@ /** Infinity recursion depth for a DavQLStatement. */ #define DAV_DEPTH_INFINITY -1 +/** Depth needs to be specified at runtime. */ +#define DAV_DEPTH_PLACEHOLDER -2 + +/** Invalid path. */ +#define DAVQL_ERROR_INVALID_PATH 1 + /** Expected an identifier, but found something else. */ #define DAVQL_ERROR_IDENTIFIER_EXPECTED 10 +/** Expected an identifier or literal, but found something else. */ +#define DAVQL_ERROR_IDORLIT_EXPECTED 11 + +/** Expected an identifier or number, but found something else. */ +#define DAVQL_ERROR_IDORNUM_EXPECTED 12 + +/** Expected an identifier or string, but found something else. */ +#define DAVQL_ERROR_IDORSTR_EXPECTED 13 + +/** Expected an identifier or timestamp, but found something else. */ +#define DAVQL_ERROR_IDORTS_EXPECTED 14 + /** The with-clause contains an unknown attribute. */ #define DAVQL_ERROR_UNKNOWN_ATTRIBUTE 20 @@ -243,6 +269,15 @@ /** The with-clause contains an attribute more than once. */ #define DAVQL_ERROR_DUPLICATED_ATTRIBUTE 29 +/** The format specifier is missing. */ +#define DAVQL_ERROR_MISSING_FMTSPEC 30 + +/** The format specifier is unknown. 
*/ +#define DAVQL_ERROR_UNKNOWN_FMTSPEC 31 + +/** The format specifier is invalid. */ +#define DAVQL_ERROR_INVALID_FMTSPEC 39 + /** A quote symbol (' or `) is missing. */ #define DAVQL_ERROR_MISSING_QUOTE 50