Tue, 31 Mar 2015 13:00:17 +0200
renamed some enums to avoid collisions with macros + minor grammar change (backticks for identifiers) + tokenizer + parser skeleton
libidav/davqlparser.c | file | annotate | diff | comparison | revisions | |
libidav/davqlparser.h | file | annotate | diff | comparison | revisions |
--- a/libidav/davqlparser.c Tue Mar 31 10:18:55 2015 +0200 +++ b/libidav/davqlparser.c Tue Mar 31 13:00:17 2015 +0200 @@ -29,25 +29,31 @@ #include "davqlparser.h" #include <string.h> #include <stdio.h> +#include <ctype.h> #define sfmtarg(s) ((int)(s).length), (s).ptr +// ------------------------------------------------------------------------ +// D E B U G E R +// ------------------------------------------------------------------------ + static const char* _map_querytype(davqltype_t type) { switch(type) { - case GET: return "GET"; - case SET: return "SET"; + case DAVQL_ERROR: return "ERROR"; + case DAVQL_GET: return "GET"; + case DAVQL_SET: return "SET"; default: return "unknown"; } } static const char* _map_exprtype(davqlexprtype_t type) { switch(type) { - case LITERAL: return "LITERAL"; - case IDENTIFIER: return "IDENTIFIER"; - case UNARY: return "UNARY"; - case BINARY: return "BINARY"; - case LOGICAL: return "LOGICAL"; - case FUNCCALL: return "FUNCCALL"; + case DAVQL_LITERAL: return "LITERAL"; + case DAVQL_IDENTIFIER: return "IDENTIFIER"; + case DAVQL_UNARY: return "UNARY"; + case DAVQL_BINARY: return "BINARY"; + case DAVQL_LOGICAL: return "LOGICAL"; + case DAVQL_FUNCCALL: return "FUNCCALL"; default: return "unknown"; } } @@ -67,9 +73,10 @@ } static void dav_debug_ql_stmt_print(DavQLStatement *stmt) { + sstr_t empty = ST("(empty)"); // Basic information - printf("Statement: %*s\nType: %s\nField count: %zu", + printf("Statement: %.*s\nType: %s\nField count: %zu", sfmtarg(stmt->srctext), _map_querytype(stmt->type), ucx_list_size(stmt->fields)); @@ -78,16 +85,16 @@ _Bool wildcard = 0; UCX_FOREACH(elm, stmt->fields) { DavQLExpression* expr = (DavQLExpression*)elm->data; - if (expr->type == IDENTIFIER && + if (expr->type == DAVQL_IDENTIFIER && expr->srctext.length == 1 && *(expr->srctext.ptr) == '*') { wildcard = 1; } } - printf(" %s wildcard\nPath: %*s\nHas where clause: %s\n", + printf(" %s wildcard\nPath: %.*s\nHas where clause: %s\n", wildcard?"with":"without", - 
sfmtarg(stmt->path.srctext), + sfmtarg(stmt->path ? stmt->path->srctext : empty), stmt->where ? "yes" : "no"); - if (stmt->type == SET) { + if (stmt->type == DAVQL_SET) { printf("Value list size matches: %s", ucx_list_size(stmt->fields) == ucx_list_size(stmt->setvalues) ? "yes" : "no"); @@ -112,10 +119,10 @@ static void dav_debug_ql_expr_print(DavQLExpression *expr) { if (dav_debug_ql_expr_selected(expr)) { - sstr_t empty = S("(empty)"); + sstr_t empty = ST("(empty)"); printf( - "Text: %*s\nType: %s\nOperator: %s\n" - "Left hand: %*s\nRight hand: %*s\n", + "Text: %.*s\nType: %s\nOperator: %s\n" + "Left hand: %.*s\nRight hand: %.*s\n", sfmtarg(expr->srctext), _map_exprtype(expr->type), _map_operator(expr->op), @@ -156,7 +163,7 @@ } } -void dav_debug_ql_statement(DavQLStatement *stmt) { +void dav_debug_statement(DavQLStatement *stmt) { if (!stmt) { fprintf(stderr, "Debug DavQLStatement failed: null pointer"); return; @@ -174,7 +181,7 @@ case DQLD_CMD_PS: dav_debug_ql_stmt_print(stmt); break; case DQLD_CMD_PE: dav_debug_ql_expr_print(examineexpr); break; case DQLD_CMD_P: - examineexpr = &(stmt->path); + examineexpr = stmt->path; dav_debug_ql_expr_print(examineexpr); break; case DQLD_CMD_L: @@ -213,15 +220,194 @@ } } +// ------------------------------------------------------------------------ +// P A R S E R +// ------------------------------------------------------------------------ + +static UcxList* dav_parse_tokenize(sstr_t src) { + UcxList *tokens = NULL; + + // Delimiters: whitespace and dead whitespace around commas + sstr_t *token = NULL; + for (size_t i = 0 ; i < src.length ; i++) { + if (isspace(src.ptr[i])) { + // add token before spaces to list (if any) + if (token) { + tokens = ucx_list_append(tokens, token); + token = NULL; + } + } else if (src.ptr[i] == ',') { + // add token before comma to list (if any) + if (token) { + tokens = ucx_list_append(tokens, token); + token = NULL; + } + // add comma as token to list + token = malloc(sizeof(sstr_t)); + 
token->ptr = src.ptr + i; + token->length = 1; + tokens = ucx_list_append(tokens, token); + // set tokenizer ready to read more tokens + token = NULL; + } else { + // if this is a new token, create memory for it + if (!token) { + token = malloc(sizeof(sstr_t)); + token->ptr = src.ptr + i; + token->length = 0; + } + // extend token length when reading more bytes + token->length++; + } + } + + if (token) { + tokens = ucx_list_append(tokens, token); + } + + return tokens; +} + +static DavQLExpression* dav_parse_expression(sstr_t src) { + DavQLExpression *expr = calloc(1, sizeof(DavQLExpression)); + expr->srctext = src; + + return expr; +} + +static void dav_parse_get_statement(DavQLStatement *stmt, UcxList *tokens) { + stmt->type = DAVQL_GET; + + /* + * 10: field list + * 20: FROM clause + * 520: expecting WHERE or WITH clause + * 30: WHERE clause + * 530: expecting WITH clause + * 40: WITH clause + * 500: ready to quit + * + */ + int step = 10; + + UCX_FOREACH(token, tokens) { + sstr_t tokendata = *(sstr_t*)token->data; + + switch (step) { + // optional clauses + case 520: + if (!sstrcasecmp(tokendata, S("where"))) { + step = 30; + } + /* no break */ + case 530: + if (!sstrcasecmp(tokendata, S("with"))) { + step = 40; + } + break; + // field list + case 10: + if (!sstrcasecmp(tokendata, S("from"))) { + step = 20; + } else { + stmt->fields = ucx_list_append(stmt->fields, + dav_parse_expression(tokendata)); + } + break; + // from clause + case 20: + stmt->path = dav_parse_expression(tokendata); + step = 520; + break; + // where clause + case 30: + step = 530; + break; + // with clause + case 40: + step = 500; + break; + } + + free(token->data); + } + + if (step < 500) { + stmt->type = DAVQL_ERROR; + // TODO: save parse error message + } +} + +static void dav_parse_set_statement(DavQLStatement *stmt, UcxList *tokens) { + stmt->type = DAVQL_SET; + + UCX_FOREACH(token, tokens) { + sstr_t tokendata = *(sstr_t*)token->data; + + // just free the tokens, until the function is 
implemented + + free(token->data); + } +} + DavQLStatement* dav_parse_statement(sstr_t srctext) { - DavQLStatement *stmt = malloc(sizeof(DavQLStatement)); + DavQLStatement *stmt = calloc(1, sizeof(DavQLStatement)); // default values - memset(stmt, 0, sizeof(DavQLStatement)); - stmt->srctext = srctext; - stmt->type = stmt->path.type = stmt->path.op = -1; + stmt->type = -1; stmt->depth = SIZE_MAX; + // save trimmed source text + stmt->srctext = sstrtrim(srctext); + + // tokenization + UcxList* tokens = dav_parse_tokenize(stmt->srctext); + + // use first token to determine query type + if (tokens) { + sstr_t token = *(sstr_t*)tokens->data; + free(tokens->data); + tokens = ucx_list_remove(tokens, tokens); + if (!sstrcasecmp(token, S("get"))) { + dav_parse_get_statement(stmt, tokens); + } else if (!sstrcasecmp(token, S("set"))) { + dav_parse_set_statement(stmt, tokens); + } else { + stmt->type = DAVQL_ERROR; + } + ucx_list_free(tokens); + } else { + stmt->type = DAVQL_ERROR; + } return stmt; } + +static void dav_free_expression(DavQLExpression *expr) { + if (expr->left) { + dav_free_expression(expr->left); + } + if (expr->right) { + dav_free_expression(expr->right); + } + free(expr); +} + +void dav_free_statement(DavQLStatement *stmt) { + UCX_FOREACH(expr, stmt->fields) { + dav_free_expression(expr->data); + } + ucx_list_free(stmt->fields); + UCX_FOREACH(expr, stmt->setvalues) { + dav_free_expression(expr->data); + } + ucx_list_free(stmt->setvalues); + + if (stmt->path) { + dav_free_expression(stmt->path); + } + if (stmt->where) { + dav_free_expression(stmt->where); + } + free(stmt); +}
--- a/libidav/davqlparser.h Tue Mar 31 10:18:55 2015 +0200 +++ b/libidav/davqlparser.h Tue Mar 31 13:00:17 2015 +0200 @@ -40,14 +40,14 @@ /** * Enumeration of possible statement types. */ -typedef enum {GET, SET} davqltype_t; +typedef enum {DAVQL_ERROR, DAVQL_GET, DAVQL_SET} davqltype_t; /** * Enumeration of possible expression types. */ typedef enum { - LITERAL, IDENTIFIER, - UNARY, BINARY, LOGICAL, FUNCCALL + DAVQL_LITERAL, DAVQL_IDENTIFIER, + DAVQL_UNARY, DAVQL_BINARY, DAVQL_LOGICAL, DAVQL_FUNCCALL } davqlexprtype_t; /** @@ -104,15 +104,17 @@ * The grammar for a DavQLStatement is: * * <pre> - * Expression = Expression BinaryOperator Expression - * | UnaryOperator Expression + * Expression = Expression, BinaryOperator, Expression + * | UnaryOperator, Expression * | FunctionCall | Identifier | Literal * | "(", Expression, ")"; * - * FunctionCall = Identifier, "(", Expression, ")"; - * Identifier = ?Character?, {?Character?}; - * Literal = ?Digit?, {?Digit?} | String; - * String = "'", {?Character - "'"? | "'''"} , "'"; + * FunctionCall = Identifier, "(", Expression, ")"; + * Identifier = IdentifierChar, {IdentifierChar} + * | "`", ?Character?, {?Character?}, "`"; + * IdentifierChar = ?Character - (" "|",")?; + * Literal = ?Digit?, {?Digit?} | String; + * String = "'", {?Character - "'"? | "'''"} , "'"; * * LogicalExpression = LogicalExpression, LogicalOperator, LogicalExpression * | "not ", LogicalExpression @@ -178,7 +180,7 @@ /** * A DavQLExpression that denotes the queried path. */ - DavQLExpression path; + DavQLExpression* path; /** * Logical expression for selection. * <code>NULL</code>, if there is no where clause. @@ -197,7 +199,7 @@ * * @param stmt the statement to debug */ -void dav_debug_ql_statement(DavQLStatement *stmt); +void dav_debug_statement(DavQLStatement *stmt); /** * Parses a statement. @@ -211,6 +213,12 @@ */ #define dav_parse_cstr_statement(stmt) dav_parse_statement(S(stmt)) +/** + * Frees a DavQLStatement. 
+ * @param stmt the statement object to free + */ +void dav_free_statement(DavQLStatement *stmt); + #ifdef __cplusplus } #endif