# HG changeset patch # User Mike Becker # Date 1427799617 -7200 # Node ID 0567444f2d76c32f7de9279eacc0a3ecf51f111f # Parent 8e186185422ca65f8c83f5a3c3b4284591053e22 renamed some enums to avoid collisions with macros + minor grammar change (backsticks for identifiers) + tokenizer + parser skeleton diff -r 8e186185422c -r 0567444f2d76 libidav/davqlparser.c --- a/libidav/davqlparser.c Tue Mar 31 10:18:55 2015 +0200 +++ b/libidav/davqlparser.c Tue Mar 31 13:00:17 2015 +0200 @@ -29,25 +29,31 @@ #include "davqlparser.h" #include #include +#include #define sfmtarg(s) ((int)(s).length), (s).ptr +// ------------------------------------------------------------------------ +// D E B U G E R +// ------------------------------------------------------------------------ + static const char* _map_querytype(davqltype_t type) { switch(type) { - case GET: return "GET"; - case SET: return "SET"; + case DAVQL_ERROR: return "ERROR"; + case DAVQL_GET: return "GET"; + case DAVQL_SET: return "SET"; default: return "unknown"; } } static const char* _map_exprtype(davqlexprtype_t type) { switch(type) { - case LITERAL: return "LITERAL"; - case IDENTIFIER: return "IDENTIFIER"; - case UNARY: return "UNARY"; - case BINARY: return "BINARY"; - case LOGICAL: return "LOGICAL"; - case FUNCCALL: return "FUNCCALL"; + case DAVQL_LITERAL: return "LITERAL"; + case DAVQL_IDENTIFIER: return "IDENTIFIER"; + case DAVQL_UNARY: return "UNARY"; + case DAVQL_BINARY: return "BINARY"; + case DAVQL_LOGICAL: return "LOGICAL"; + case DAVQL_FUNCCALL: return "FUNCCALL"; default: return "unknown"; } } @@ -67,9 +73,10 @@ } static void dav_debug_ql_stmt_print(DavQLStatement *stmt) { + sstr_t empty = ST("(empty)"); // Basic information - printf("Statement: %*s\nType: %s\nField count: %zu", + printf("Statement: %.*s\nType: %s\nField count: %zu", sfmtarg(stmt->srctext), _map_querytype(stmt->type), ucx_list_size(stmt->fields)); @@ -78,16 +85,16 @@ _Bool wildcard = 0; UCX_FOREACH(elm, stmt->fields) { DavQLExpression* expr = 
(DavQLExpression*)elm->data; - if (expr->type == IDENTIFIER && + if (expr->type == DAVQL_IDENTIFIER && expr->srctext.length == 1 && *(expr->srctext.ptr) == '*') { wildcard = 1; } } - printf(" %s wildcard\nPath: %*s\nHas where clause: %s\n", + printf(" %s wildcard\nPath: %.*s\nHas where clause: %s\n", wildcard?"with":"without", - sfmtarg(stmt->path.srctext), + sfmtarg(stmt->path ? stmt->path->srctext : empty), stmt->where ? "yes" : "no"); - if (stmt->type == SET) { + if (stmt->type == DAVQL_SET) { printf("Value list size matches: %s", ucx_list_size(stmt->fields) == ucx_list_size(stmt->setvalues) ? "yes" : "no"); @@ -112,10 +119,10 @@ static void dav_debug_ql_expr_print(DavQLExpression *expr) { if (dav_debug_ql_expr_selected(expr)) { - sstr_t empty = S("(empty)"); + sstr_t empty = ST("(empty)"); printf( - "Text: %*s\nType: %s\nOperator: %s\n" - "Left hand: %*s\nRight hand: %*s\n", + "Text: %.*s\nType: %s\nOperator: %s\n" + "Left hand: %.*s\nRight hand: %.*s\n", sfmtarg(expr->srctext), _map_exprtype(expr->type), _map_operator(expr->op), @@ -156,7 +163,7 @@ } } -void dav_debug_ql_statement(DavQLStatement *stmt) { +void dav_debug_statement(DavQLStatement *stmt) { if (!stmt) { fprintf(stderr, "Debug DavQLStatement failed: null pointer"); return; @@ -174,7 +181,7 @@ case DQLD_CMD_PS: dav_debug_ql_stmt_print(stmt); break; case DQLD_CMD_PE: dav_debug_ql_expr_print(examineexpr); break; case DQLD_CMD_P: - examineexpr = &(stmt->path); + examineexpr = stmt->path; dav_debug_ql_expr_print(examineexpr); break; case DQLD_CMD_L: @@ -213,15 +220,194 @@ } } +// ------------------------------------------------------------------------ +// P A R S E R +// ------------------------------------------------------------------------ + +static UcxList* dav_parse_tokenize(sstr_t src) { + UcxList *tokens = NULL; + + // Delimiters: whitespace and dead whitespace around commas + sstr_t *token = NULL; + for (size_t i = 0 ; i < src.length ; i++) { + if (isspace(src.ptr[i])) { + // add token before 
spaces to list (if any) + if (token) { + tokens = ucx_list_append(tokens, token); + token = NULL; + } + } else if (src.ptr[i] == ',') { + // add token before comma to list (if any) + if (token) { + tokens = ucx_list_append(tokens, token); + token = NULL; + } + // add comma as token to list + token = malloc(sizeof(sstr_t)); + token->ptr = src.ptr + i; + token->length = 1; + tokens = ucx_list_append(tokens, token); + // set tokenizer ready to read more tokens + token = NULL; + } else { + // if this is a new token, create memory for it + if (!token) { + token = malloc(sizeof(sstr_t)); + token->ptr = src.ptr + i; + token->length = 0; + } + // extend token length when reading more bytes + token->length++; + } + } + + if (token) { + tokens = ucx_list_append(tokens, token); + } + + return tokens; +} + +static DavQLExpression* dav_parse_expression(sstr_t src) { + DavQLExpression *expr = calloc(1, sizeof(DavQLExpression)); + expr->srctext = src; + + return expr; +} + +static void dav_parse_get_statement(DavQLStatement *stmt, UcxList *tokens) { + stmt->type = DAVQL_GET; + + /* + * 10: field list + * 20: FROM clause + * 520: expecting WHERE or WITH clause + * 30: WHERE clause + * 530: expecting WITH clause + * 40: WITH clause + * 500: ready to quit + * + */ + int step = 10; + + UCX_FOREACH(token, tokens) { + sstr_t tokendata = *(sstr_t*)token->data; + + switch (step) { + // optional clauses + case 520: + if (!sstrcasecmp(tokendata, S("where"))) { + step = 30; + } + /* no break */ + case 530: + if (!sstrcasecmp(tokendata, S("with"))) { + step = 40; + } + break; + // field list + case 10: + if (!sstrcasecmp(tokendata, S("from"))) { + step = 20; + } else { + stmt->fields = ucx_list_append(stmt->fields, + dav_parse_expression(tokendata)); + } + break; + // from clause + case 20: + stmt->path = dav_parse_expression(tokendata); + step = 520; + break; + // where clause + case 30: + step = 530; + break; + // with clause + case 40: + step = 500; + break; + } + + free(token->data); + } 
+ + if (step < 500) { + stmt->type = DAVQL_ERROR; + // TODO: save parse error message + } +} + +static void dav_parse_set_statement(DavQLStatement *stmt, UcxList *tokens) { + stmt->type = DAVQL_SET; + + UCX_FOREACH(token, tokens) { + sstr_t tokendata = *(sstr_t*)token->data; + + // just free the tokens, until the function is implemented + + free(token->data); + } +} + DavQLStatement* dav_parse_statement(sstr_t srctext) { - DavQLStatement *stmt = malloc(sizeof(DavQLStatement)); + DavQLStatement *stmt = calloc(1, sizeof(DavQLStatement)); // default values - memset(stmt, 0, sizeof(DavQLStatement)); - stmt->srctext = srctext; - stmt->type = stmt->path.type = stmt->path.op = -1; + stmt->type = -1; stmt->depth = SIZE_MAX; + // save trimmed source text + stmt->srctext = sstrtrim(srctext); + + // tokenization + UcxList* tokens = dav_parse_tokenize(stmt->srctext); + + // use first token to determine query type + if (tokens) { + sstr_t token = *(sstr_t*)tokens->data; + free(tokens->data); + tokens = ucx_list_remove(tokens, tokens); + if (!sstrcasecmp(token, S("get"))) { + dav_parse_get_statement(stmt, tokens); + } else if (!sstrcasecmp(token, S("set"))) { + dav_parse_set_statement(stmt, tokens); + } else { + stmt->type = DAVQL_ERROR; + } + ucx_list_free(tokens); + } else { + stmt->type = DAVQL_ERROR; + } return stmt; } + +static void dav_free_expression(DavQLExpression *expr) { + if (expr->left) { + dav_free_expression(expr->left); + } + if (expr->right) { + dav_free_expression(expr->right); + } + free(expr); +} + +void dav_free_statement(DavQLStatement *stmt) { + UCX_FOREACH(expr, stmt->fields) { + dav_free_expression(expr->data); + } + ucx_list_free(stmt->fields); + UCX_FOREACH(expr, stmt->setvalues) { + dav_free_expression(expr->data); + } + ucx_list_free(stmt->setvalues); + + if (stmt->path) { + dav_free_expression(stmt->path); + } + if (stmt->where) { + dav_free_expression(stmt->where); + } + free(stmt); +} diff -r 8e186185422c -r 0567444f2d76 libidav/davqlparser.h --- 
a/libidav/davqlparser.h Tue Mar 31 10:18:55 2015 +0200 +++ b/libidav/davqlparser.h Tue Mar 31 13:00:17 2015 +0200 @@ -40,14 +40,14 @@ /** * Enumeration of possible statement types. */ -typedef enum {GET, SET} davqltype_t; +typedef enum {DAVQL_ERROR, DAVQL_GET, DAVQL_SET} davqltype_t; /** * Enumeration of possible expression types. */ typedef enum { - LITERAL, IDENTIFIER, - UNARY, BINARY, LOGICAL, FUNCCALL + DAVQL_LITERAL, DAVQL_IDENTIFIER, + DAVQL_UNARY, DAVQL_BINARY, DAVQL_LOGICAL, DAVQL_FUNCCALL } davqlexprtype_t; /** @@ -104,15 +104,17 @@ * The grammar for a DavQLStatement is: * *
- * Expression = Expression BinaryOperator Expression
- *            | UnaryOperator Expression
+ * Expression = Expression, BinaryOperator, Expression
+ *            | UnaryOperator, Expression
  *            | FunctionCall | Identifier | Literal
  *            | "(", Expression, ")";
  * 
- * FunctionCall = Identifier, "(", Expression, ")";
- * Identifier   = ?Character?, {?Character?};
- * Literal      = ?Digit?, {?Digit?} | String;
- * String       = "'", {?Character - "'"? | "'''"} , "'";
+ * FunctionCall    = Identifier, "(", Expression, ")";
+ * Identifier      = IdentifierChar, {IdentifierChar}
+ *                 | "`", ?Character - "`"?, {?Character - "`"?}, "`";
+ * IdentifierChar  = ?Character - (" "|",")?;
+ * Literal         = ?Digit?, {?Digit?} | String;
+ * String          = "'", {?Character - "'"? | "'''"} , "'";
  * 
  * LogicalExpression = LogicalExpression, LogicalOperator, LogicalExpression
  *                   | "not ", LogicalExpression
@@ -178,7 +180,7 @@
     /**
      * A DavQLExpression that denotes the queried path.
      */
-    DavQLExpression path;
+    DavQLExpression* path;
     /**
      * Logical expression for selection.
      * NULL, if there is no where clause.
@@ -197,7 +199,7 @@
  * 
  * @param stmt the statement to debug
  */
-void dav_debug_ql_statement(DavQLStatement *stmt);
+void dav_debug_statement(DavQLStatement *stmt);
 
 /**
  * Parses a statement.
@@ -211,6 +213,12 @@
  */
 #define dav_parse_cstr_statement(stmt) dav_parse_statement(S(stmt))
 
+/**
+ * Frees a DavQLStatement and all expressions it owns (not the source text).
+ * @param stmt the statement object to free, must not be NULL
+ */
+void dav_free_statement(DavQLStatement *stmt);
+
 #ifdef	__cplusplus
 }
 #endif