renamed some enums to avoid collisions with macros + minor grammar change (backticks for identifiers) + tokenizer + parser skeleton

Tue, 31 Mar 2015 13:00:17 +0200

author
Mike Becker <universe@uap-core.de>
date
Tue, 31 Mar 2015 13:00:17 +0200
changeset 82
0567444f2d76
parent 81
8e186185422c
child 83
7d20ce5d235b

renamed some enums to avoid collisions with macros + minor grammar change (backticks for identifiers) + tokenizer + parser skeleton

libidav/davqlparser.c file | annotate | diff | comparison | revisions
libidav/davqlparser.h file | annotate | diff | comparison | revisions
--- a/libidav/davqlparser.c	Tue Mar 31 10:18:55 2015 +0200
+++ b/libidav/davqlparser.c	Tue Mar 31 13:00:17 2015 +0200
@@ -29,25 +29,31 @@
 #include "davqlparser.h"
 #include <string.h>
 #include <stdio.h>
+#include <ctype.h>
 
 #define sfmtarg(s) ((int)(s).length), (s).ptr
 
+// ------------------------------------------------------------------------
+//                        D E B U G G E R
+// ------------------------------------------------------------------------
+
 static const char* _map_querytype(davqltype_t type) { // maps a statement type to the name shown in the debugger output
     switch(type) {
-    case GET: return "GET";
-    case SET: return "SET";
+    case DAVQL_ERROR: return "ERROR"; // type the parser assigns when parsing failed
+    case DAVQL_GET: return "GET";
+    case DAVQL_SET: return "SET";
     default: return "unknown"; // fallback for any other value
     }
 }
 
 static const char* _map_exprtype(davqlexprtype_t type) { // maps an expression type to the name shown in the debugger output
     switch(type) {
-    case LITERAL: return "LITERAL";
-    case IDENTIFIER: return "IDENTIFIER";
-    case UNARY: return "UNARY";
-    case BINARY: return "BINARY";
-    case LOGICAL: return "LOGICAL";
-    case FUNCCALL: return "FUNCCALL";
+    case DAVQL_LITERAL: return "LITERAL";
+    case DAVQL_IDENTIFIER: return "IDENTIFIER";
+    case DAVQL_UNARY: return "UNARY";
+    case DAVQL_BINARY: return "BINARY";
+    case DAVQL_LOGICAL: return "LOGICAL";
+    case DAVQL_FUNCCALL: return "FUNCCALL";
     default: return "unknown"; // fallback for any other value
     }
 }
@@ -67,9 +73,10 @@
 }
 
 static void dav_debug_ql_stmt_print(DavQLStatement *stmt) {
+    sstr_t empty = ST("(empty)");
     
     // Basic information
-    printf("Statement: %*s\nType: %s\nField count: %zu",
+    printf("Statement: %.*s\nType: %s\nField count: %zu",
         sfmtarg(stmt->srctext),
         _map_querytype(stmt->type),
         ucx_list_size(stmt->fields));
@@ -78,16 +85,16 @@
     _Bool wildcard = 0;
     UCX_FOREACH(elm, stmt->fields) {
         DavQLExpression* expr = (DavQLExpression*)elm->data;
-        if (expr->type == IDENTIFIER &&
+        if (expr->type == DAVQL_IDENTIFIER &&
             expr->srctext.length == 1 && *(expr->srctext.ptr) == '*') {
             wildcard = 1;
         }
     }
-    printf(" %s wildcard\nPath: %*s\nHas where clause: %s\n",
+    printf(" %s wildcard\nPath: %.*s\nHas where clause: %s\n",
         wildcard?"with":"without",
-        sfmtarg(stmt->path.srctext),
+        sfmtarg(stmt->path ? stmt->path->srctext : empty),
         stmt->where ? "yes" : "no");
-    if (stmt->type == SET) {
+    if (stmt->type == DAVQL_SET) {
         printf("Value list size matches: %s",
             ucx_list_size(stmt->fields) == ucx_list_size(stmt->setvalues)
             ? "yes" : "no");
@@ -112,10 +119,10 @@
 
 static void dav_debug_ql_expr_print(DavQLExpression *expr) {
     if (dav_debug_ql_expr_selected(expr)) {
-        sstr_t empty = S("(empty)");
+        sstr_t empty = ST("(empty)");
         printf(
-            "Text: %*s\nType: %s\nOperator: %s\n"
-            "Left hand: %*s\nRight hand: %*s\n",
+            "Text: %.*s\nType: %s\nOperator: %s\n"
+            "Left hand: %.*s\nRight hand: %.*s\n",
             sfmtarg(expr->srctext),
             _map_exprtype(expr->type),
             _map_operator(expr->op),
@@ -156,7 +163,7 @@
     }
 }
 
-void dav_debug_ql_statement(DavQLStatement *stmt) {
+void dav_debug_statement(DavQLStatement *stmt) {
     if (!stmt) {
         fprintf(stderr, "Debug DavQLStatement failed: null pointer");
         return;
@@ -174,7 +181,7 @@
         case DQLD_CMD_PS: dav_debug_ql_stmt_print(stmt); break;
         case DQLD_CMD_PE: dav_debug_ql_expr_print(examineexpr); break;
         case DQLD_CMD_P:
-            examineexpr = &(stmt->path);
+            examineexpr = stmt->path;
             dav_debug_ql_expr_print(examineexpr);
             break;
         case DQLD_CMD_L:
@@ -213,15 +220,194 @@
     }
 }
 
+// ------------------------------------------------------------------------
+//                         P A R S E R
+// ------------------------------------------------------------------------
+
+/**
+ * Splits the source text of a statement into tokens.
+ * 
+ * Tokens are delimited by whitespace. A comma terminates the token in front
+ * of it and is additionally added to the list as a token of its own.
+ * The tokens do not copy any text: each one points into the buffer of
+ * <code>src</code>.
+ * 
+ * Each list element holds a separately allocated sstr_t that must be
+ * released with free() by the consumer of the list.
+ * 
+ * @param src the source text to tokenize
+ * @return a list of sstr_t pointers, or <code>NULL</code> if the text
+ * contains no tokens or a token could not be allocated
+ */
+static UcxList* dav_parse_tokenize(sstr_t src) {
+    UcxList *tokens = NULL;
+    
+    // token that is currently being read (NULL while between two tokens)
+    sstr_t *token = NULL;
+    for (size_t i = 0 ; i < src.length ; i++) {
+        // <ctype.h> functions are only defined for unsigned char values,
+        // a plain (possibly negative) char must not be passed directly
+        unsigned char c = (unsigned char) src.ptr[i];
+        
+        if (isspace(c)) {
+            // add token before spaces to list (if any)
+            if (token) {
+                tokens = ucx_list_append(tokens, token);
+                token = NULL;
+            }
+        } else if (c == ',') {
+            // add token before comma to list (if any)
+            if (token) {
+                tokens = ucx_list_append(tokens, token);
+                token = NULL;
+            }
+            // add comma as token to list
+            sstr_t *comma = malloc(sizeof(sstr_t));
+            if (!comma) {
+                goto fail;
+            }
+            comma->ptr = src.ptr + i;
+            comma->length = 1;
+            tokens = ucx_list_append(tokens, comma);
+        } else {
+            // if this is a new token, create memory for it
+            if (!token) {
+                token = malloc(sizeof(sstr_t));
+                if (!token) {
+                    goto fail;
+                }
+                token->ptr = src.ptr + i;
+                token->length = 0;
+            }
+            // extend token length when reading more bytes
+            token->length++;
+        }
+    }
+    
+    // the text may end without a trailing delimiter
+    if (token) {
+        tokens = ucx_list_append(tokens, token);
+    }
+    
+    return tokens;
+    
+fail:
+    // out of memory: release everything collected so far
+    // (no token is pending here, the failed allocation left it NULL)
+    UCX_FOREACH(elm, tokens) {
+        free(elm->data);
+    }
+    ucx_list_free(tokens);
+    return NULL;
+}
+
+/**
+ * Creates an expression node for the specified source text.
+ * 
+ * Only the source text is stored (the text itself is not copied); all other
+ * members of the new node are zero-initialized.
+ * 
+ * @param src the source text of the expression
+ * @return the newly allocated expression
+ */
+static DavQLExpression* dav_parse_expression(sstr_t src) {
+    DavQLExpression *node = calloc(1, sizeof *node);
+    node->srctext = src;
+    return node;
+}
+
+/**
+ * Parses the tokens that follow the GET keyword (parser skeleton).
+ * 
+ * Every token before FROM is appended to the field list, the single token
+ * after FROM becomes the path expression. The WHERE and the WITH clause
+ * currently consume one token each without storing anything, and tokens
+ * that follow a complete statement are ignored.
+ * The data of every token is freed, the list itself is left to the caller.
+ * 
+ * NOTE(review): comma tokens are not filtered here, so they end up in the
+ * field list as expressions - confirm whether this is intended.
+ * 
+ * @param stmt the statement to fill; its type becomes DAVQL_ERROR when the
+ * tokens end before a path has been read or in the middle of a clause
+ * @param tokens the tokens after the GET keyword
+ */
+static void dav_parse_get_statement(DavQLStatement *stmt, UcxList *tokens) {
+    // parser states; every state below GET_STEP_DONE needs further tokens
+    enum {
+        GET_STEP_FIELDS = 10,           // field list
+        GET_STEP_FROM = 20,             // FROM clause
+        GET_STEP_WHERE = 30,            // WHERE clause
+        GET_STEP_WITH = 40,             // WITH clause
+        GET_STEP_DONE = 500,            // ready to quit
+        GET_STEP_OPT_WHERE_WITH = 520,  // expecting WHERE or WITH clause
+        GET_STEP_OPT_WITH = 530         // expecting WITH clause
+    };
+    
+    stmt->type = DAVQL_GET;
+    int step = GET_STEP_FIELDS;
+    
+    UCX_FOREACH(elm, tokens) {
+        sstr_t text = *(sstr_t*)elm->data;
+        
+        if (step == GET_STEP_FIELDS) {
+            if (sstrcasecmp(text, S("from")) == 0) {
+                step = GET_STEP_FROM;
+            } else {
+                stmt->fields = ucx_list_append(stmt->fields,
+                    dav_parse_expression(text));
+            }
+        } else if (step == GET_STEP_FROM) {
+            stmt->path = dav_parse_expression(text);
+            step = GET_STEP_OPT_WHERE_WITH;
+        } else if (step == GET_STEP_OPT_WHERE_WITH
+                && sstrcasecmp(text, S("where")) == 0) {
+            step = GET_STEP_WHERE;
+        } else if (step == GET_STEP_OPT_WHERE_WITH
+                || step == GET_STEP_OPT_WITH) {
+            // any token other than the expected keyword keeps the state
+            if (sstrcasecmp(text, S("with")) == 0) {
+                step = GET_STEP_WITH;
+            }
+        } else if (step == GET_STEP_WHERE) {
+            step = GET_STEP_OPT_WITH;
+        } else if (step == GET_STEP_WITH) {
+            step = GET_STEP_DONE;
+        }
+        
+        free(elm->data);
+    }
+    
+    if (step < GET_STEP_DONE) {
+        stmt->type = DAVQL_ERROR;
+        // TODO: save parse error message
+    }
+}
+
+/**
+ * Parses the tokens that follow the SET keyword.
+ * 
+ * The parser is not implemented yet: the statement type is set to
+ * DAVQL_SET and the data of every token is freed. The list itself is left
+ * to the caller.
+ * 
+ * @param stmt the statement to fill
+ * @param tokens the tokens after the SET keyword
+ */
+static void dav_parse_set_statement(DavQLStatement *stmt, UcxList *tokens) {
+    stmt->type = DAVQL_SET;
+    
+    // just free the tokens, until the function is implemented
+    UCX_FOREACH(token, tokens) {
+        free(token->data);
+    }
+}
+
 DavQLStatement* dav_parse_statement(sstr_t srctext) {
-    DavQLStatement *stmt = malloc(sizeof(DavQLStatement));
+    DavQLStatement *stmt = calloc(1, sizeof(DavQLStatement));
     
     // default values
-    memset(stmt, 0, sizeof(DavQLStatement));
-    stmt->srctext = srctext;
-    stmt->type = stmt->path.type = stmt->path.op = -1;
+    stmt->type = -1;
     stmt->depth = SIZE_MAX;
     
+    // save trimmed source text
+    stmt->srctext = sstrtrim(srctext);
+    
+    // tokenization
+    UcxList* tokens = dav_parse_tokenize(stmt->srctext);
+    
+    // use first token to determine query type
+    if (tokens) {
+        sstr_t token = *(sstr_t*)tokens->data;
+        free(tokens->data);
+        tokens = ucx_list_remove(tokens, tokens);
+        if (!sstrcasecmp(token, S("get"))) {
+            dav_parse_get_statement(stmt, tokens);
+        } else if (!sstrcasecmp(token, S("set"))) {
+            dav_parse_set_statement(stmt, tokens);
+        } else {
+            stmt->type = DAVQL_ERROR;
+        }
+        ucx_list_free(tokens);
+    } else {
+        stmt->type = DAVQL_ERROR;
+    }
     
     return stmt;
 }
+
+/**
+ * Frees an expression node together with its left and right
+ * sub-expressions.
+ * 
+ * Only the nodes themselves are released. Like free(), this function
+ * accepts <code>NULL</code> and does nothing in that case.
+ * 
+ * @param expr the expression to free (may be <code>NULL</code>)
+ */
+static void dav_free_expression(DavQLExpression *expr) {
+    if (!expr) {
+        return;
+    }
+    dav_free_expression(expr->left);
+    dav_free_expression(expr->right);
+    free(expr);
+}
+
+/*
+ * Frees a statement: the expressions of the field list and the set value
+ * list, both lists, the path expression, the where clause and finally the
+ * statement object. A NULL statement is ignored, like free() does.
+ */
+void dav_free_statement(DavQLStatement *stmt) {
+    if (!stmt) {
+        return;
+    }
+    
+    UCX_FOREACH(expr, stmt->fields) {
+        dav_free_expression(expr->data);
+    }
+    ucx_list_free(stmt->fields);
+    UCX_FOREACH(expr, stmt->setvalues) {
+        dav_free_expression(expr->data);
+    }
+    ucx_list_free(stmt->setvalues);
+    
+    // path and where clause are optional
+    if (stmt->path) {
+        dav_free_expression(stmt->path);
+    }
+    if (stmt->where) {
+        dav_free_expression(stmt->where);
+    }
+    free(stmt);
+}
--- a/libidav/davqlparser.h	Tue Mar 31 10:18:55 2015 +0200
+++ b/libidav/davqlparser.h	Tue Mar 31 13:00:17 2015 +0200
@@ -40,14 +40,14 @@
 /**
  * Enumeration of possible statement types.
+ * <code>DAVQL_ERROR</code> is the type the parser assigns to a statement
+ * that could not be parsed.
  */
-typedef enum {GET, SET} davqltype_t;
+typedef enum {DAVQL_ERROR, DAVQL_GET, DAVQL_SET} davqltype_t;
 
 /**
  * Enumeration of possible expression types.
+ * The constants carry the <code>DAVQL_</code> prefix to avoid collisions
+ * with macros.
  */
 typedef enum {
-    LITERAL, IDENTIFIER,
-    UNARY, BINARY, LOGICAL, FUNCCALL
+    DAVQL_LITERAL, DAVQL_IDENTIFIER,
+    DAVQL_UNARY, DAVQL_BINARY, DAVQL_LOGICAL, DAVQL_FUNCCALL
 } davqlexprtype_t;
 
 /**
@@ -104,15 +104,17 @@
  * The grammar for a DavQLStatement is:
  * 
  * <pre>
- * Expression = Expression BinaryOperator Expression
- *            | UnaryOperator Expression
+ * Expression = Expression, BinaryOperator, Expression
+ *            | UnaryOperator, Expression
  *            | FunctionCall | Identifier | Literal
  *            | "(", Expression, ")";
  * 
- * FunctionCall = Identifier, "(", Expression, ")";
- * Identifier   = ?Character?, {?Character?};
- * Literal      = ?Digit?, {?Digit?} | String;
- * String       = "'", {?Character - "'"? | "'''"} , "'";
+ * FunctionCall    = Identifier, "(", Expression, ")";
+ * Identifier      = IdentifierChar, {IdentifierChar}
+ *                 | "`", ?Character?, {?Character?}, "`";
+ * IdentifierChar  = ?Character - (" "|",")?;
+ * Literal         = ?Digit?, {?Digit?} | String;
+ * String          = "'", {?Character - "'"? | "'''"} , "'";
  * 
  * LogicalExpression = LogicalExpression, LogicalOperator, LogicalExpression
  *                   | "not ", LogicalExpression
@@ -178,7 +180,7 @@
     /**
      * A DavQLExpression that denotes the queried path.
      */
-    DavQLExpression path;
+    DavQLExpression* path;
     /**
      * Logical expression for selection.
      * <code>NULL</code>, if there is no where clause.
@@ -197,7 +199,7 @@
  * 
  * @param stmt the statement to debug
  */
-void dav_debug_ql_statement(DavQLStatement *stmt);
+void dav_debug_statement(DavQLStatement *stmt);
 
 /**
  * Parses a statement.
@@ -211,6 +213,12 @@
  */
 #define dav_parse_cstr_statement(stmt) dav_parse_statement(S(stmt))
 
+/**
+ * Frees a DavQLStatement.
+ * 
+ * The expressions in the field list and in the set value list, the path
+ * expression and the where clause are freed together with the statement.
+ * 
+ * @param stmt the statement object to free
+ */
+void dav_free_statement(DavQLStatement *stmt);
+
 #ifdef	__cplusplus
 }
 #endif

mercurial