special path parser + typed literals + order by parser + grammar changes

2015-04-17

author
Mike Becker <universe@uap-core.de>
date
Fri, 17 Apr 2015 16:09:43 +0200 (2015-04-17)
changeset 96
896022673e0e
parent 95
8ed7d8df6427
child 97
f82cb65a78ec

special path parser + typed literals + order by parser + grammar changes
TODO: refactor static variables ...

libidav/davqlparser.c file | annotate | diff | comparison | revisions
libidav/davqlparser.h file | annotate | diff | comparison | revisions
--- a/libidav/davqlparser.c	Fri Apr 17 13:11:58 2015 +0200
+++ b/libidav/davqlparser.c	Fri Apr 17 16:09:43 2015 +0200
@@ -49,7 +49,9 @@
 
 static const char* _map_exprtype(davqlexprtype_t type) {
     switch(type) {
-    case DAVQL_LITERAL: return "LITERAL";
+    case DAVQL_NUMBER: return "NUMBER";
+    case DAVQL_STRING: return "STRING";
+    case DAVQL_TIMESTAMP: return "TIMESTAMP";
     case DAVQL_IDENTIFIER: return "IDENTIFIER";
     case DAVQL_UNARY: return "UNARY";
     case DAVQL_BINARY: return "BINARY";
@@ -118,15 +120,13 @@
     }
     
     // WITH attributes
-    if (stmt->depth < 0) {
+    if (stmt->depth == DAV_DEPTH_INFINITY) {
         printf("Depth: infinity\n");
+    } else if (stmt->depth == DAV_DEPTH_PLACEHOLDER) {
+        printf("Depth: placeholder\n");
     } else {
         printf("Depth: %d\n", stmt->depth);
     }
-    if (stmt->errorcode) {
-        printf("\nError code: %d\nError: %s\n",
-            stmt->errorcode, stmt->errormessage);
-    }
     
     // order by clause
     printf("Order by: ");
@@ -134,12 +134,18 @@
         UCX_FOREACH(crit, stmt->orderby) {
             DavQLOrderCriterion *critdata = crit->data;
             printf("%.*s %s%s", sfmtarg(critdata->column->srctext),
-                critdata->ascending ? "asc" : "desc",
+                critdata->descending ? "desc" : "asc",
                 crit->next ? ", " : "\n");
         }
     } else {
         printf("nothing\n");
     }
+    
+    // error messages
+    if (stmt->errorcode) {
+        printf("\nError code: %d\nError: %s\n",
+            stmt->errorcode, stmt->errormessage);
+    }
 }
 
 static int dav_debug_ql_expr_selected(DavQLExpression *expr) {
@@ -260,13 +266,34 @@
 #define _unexpected_token "unexpected token (%.*s [->]%.*s %.*s)"
 #define _expected_token "expected token '%s' before '%.*s'"
 #define _expected_by "expected 'by' after 'order' (order [->]%.*s)"
+#define _missing_fmtspec "format specifier missing (%.*s [->]%.*s %.*s)"
+#define _invalid_fmtspec "invalid format specifier (%.*s [->]%.*s %.*s)"
+#define _unknown_fmtspec "unknown format specifier (%.*s [->]%.*s %.*s)"
 #define _missing_quote "missing closing quote symbol (%.*s)"
 #define _parser_state "parser reached invalid state"
 #define _unknown_attribute "unknown attribute '%.*s'"
 #define _duplicated_attribute "duplicated attribute '%.*s'"
 #define _invalid_depth "invalid depth"
-#define _identifier_expected "identifier expected, but found: %.*s"
+#define _invalid_path "invalid path"
+
+#define _identifier_expected "identifier expected (%.*s [->]%.*s %.*s)"
+#define _idornum_expected "identifier or number expected (%.*s [->]%.*s %.*s)"
+#define _idorstr_expected "identifier or string expected (%.*s [->]%.*s %.*s)"
+#define _idorts_expected "identifier or timestamp expected (%.*s [->]%.*s %.*s)"
+
+#define token_sstr(listelem) ((sstr_t*)(listelem)->data)
 
+static void dav_error_in_context(int errorcode, const char *errormsg, // stores errorcode and a formatted message in stmt
+        DavQLStatement *stmt, UcxList *token) { // token: the offending token; its list neighbours supply the context
+    sstr_t emptystring = ST(""); // stand-in when token has no previous / next element
+    stmt->errorcode = errorcode;
+    stmt->errormessage = ucx_sprintf(errormsg, // errormsg is used as the format string and receives three sfmtarg() arguments, so it should hold three %.*s conversions; NOTE(review): a previously set errormessage is overwritten here without being freed - confirm callers report at most once
+        sfmtarg(token->prev?*token_sstr(token->prev):emptystring), // text of the token before the offending one, or ""
+        sfmtarg(*token_sstr(token)), // text of the offending token itself
+        sfmtarg(token->next?*token_sstr(token->next):emptystring)).ptr; // text of the following token, or ""; only the .ptr of the formatted sstr_t is kept
+}
+
+// special symbols are single tokens - the % sign MUST NOT be a special symbol
 static const char *special_token_symbols = ",()+-*/&|^~=!<>";
 
 static UcxList* dav_parse_tokenize(sstr_t src) {
@@ -336,7 +363,6 @@
     return tokens;
 }
 
-#define token_sstr(listelem) ((sstr_t*)(listelem)->data)
 static DavQLExpression* dav_parse_expression(
         DavQLStatement* stmt, UcxList* starttoken, size_t n) {
     if (n == 0) {
@@ -353,8 +379,27 @@
         expr->srctext.length = token_sstr(starttoken)->length;
         char firstchar = expr->srctext.ptr[0];
         char lastchar = expr->srctext.ptr[expr->srctext.length-1];
-        if (firstchar == '\'' || isdigit(firstchar)) {
-            expr->type = DAVQL_LITERAL;
+        if (firstchar == '\'') {
+            expr->type = DAVQL_STRING;
+        } else if (isdigit(firstchar)) {
+            expr->type = DAVQL_NUMBER;
+        } else if (firstchar == '%') {
+            if (expr->srctext.length == 1) {
+                dav_error_in_context(DAVQL_ERROR_MISSING_FMTSPEC,
+                    _missing_fmtspec, stmt, starttoken);
+            } else if (expr->srctext.length == 2) {
+                switch (expr->srctext.ptr[1]) {
+                case 'd': expr->type = DAVQL_NUMBER; break;
+                case 's': expr->type = DAVQL_STRING; break;
+                case 't': expr->type = DAVQL_TIMESTAMP; break;
+                default:
+                    dav_error_in_context(DAVQL_ERROR_UNKNOWN_FMTSPEC,
+                        _unknown_fmtspec, stmt, starttoken);
+                }
+            } else {
+                dav_error_in_context(DAVQL_ERROR_INVALID_FMTSPEC,
+                        _invalid_fmtspec, stmt, starttoken);
+            }
         } else {
             expr->type = DAVQL_IDENTIFIER;
         }
@@ -414,29 +459,45 @@
     }
     free(expr);
 }
-
-static void dav_parse_unexpected_token(DavQLStatement *stmt, UcxList *token) {
-    sstr_t emptystring = ST("");
-    stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
-    sstr_t errormsg = ucx_sprintf(_unexpected_token,
-        sfmtarg(token->prev?*token_sstr(token->prev):emptystring),
-        sfmtarg(*token_sstr(token)),
-        sfmtarg(token->next?*token_sstr(token->next):emptystring));
-    stmt->errormessage = errormsg.ptr;
-}
-
     
 #define _step_fieldlist_    10 // field list
 #define _step_FROM_         20 // FROM clause
-#define _step_expect_WWO_  530 // expecting WITH, WHERE or ORDER BY clause
 #define _step_WITH_         30 // WITH clause
-#define _step_expect_WO    540 // expecting WHERE or ORDER BY clause
 #define _step_WHERE_        40 // WHERE clause
-#define _step_expect_O     550 // expecting ORDER BY clause
-#define _step_expect_BY    551 // expecting the BY token for the ORDER BY clause
+#define _step_ORDER_BYopt_ 552 // expecting more ORDER BY details or end
 #define _step_ORDER_BY_     50 // ORDER BY clause
 #define _step_end_         500 // expect end
 
+static int dav_parse_from(DavQLStatement *stmt, UcxList *token) { // handles one token of the FROM clause; returns the parser step for the next token
+    sstr_t tokendata = *token_sstr(token);
+    
+    if (!sstrcasecmp(tokendata, S("with"))) { // a clause keyword ends the FROM clause; NOTE(review): accepted even if no path was set before - confirm this is checked elsewhere
+        return _step_WITH_;
+    } else if (!sstrcasecmp(tokendata, S("where"))) {
+        return _step_WHERE_;
+    } else if (!sstrcasecmp(tokendata, S("order"))) {
+        return _step_ORDER_BY_; // the "by" keyword is verified by state 0 of dav_parse_orderby_clause
+    } else {
+        if (stmt->path.ptr) { // a path was already started by an earlier token
+            if (stmt->path.ptr[0] == '/') { // only a path starting with '/' may continue over several tokens
+                char *end = tokendata.ptr+tokendata.length; // one past the last character of the current token
+                stmt->path.length = end - stmt->path.ptr; // extend the path to the end of this token; NOTE(review): assumes both tokens point into the same contiguous source buffer - confirm against dav_parse_tokenize
+            } else { // existing path does not start with '/' (e.g. the "%s" placeholder), so a further token is rejected
+                stmt->errorcode = DAVQL_ERROR_INVALID_PATH;
+                stmt->errormessage = strdup(_invalid_path);
+            }
+        } else {
+            if (tokendata.ptr[0] == '/' || !sstrcmp(tokendata, S("%s"))) { // first path token must start with '/' or be exactly "%s"; NOTE(review): reads ptr[0] without a length check - presumably tokens are never empty
+                stmt->path = tokendata; // struct copy: the path refers to the token's text, no characters are duplicated
+            } else {
+                stmt->errorcode = DAVQL_ERROR_INVALID_PATH;
+                stmt->errormessage = strdup(_invalid_path);
+            }
+        }
+        return _step_FROM_; // stay in the FROM clause, also after an error was recorded
+    }
+}
+
 static int dav_parse_with_clause(DavQLStatement *stmt, UcxList *token) {
     sstr_t tokendata = *token_sstr(token);
     
@@ -488,16 +549,31 @@
             if (!sstrcasecmp(tokendata, S("infinity"))) {
                 stmt->depth = DAV_DEPTH_INFINITY;
             } else {
-                char *conv = malloc(tokendata.length+1);
-                char *chk;
-                memcpy(conv, tokendata.ptr, tokendata.length);
-                conv[tokendata.length] = '\0';
-                stmt->depth = strtol(conv, &chk, 10);
-                if (*chk || stmt->depth < -1) {
+                DavQLExpression *depthexpr =
+                    dav_parse_expression(stmt, token, 1);
+                
+                if (depthexpr->type == DAVQL_NUMBER) {
+                    if (depthexpr->srctext.ptr[0] == '%') {
+                        stmt->depth = DAV_DEPTH_PLACEHOLDER;
+                    } else {
+                        sstr_t depthstr = depthexpr->srctext;
+                        char *conv = malloc(depthstr.length+1);
+                        char *chk;
+                        memcpy(conv, depthstr.ptr, depthstr.length);
+                        conv[depthstr.length] = '\0';
+                        stmt->depth = strtol(conv, &chk, 10);
+                        if (*chk || stmt->depth < -1) {
+                            stmt->errorcode = DAVQL_ERROR_INVALID_DEPTH;
+                            stmt->errormessage = strdup(_invalid_depth);
+                        }
+                        free(conv);
+                    }
+                } else {
                     stmt->errorcode = DAVQL_ERROR_INVALID_DEPTH;
                     stmt->errormessage = strdup(_invalid_depth);
                 }
-                free(conv);
+                
+                dav_free_expression(depthexpr);
             }
             break;
         }
@@ -512,9 +588,10 @@
         } else if (!sstrcasecmp(tokendata, S("where"))) {
             return _step_WHERE_;
         } else if (!sstrcasecmp(tokendata, S("order"))) {
-            return _step_expect_BY;
+            return _step_ORDER_BY_;
         } else {
-            dav_parse_unexpected_token(stmt, token);
+            dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
+                _unexpected_token, stmt, token);
             return 0;
         }
     default:
@@ -525,7 +602,71 @@
 }
 
 static int dav_parse_orderby_clause(DavQLStatement *stmt, UcxList *token) {
-    // TODO: implement
+    // consumes one token of the ORDER BY clause per call and returns the parser step for the next token
+    sstr_t tokendata = *token_sstr(token);
+    /* values of the clause-internal state, which is kept between calls:
+     * 0: expect by keyword (the token following "order")
+     * 1: expect identifier / number (starts a new order criterion)
+     * 2: expect asc / desc or comma
+     * 3: expect comma (a direction was already given)
+     */
+    static int state = 0; // NOTE(review): static and never set back to 0 in this function - a later statement parsed in the same process starts in whatever state the previous one left behind
+    static DavQLOrderCriterion *crit = NULL; // criterion currently being built; static, so shared by every caller of this function
+    
+    switch (state) {
+    case 0:
+        if (!sstrcasecmp(tokendata, S("by"))) {
+            state++; // -> state 1
+        } else {
+            stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
+            stmt->errormessage = ucx_sprintf(_expected_by,
+                sfmtarg(tokendata)).ptr;
+        }
+        return _step_ORDER_BY_; // returned on success and on error alike; state stays 0 in the error case
+    case 1:
+        crit = malloc(sizeof(DavQLOrderCriterion)); // NOTE(review): result is not checked before the writes below
+        crit->column = dav_parse_expression(stmt, token, 1); // parse exactly this one token as an expression
+        crit->descending = 0; // ascending unless "desc" follows
+        
+        if (!crit->column || ( // only a number or an identifier is accepted as sort column
+            crit->column->type != DAVQL_NUMBER &&
+            crit->column->type != DAVQL_IDENTIFIER)) {
+            free(crit); // NOTE(review): the static crit pointer keeps the freed address, and a non-NULL crit->column is not passed to dav_free_expression
+            dav_error_in_context(DAVQL_ERROR_IDORNUM_EXPECTED,
+                _idornum_expected, stmt, token);
+        } else {
+            stmt->orderby = ucx_list_append(stmt->orderby, crit); // crit is now referenced by the statement's order-by list
+        }
+        
+        // continue parsing clause, if more tokens available
+        state++; // NOTE(review): also executed on the error path above; case 2 would then write through the freed crit unless the caller stops on stmt->errorcode - confirm
+        return _step_ORDER_BYopt_;
+    case 2:
+        if (!sstrcasecmp(tokendata, S("desc"))) {
+            crit->descending = 1; // modifies the criterion that case 1 appended to stmt->orderby
+        } else if (!sstrcasecmp(tokendata, S("asc"))) {
+            crit->descending = 0;
+        } else if (!sstrcmp(tokendata, S(","))) {
+            state = 1; // reset clause parser
+            return _step_ORDER_BY_; // statement must not end now
+        } else {
+            dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
+                _unexpected_token, stmt, token);
+            return 0; // 0 matches none of the _step_* constants; presumably handled by the caller's default branch - confirm
+        }
+         // continue parsing clause, if more tokens available
+        state++; // -> state 3: only a comma may follow the direction keyword
+        return _step_ORDER_BYopt_;
+    case 3:
+        if (!sstrcmp(tokendata, S(","))) {
+            state = 1; // reset clause parser
+            return _step_ORDER_BY_; // statement must not end now
+        } else {
+            dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
+                _unexpected_token, stmt, token);
+            return 0; // same non-step value as in case 2
+        }
+    } // every case above returns, so the code below is reached only when state is outside 0..3
     
     return _step_end_;
 }
@@ -543,6 +684,7 @@
     int step = _step_fieldlist_;
     
     // Variables for token sublists for expressions
+    // TODO: this is deprecated and won't work with function calls
     UcxList *exprstart = NULL;
     size_t exprlen = 0;
     
@@ -553,37 +695,8 @@
         switch (step) {
         // too much input data
         case _step_end_:
-            dav_parse_unexpected_token(stmt, token);
-            break;
-        // optional clauses
-        case _step_expect_WWO_:
-            if (!sstrcasecmp(tokendata, S("with"))) {
-                step = _step_WITH_;
-                continue;
-            }
-            /* no break and no else*/
-        case _step_expect_WO:
-            if (!sstrcasecmp(tokendata, S("where"))) {
-                step = _step_WHERE_;
-                continue;
-            }
-            /* no break and no else*/
-        case _step_expect_O:
-            if (!sstrcasecmp(tokendata, S("order"))) {
-                step = _step_expect_BY;
-                continue;
-            } else { // last possible clause checked and not present
-                dav_parse_unexpected_token(stmt, token);
-            }
-            break;
-        case _step_expect_BY:
-            if (!sstrcasecmp(tokendata, S("by"))) {
-                step = _step_ORDER_BY_;
-            } else {
-                stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
-                stmt->errormessage = ucx_sprintf(_expected_by,
-                    sfmtarg(tokendata)).ptr;
-            }
+            dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
+                _unexpected_token, stmt, token);
             break;
         // field list
         case _step_fieldlist_: {
@@ -599,7 +712,8 @@
                         step = _step_FROM_;
                     }
                 } else {
-                    dav_parse_unexpected_token(stmt, token);
+                    dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
+                        _unexpected_token, stmt, token);
                 }
             } else {
                 // collect tokens for field expression
@@ -614,16 +728,7 @@
         }
         // from clause
         case _step_FROM_: {
-            DavQLExpression *expr = dav_parse_expression(stmt, token, 1);
-            stmt->path = expr->srctext;
-            int exprtype = expr->type;
-            dav_free_expression(expr);
-            if (exprtype != DAVQL_IDENTIFIER) {
-                stmt->errorcode = DAVQL_ERROR_IDENTIFIER_EXPECTED;
-                stmt->errormessage = ucx_sprintf(_identifier_expected,
-                    sfmtarg(tokendata)).ptr;
-            }
-            step = _step_expect_WWO_;
+            step = dav_parse_from(stmt, token);
             break;
         }
         // with clause
@@ -638,6 +743,7 @@
             break;
         // order by clause
         case _step_ORDER_BY_:
+        case _step_ORDER_BYopt_:
             step = dav_parse_orderby_clause(stmt, token);
             break;
         default:
--- a/libidav/davqlparser.h	Fri Apr 17 13:11:58 2015 +0200
+++ b/libidav/davqlparser.h	Fri Apr 17 16:09:43 2015 +0200
@@ -46,7 +46,7 @@
  * Enumeration of possible expression types.
  */
 typedef enum {
-    DAVQL_LITERAL, DAVQL_IDENTIFIER,
+    DAVQL_NUMBER, DAVQL_STRING, DAVQL_TIMESTAMP, DAVQL_IDENTIFIER,
     DAVQL_UNARY, DAVQL_BINARY, DAVQL_LOGICAL, DAVQL_FUNCCALL
 } davqlexprtype_t;
 
@@ -105,9 +105,10 @@
      */
     DavQLExpression *column;
     /**
-     * True, if the result shall be sorted ascending, false otherwise.
+     * True, if the result shall be sorted descending, false otherwise.
+     * Default is false (ascending).
      */
-    _Bool ascending;
+    _Bool descending;
 } DavQLOrderCriterion;
 
 
@@ -126,11 +127,12 @@
  * FunctionCall    = Identifier, "(", ArgumentList, ")";
  * ArgumentList    = Expression, {",", Expression};
  * Identifier      = IdentifierChar - ?Digit?, {IdentifierChar}
- *                 | "`", ?Character?, {?Character?}, "`";
- * IdentifierChar  = ?Character - (" "|",")?;
- * Literal         = Number | String;
+ *                 | "`", ?Character? - "`", {?Character? - "`"}, "`";
+ * IdentifierChar  = ?Character? - (" "|",");
+ * Literal         = Number | String | Timestamp;
  * Number          = ?Digit?, {?Digit?} | "%d";
- * String          = "'", {?Character - "'"? | "'''"} , "'";
+ * String          = "'", {?Character? - "'" | "'''"} , "'" | "%s";
+ * Timestamp       = "%t"; // TODO: maybe introduce a real literal 
  * 
  * LogicalExpression = LogicalExpression, LogicalOperator, LogicalExpression
  *                   | "not ", LogicalExpression
@@ -151,6 +153,10 @@
  * SetExpressions   = SetExpression, {",", SetExpressions};
  * SetExpression    = Identifier, "=", Expression;
  * 
+ * Path = "%s"
+ *      | "/", {?Character? - " "}
+ *      | "'/", {?Character?}, "'";
+ * 
  * WithClause = "depth", "=", (Number | "infinity");
  * 
  * OrderByClause    = OrderByCriterion, {",", OrderByCriterion};
@@ -165,7 +171,7 @@
  * <b>GET:</b>
  * <pre>
  * GetStatement = "get ", FieldExpressions,
- * " from ", Identifier,
+ * " from ", Path,
  * [" with ", WithClause],
  * [" where ", LogicalExpression],
  * [" order by ", OrderByClause];
@@ -174,7 +180,7 @@
  * <b>SET:</b>
  * <pre>
  * "set ",SetExpressions,
- * " at ", Identifier,
+ * " at ", Path,
  * [" with ", WithClause],
  * (" where ", LogicalExpression) | " anywhere";
  * </pre>
@@ -224,6 +230,8 @@
     /**
      * The recursion depth for the statement.
      * Defaults to 1.
+     * Magic numbers are DAV_DEPTH_INFINITY for infinity and
+     * DAV_DEPTH_PLACEHOLDER for a placeholder.
      */
     int depth;
 } DavQLStatement;
@@ -231,9 +239,27 @@
 /** Infinity recursion depth for a DavQLStatement. */
 #define DAV_DEPTH_INFINITY -1
 
+/** Depth needs to be specified at runtime. */
+#define DAV_DEPTH_PLACEHOLDER -2
+
+/** Invalid path. */
+#define DAVQL_ERROR_INVALID_PATH 1
+
 /** Expected an identifier, but found something else. */
 #define DAVQL_ERROR_IDENTIFIER_EXPECTED 10
 
+/** Expected an identifier or literal, but found something else. */
+#define DAVQL_ERROR_IDORLIT_EXPECTED 11
+
+/** Expected an identifier or number, but found something else. */
+#define DAVQL_ERROR_IDORNUM_EXPECTED 12
+
+/** Expected an identifier or string, but found something else. */
+#define DAVQL_ERROR_IDORSTR_EXPECTED 13
+
+/** Expected an identifier or timestamp, but found something else. */
+#define DAVQL_ERROR_IDORTS_EXPECTED 14
+
 /** The with-clause contains an unknown attribute. */
 #define DAVQL_ERROR_UNKNOWN_ATTRIBUTE 20
 
@@ -243,6 +269,15 @@
 /** The with-clause contains an attribute more than once. */
 #define DAVQL_ERROR_DUPLICATED_ATTRIBUTE 29
 
+/** The format specifier is missing. */
+#define DAVQL_ERROR_MISSING_FMTSPEC 30
+
+/** The format specifier is unknown. */
+#define DAVQL_ERROR_UNKNOWN_FMTSPEC 31
+
+/** The format specifier is invalid. */
+#define DAVQL_ERROR_INVALID_FMTSPEC 39
+
 /** A quote symbol (' or `) is missing. */
 #define DAVQL_ERROR_MISSING_QUOTE 50
 

mercurial