Tue, 31 Mar 2015 13:00:17 +0200
renamed some enums to avoid collisions with macros + minor grammar change (backticks for identifiers) + tokenizer + parser skeleton
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2015 Olaf Wintermann. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "davqlparser.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <stdint.h>
#include <stdlib.h>

/*
 * Expands an sstr_t into the two arguments consumed by a "%.*s" conversion.
 * Note: the argument is evaluated twice, so it must be free of side effects.
 */
#define sfmtarg(s) ((int)(s).length), (s).ptr

// ------------------------------------------------------------------------
//                            D E B U G G E R
// ------------------------------------------------------------------------

/* Returns a printable name for a statement type. */
static const char* _map_querytype(davqltype_t type) {
    switch(type) {
    case DAVQL_ERROR: return "ERROR";
    case DAVQL_GET: return "GET";
    case DAVQL_SET: return "SET";
    default: return "unknown";
    }
}

/* Returns a printable name for an expression type. */
static const char* _map_exprtype(davqlexprtype_t type) {
    switch(type) {
    case DAVQL_LITERAL: return "LITERAL";
    case DAVQL_IDENTIFIER: return "IDENTIFIER";
    case DAVQL_UNARY: return "UNARY";
    case DAVQL_BINARY: return "BINARY";
    case DAVQL_LOGICAL: return "LOGICAL";
    case DAVQL_FUNCCALL: return "FUNCCALL";
    default: return "unknown";
    }
}

/* Returns the textual representation of an operator. */
static const char* _map_operator(davqloperator_t op) {
    // don't use string array, because enum values may change
    switch(op) {
    case ADD: return "+";
    case SUB: return "-";
    case MUL: return "*";
    case DIV: return "/";
    case AND: return "&";
    case OR: return "|";
    case XOR: return "^";
    case NEG: return "~";
    case NOT: return "NOT";
    case LAND: return "AND";
    case LOR: return "OR";
    case LXOR: return "XOR";
    case EQ: return "=";
    case NEQ: return "!=";
    case LT: return "<";
    case GT: return ">";
    case LE: return "<=";
    case GE: return ">=";
    case LIKE: return "LIKE";
    case UNLIKE: return "UNLIKE";
    default: return "unknown";
    }
}

/*
 * Prints a summary of the statement to stdout: source text, type, number of
 * fields, whether a "*" identifier is among the fields, the path, whether a
 * where clause exists and the depth.
 */
static void dav_debug_ql_stmt_print(DavQLStatement *stmt) {
    sstr_t empty = ST("(empty)");

    // Basic information
    printf("Statement: %.*s\nType: %s\nField count: %zu",
        sfmtarg(stmt->srctext),
        _map_querytype(stmt->type),
        ucx_list_size(stmt->fields));

    // Has wildcard
    _Bool wildcard = 0;
    UCX_FOREACH(elm, stmt->fields) {
        DavQLExpression *expr = (DavQLExpression*)elm->data;
        if (expr->type == DAVQL_IDENTIFIER &&
            expr->srctext.length == 1 && *(expr->srctext.ptr) == '*') {
            wildcard = 1;
            break; // one hit is enough
        }
    }
    printf(" %s wildcard\nPath: %.*s\nHas where clause: %s\n",
        wildcard?"with":"without",
        sfmtarg(stmt->path ? stmt->path->srctext : empty),
        stmt->where ? "yes" : "no");

    if (stmt->type == DAVQL_SET) {
        // terminate the line, otherwise the depth is printed right behind it
        printf("Value list size matches: %s\n",
            ucx_list_size(stmt->fields) == ucx_list_size(stmt->setvalues)
            ? "yes" : "no");
    }

    // WITH attributes
    if (stmt->depth == SIZE_MAX) {
        printf("Depth: unbound\n");
    } else {
        printf("Depth: %zu\n", stmt->depth);
    }
}

/*
 * Checks whether an expression is selected for examination.
 * Prints a notice and returns 0 if expr is NULL, returns 1 otherwise.
 */
static int dav_debug_ql_expr_selected(DavQLExpression *expr) {
    if (expr) {
        return 1;
    }
    printf("Currently no expression selected.\n");
    return 0;
}

/*
 * Prints text, type, operator and the source text of both operands of the
 * selected expression. Prints only a notice if expr is NULL.
 */
static void dav_debug_ql_expr_print(DavQLExpression *expr) {
    if (!dav_debug_ql_expr_selected(expr)) {
        return;
    }
    sstr_t empty = ST("(empty)");
    printf(
        "Text: %.*s\nType: %s\nOperator: %s\n"
        "Left hand: %.*s\nRight hand: %.*s\n",
        sfmtarg(expr->srctext),
        _map_exprtype(expr->type),
        _map_operator(expr->op),
        sfmtarg(expr->left?expr->left->srctext:empty),
        sfmtarg(expr->right?expr->right->srctext:empty));
}

/* Debugger command codes returned by dav_debug_ql_command(). */
enum {
    DQLD_CMD_UNKNOWN = -1,
    DQLD_CMD_Q = 0,
    DQLD_CMD_PS = 1,
    DQLD_CMD_PE = 2,
    DQLD_CMD_P = 10,
    DQLD_CMD_L = 21,
    DQLD_CMD_R = 22,
    DQLD_CMD_H = 100
};

/*
 * Prompts for a debugger command and reads one line from stdin.
 *
 * Returns the matching DQLD_CMD_* code, or DQLD_CMD_UNKNOWN (-1) when the
 * line is not a known command. End of input and read errors are reported as
 * DQLD_CMD_Q, so that the debugger terminates instead of looping forever.
 */
static int dav_debug_ql_command(void) {
    static const struct {
        const char *text;
        int cmd;
    } commands[] = {
        {"q", DQLD_CMD_Q},
        {"ps", DQLD_CMD_PS},
        {"pe", DQLD_CMD_PE},
        {"p", DQLD_CMD_P},
        {"l", DQLD_CMD_L},
        {"r", DQLD_CMD_R},
        {"h", DQLD_CMD_H}
    };

    printf("> ");
    fflush(stdout); // the prompt has no newline, make sure it is visible

    char buffer[16];
    if (!fgets(buffer, sizeof buffer, stdin)) {
        return DQLD_CMD_Q;
    }

    char *newline = strchr(buffer, '\n');
    if (newline) {
        *newline = '\0';
    } else if (strlen(buffer) == sizeof buffer - 1) {
        // line did not fit into the buffer: discard the remainder, so that
        // it is not mistaken for further commands
        int c;
        while ((c = getchar()) != EOF && c != '\n') {
            // skip
        }
        return DQLD_CMD_UNKNOWN;
    }

    for (size_t i = 0 ; i < sizeof commands / sizeof commands[0] ; i++) {
        if (!strcmp(buffer, commands[i].text)) {
            return commands[i].cmd;
        }
    }
    return DQLD_CMD_UNKNOWN;
}

/*
 * Interactive debugger for a parsed statement.
 *
 * Prints the statement summary and then processes commands from stdin until
 * the quit command is entered or the input ends. Does nothing but print a
 * diagnostic to stderr if stmt is NULL.
 */
void dav_debug_statement(DavQLStatement *stmt) {
    if (!stmt) {
        fprintf(stderr, "Debug DavQLStatement failed: null pointer\n");
        return;
    }

    printf("Starting DavQL debugger (type 'h' for help)...\n\n");
    dav_debug_ql_stmt_print(stmt);

    // expression that is currently examined (none until 'p' is entered)
    DavQLExpression *examineexpr = NULL;

    while(1) {
        int cmd = dav_debug_ql_command();
        switch (cmd) {
        case DQLD_CMD_Q:
            return;
        case DQLD_CMD_PS:
            dav_debug_ql_stmt_print(stmt);
            break;
        case DQLD_CMD_PE:
            dav_debug_ql_expr_print(examineexpr);
            break;
        case DQLD_CMD_P:
            examineexpr = stmt->path;
            dav_debug_ql_expr_print(examineexpr);
            break;
        case DQLD_CMD_L:
            if (dav_debug_ql_expr_selected(examineexpr)) {
                if (examineexpr->left) {
                    examineexpr = examineexpr->left;
                    dav_debug_ql_expr_print(examineexpr);
                } else {
                    printf("There is no left subtree.\n");
                }
            }
            break;
        case DQLD_CMD_R:
            if (dav_debug_ql_expr_selected(examineexpr)) {
                if (examineexpr->right) {
                    examineexpr = examineexpr->right;
                    dav_debug_ql_expr_print(examineexpr);
                } else {
                    printf("There is no right subtree.\n");
                }
            }
            break;
        case DQLD_CMD_H:
            printf(
                "\nCommands:\n"
                "p:   examine path\n"
                "ps:  print statement information\n"
                "q:   quit\n\n"
                "\nExpression examination:\n"
                "pe:  print expression information\n"
                "l:   enter left subtree\n"
                "r:   enter right subtree\n");
            break;
        default:
            printf("unknown command\n");
        }
    }
}

// ------------------------------------------------------------------------
//                              P A R S E R
// ------------------------------------------------------------------------

/*
 * Allocates a token that refers to length bytes starting at start.
 * The bytes are not copied. Returns NULL if the allocation fails.
 */
static sstr_t* dav_parse_token_create(char *start, size_t length) {
    sstr_t *token = malloc(sizeof *token);
    if (token) {
        token->ptr = start;
        token->length = length;
    }
    return token;
}

/* Frees a token list including the token structs stored as list data. */
static void dav_parse_tokens_free(UcxList *tokens) {
    UCX_FOREACH(token, tokens) {
        free(token->data);
    }
    ucx_list_free(tokens);
}

/*
 * Splits src into tokens.
 *
 * Tokens are delimited by whitespace and by commas; each comma additionally
 * becomes a token of its own. The list data are heap allocated sstr_t
 * structs which point into src (no text is copied). The caller owns the
 * list and its data.
 *
 * Returns NULL if src contains no token or if a token allocation fails.
 */
static UcxList* dav_parse_tokenize(sstr_t src) {
    UcxList *tokens = NULL;

    // token that is currently being read (NULL between tokens)
    sstr_t *token = NULL;

    for (size_t i = 0 ; i < src.length ; i++) {
        char c = src.ptr[i];
        // <ctype.h> functions require an unsigned char value
        if (isspace((unsigned char)c) || c == ',') {
            // a delimiter completes the token before it (if any)
            if (token) {
                tokens = ucx_list_append(tokens, token);
                token = NULL;
            }
            if (c == ',') {
                // add comma as token to list
                sstr_t *comma = dav_parse_token_create(src.ptr + i, 1);
                if (!comma) {
                    dav_parse_tokens_free(tokens);
                    return NULL;
                }
                tokens = ucx_list_append(tokens, comma);
            }
        } else if (token) {
            // extend token length when reading more bytes
            token->length++;
        } else {
            // first byte of a new token
            token = dav_parse_token_create(src.ptr + i, 1);
            if (!token) {
                dav_parse_tokens_free(tokens);
                return NULL;
            }
        }
    }

    if (token) {
        tokens = ucx_list_append(tokens, token);
    }

    return tokens;
}

/*
 * Creates a zero-initialized expression whose source text is src.
 * Returns NULL if the allocation fails.
 */
static DavQLExpression* dav_parse_expression(sstr_t src) {
    DavQLExpression *expr = calloc(1, sizeof *expr);
    if (expr) {
        expr->srctext = src;
    }
    return expr;
}

/*
 * States of the GET statement parser. The states below DQLP_DONE denote an
 * incomplete statement, the remaining states are valid end states.
 */
enum {
    DQLP_FIELDLIST = 10,             // reading the field list
    DQLP_FROM = 20,                  // reading the FROM clause
    DQLP_WHERE = 30,                 // reading the WHERE clause
    DQLP_WITH = 40,                  // reading the WITH clause
    DQLP_DONE = 500,                 // ready to quit
    DQLP_EXPECT_WHERE_OR_WITH = 520, // expecting WHERE or WITH clause
    DQLP_EXPECT_WITH = 530           // expecting WITH clause
};

/*
 * Parses the tokens following the GET keyword into stmt.
 *
 * The tokens are only read; they remain owned by the caller. The statement
 * type is set to DAVQL_ERROR if the statement is incomplete or if an
 * expression cannot be allocated.
 *
 * TODO: every token of the field list (including commas) is currently
 *       stored as a field expression.
 */
static void dav_parse_get_statement(DavQLStatement *stmt, UcxList *tokens) {
    stmt->type = DAVQL_GET;

    int step = DQLP_FIELDLIST;
    _Bool failed = 0;

    UCX_FOREACH(token, tokens) {
        sstr_t tokendata = *(sstr_t*)token->data;

        switch (step) {
        // optional clauses
        case DQLP_EXPECT_WHERE_OR_WITH:
            if (!sstrcasecmp(tokendata, S("where"))) {
                step = DQLP_WHERE;
                break;
            }
            /* fallthrough */
        case DQLP_EXPECT_WITH:
            if (!sstrcasecmp(tokendata, S("with"))) {
                step = DQLP_WITH;
            }
            break;
        // field list
        case DQLP_FIELDLIST:
            if (!sstrcasecmp(tokendata, S("from"))) {
                step = DQLP_FROM;
            } else {
                DavQLExpression *field = dav_parse_expression(tokendata);
                if (field) {
                    stmt->fields = ucx_list_append(stmt->fields, field);
                } else {
                    failed = 1;
                }
            }
            break;
        // from clause
        case DQLP_FROM:
            stmt->path = dav_parse_expression(tokendata);
            if (!stmt->path) {
                failed = 1;
            }
            step = DQLP_EXPECT_WHERE_OR_WITH;
            break;
        // where clause
        case DQLP_WHERE:
            step = DQLP_EXPECT_WITH;
            break;
        // with clause
        case DQLP_WITH:
            step = DQLP_DONE;
            break;
        // DQLP_DONE: remaining tokens are ignored
        default:
            break;
        }
    }

    if (failed || step < DQLP_DONE) {
        stmt->type = DAVQL_ERROR;
        // TODO: save parse error message
    }
}

/*
 * Parses the tokens following the SET keyword into stmt.
 *
 * Not implemented yet: only the statement type is set. The tokens remain
 * owned by the caller.
 */
static void dav_parse_set_statement(DavQLStatement *stmt, UcxList *tokens) {
    stmt->type = DAVQL_SET;
    (void)tokens;
}

/*
 * Parses a DavQL statement.
 *
 * The first token selects the statement type ("get" or "set", compared with
 * sstrcasecmp). Syntax errors are reported by a statement of type
 * DAVQL_ERROR. The result must be released with dav_free_statement().
 *
 * NOTE(review): the statement and its expressions store sstr_t values that
 * are derived from srctext via sstrtrim() and are never freed here, so they
 * presumably refer to the caller's buffer, which then has to outlive the
 * statement - confirm against the sstrtrim() documentation.
 *
 * Returns NULL if the statement cannot be allocated.
 */
DavQLStatement* dav_parse_statement(sstr_t srctext) {
    DavQLStatement *stmt = calloc(1, sizeof *stmt);
    if (!stmt) {
        return NULL;
    }

    // default values
    stmt->type = -1;
    stmt->depth = SIZE_MAX;

    // save trimmed source text
    stmt->srctext = sstrtrim(srctext);

    // tokenization
    UcxList *tokens = dav_parse_tokenize(stmt->srctext);
    if (!tokens) {
        stmt->type = DAVQL_ERROR;
        return stmt;
    }

    // use first token to determine query type,
    // the parser functions receive the tokens behind it
    sstr_t keyword = *(sstr_t*)tokens->data;
    if (!sstrcasecmp(keyword, S("get"))) {
        dav_parse_get_statement(stmt, tokens->next);
    } else if (!sstrcasecmp(keyword, S("set"))) {
        dav_parse_set_statement(stmt, tokens->next);
    } else {
        stmt->type = DAVQL_ERROR;
    }

    // the tokens are released here on every path (also for unknown keywords)
    dav_parse_tokens_free(tokens);

    return stmt;
}

/* Recursively frees an expression and its operands. NULL is ignored. */
static void dav_free_expression(DavQLExpression *expr) {
    if (!expr) {
        return;
    }
    dav_free_expression(expr->left);
    dav_free_expression(expr->right);
    free(expr);
}

/*
 * Frees a statement together with its field list, its set value list and
 * its path and where expressions. NULL is ignored.
 */
void dav_free_statement(DavQLStatement *stmt) {
    if (!stmt) {
        return;
    }

    UCX_FOREACH(expr, stmt->fields) {
        dav_free_expression(expr->data);
    }
    ucx_list_free(stmt->fields);

    UCX_FOREACH(expr, stmt->setvalues) {
        dav_free_expression(expr->data);
    }
    ucx_list_free(stmt->setvalues);

    dav_free_expression(stmt->path);
    dav_free_expression(stmt->where);

    free(stmt);
}