libidav/davqlparser.c

Tue, 31 Mar 2015 13:00:17 +0200

author
Mike Becker <universe@uap-core.de>
date
Tue, 31 Mar 2015 13:00:17 +0200
changeset 82
0567444f2d76
parent 80
a2832c054c98
child 83
7d20ce5d235b
permissions
-rw-r--r--

renamed some enums to avoid collisions with macros + minor grammar change (backticks for identifiers) + tokenizer + parser skeleton

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2015 Olaf Wintermann. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "davqlparser.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>

// Expands a string value with .length and .ptr members into the two
// arguments (int length, pointer) consumed by a "%.*s" printf conversion.
// Note: the macro argument is evaluated twice.
#define sfmtarg(s) ((int)(s).length), (s).ptr

// ------------------------------------------------------------------------
//                       D E B U G G E R
// ------------------------------------------------------------------------

/**
 * Maps a statement type to a printable name.
 *
 * @param type the statement type
 * @return "ERROR", "GET" or "SET"; "unknown" for any other value
 */
static const char* _map_querytype(davqltype_t type) {
    const char *name = "unknown";

    switch (type) {
    case DAVQL_ERROR:
        name = "ERROR";
        break;
    case DAVQL_GET:
        name = "GET";
        break;
    case DAVQL_SET:
        name = "SET";
        break;
    default:
        break;
    }

    return name;
}

/**
 * Maps an expression type to a printable name.
 *
 * @param type the expression type
 * @return the upper case name of the type; "unknown" for any other value
 */
static const char* _map_exprtype(davqlexprtype_t type) {
    if (type == DAVQL_LITERAL) {
        return "LITERAL";
    }
    if (type == DAVQL_IDENTIFIER) {
        return "IDENTIFIER";
    }
    if (type == DAVQL_UNARY) {
        return "UNARY";
    }
    if (type == DAVQL_BINARY) {
        return "BINARY";
    }
    if (type == DAVQL_LOGICAL) {
        return "LOGICAL";
    }
    if (type == DAVQL_FUNCCALL) {
        return "FUNCCALL";
    }
    return "unknown";
}

/**
 * Maps an operator to its printable symbol or keyword.
 *
 * @param op the operator
 * @return the symbol/keyword of the operator; "unknown" for any other value
 */
static const char* _map_operator(davqloperator_t op) {
    const char *symbol;

    // don't use string array, because enum values may change
    switch (op) {
    // arithmetic and bitwise operators
    case ADD:    symbol = "+";       break;
    case SUB:    symbol = "-";       break;
    case MUL:    symbol = "*";       break;
    case DIV:    symbol = "/";       break;
    case AND:    symbol = "&";       break;
    case OR:     symbol = "|";       break;
    case XOR:    symbol = "^";       break;
    case NEG:    symbol = "~";       break;
    // logical operators
    case NOT:    symbol = "NOT";     break;
    case LAND:   symbol = "AND";     break;
    case LOR:    symbol = "OR";      break;
    case LXOR:   symbol = "XOR";     break;
    // comparisons
    case EQ:     symbol = "=";       break;
    case NEQ:    symbol = "!=";      break;
    case LT:     symbol = "<";       break;
    case GT:     symbol = ">";       break;
    case LE:     symbol = "<=";      break;
    case GE:     symbol = ">=";      break;
    case LIKE:   symbol = "LIKE";    break;
    case UNLIKE: symbol = "UNLIKE";  break;
    default:     symbol = "unknown"; break;
    }

    return symbol;
}

/**
 * Prints a summary of a statement to stdout.
 *
 * The summary contains the source text, the statement type, the number of
 * fields, whether one of the fields is the wildcard identifier "*", the path,
 * whether a where clause is present and the depth. For SET statements it also
 * reports whether the field list and the value list have the same size.
 *
 * @param stmt the statement to print (must not be NULL)
 */
static void dav_debug_ql_stmt_print(DavQLStatement *stmt) {
    sstr_t empty = ST("(empty)");

    // Basic information
    printf("Statement: %.*s\nType: %s\nField count: %zu",
        sfmtarg(stmt->srctext),
        _map_querytype(stmt->type),
        ucx_list_size(stmt->fields));

    // Has wildcard
    _Bool wildcard = 0;
    UCX_FOREACH(elm, stmt->fields) {
        DavQLExpression* expr = (DavQLExpression*)elm->data;
        if (expr->type == DAVQL_IDENTIFIER &&
            expr->srctext.length == 1 && *(expr->srctext.ptr) == '*') {
            wildcard = 1;
            // one hit is sufficient
            break;
        }
    }
    // this line also terminates the "Field count" line started above
    printf(" %s wildcard\nPath: %.*s\nHas where clause: %s\n",
        wildcard?"with":"without",
        sfmtarg(stmt->path ? stmt->path->srctext : empty),
        stmt->where ? "yes" : "no");
    if (stmt->type == DAVQL_SET) {
        // terminate the line, otherwise the depth is printed on the same line
        printf("Value list size matches: %s\n",
            ucx_list_size(stmt->fields) == ucx_list_size(stmt->setvalues)
            ? "yes" : "no");
    }

    // WITH attributes
    if (stmt->depth == SIZE_MAX) {
        printf("Depth: unbound\n");
    } else {
        printf("Depth: %zu\n", stmt->depth);
    }
}

/**
 * Checks whether an expression is currently selected in the debugger.
 *
 * Prints a notice to stdout when no expression is selected.
 *
 * @param expr the currently selected expression or NULL
 * @return 1 if expr is not NULL, 0 otherwise
 */
static int dav_debug_ql_expr_selected(DavQLExpression *expr) {
    if (expr) {
        return 1;
    }

    printf("Currently no expression selected.\n");
    return 0;
}

/**
 * Prints source text, type, operator and the source text of both operands
 * of an expression to stdout.
 *
 * When expr is NULL only the "no expression selected" notice is printed.
 * A missing operand is printed as "(empty)".
 *
 * @param expr the expression to print or NULL
 */
static void dav_debug_ql_expr_print(DavQLExpression *expr) {
    if (!dav_debug_ql_expr_selected(expr)) {
        return;
    }

    sstr_t placeholder = ST("(empty)");
    sstr_t lefttext = expr->left ? expr->left->srctext : placeholder;
    sstr_t righttext = expr->right ? expr->right->srctext : placeholder;

    printf(
        "Text: %.*s\nType: %s\nOperator: %s\n"
        "Left hand: %.*s\nRight hand: %.*s\n",
        sfmtarg(expr->srctext),
        _map_exprtype(expr->type),
        _map_operator(expr->op),
        sfmtarg(lefttext),
        sfmtarg(righttext));
}

// Command codes returned by dav_debug_ql_command()
#define DQLD_CMD_Q     0  // quit
#define DQLD_CMD_PS    1  // print statement information
#define DQLD_CMD_PE    2  // print expression information
#define DQLD_CMD_P    10  // examine path
#define DQLD_CMD_L    21  // enter left subtree
#define DQLD_CMD_R    22  // enter right subtree
#define DQLD_CMD_H   100  // help

/**
 * Prompts for a debugger command and reads one line from stdin.
 *
 * A command is only recognized if the line consists of exactly the command
 * followed by a newline.
 *
 * @return the DQLD_CMD_* code of the command, -1 if the input is not a known
 * command, or DQLD_CMD_Q if stdin is at end of file or cannot be read
 */
static int dav_debug_ql_command(void) {
    static const struct {
        const char *input;
        int code;
    } commands[] = {
        {"q\n",  DQLD_CMD_Q},
        {"ps\n", DQLD_CMD_PS},
        {"pe\n", DQLD_CMD_PE},
        {"p\n",  DQLD_CMD_P},
        {"l\n",  DQLD_CMD_L},
        {"r\n",  DQLD_CMD_R},
        {"h\n",  DQLD_CMD_H}
    };

    // the prompt has no newline, flush to make sure it is visible
    printf("> ");
    fflush(stdout);

    char buffer[16];
    if (!fgets(buffer, sizeof(buffer), stdin)) {
        // EOF or read error: the buffer contents are indeterminate and
        // no further input can be expected, so leave the debugger
        return DQLD_CMD_Q;
    }

    for (size_t i = 0 ; i < sizeof(commands) / sizeof(commands[0]) ; i++) {
        if (!strcmp(buffer, commands[i].input)) {
            return commands[i].code;
        }
    }

    return -1;
}

/**
 * Runs an interactive debugger for a parsed statement on stdin/stdout.
 *
 * Prints the statement summary and then processes commands until the quit
 * command is received. The debugger keeps track of one "examined" expression
 * which can be set to the path of the statement and moved to the left or
 * right operand of the current expression.
 *
 * @param stmt the statement to debug; if NULL, an error message is written
 * to stderr and the function returns immediately
 */
void dav_debug_statement(DavQLStatement *stmt) {
    if (!stmt) {
        fprintf(stderr, "Debug DavQLStatement failed: null pointer\n");
        return;
    }

    printf("Starting DavQL debugger (type 'h' for help)...\n\n");
    dav_debug_ql_stmt_print(stmt);

    DavQLExpression *examineexpr = NULL;

    while(1) {
        int cmd = dav_debug_ql_command();
        switch (cmd) {
        case DQLD_CMD_Q: return;
        case DQLD_CMD_PS: dav_debug_ql_stmt_print(stmt); break;
        case DQLD_CMD_PE: dav_debug_ql_expr_print(examineexpr); break;
        case DQLD_CMD_P:
            examineexpr = stmt->path;
            dav_debug_ql_expr_print(examineexpr);
            break;
        case DQLD_CMD_L:
        case DQLD_CMD_R:
            // both directions only differ in the selected operand
            if (dav_debug_ql_expr_selected(examineexpr)) {
                _Bool left = cmd == DQLD_CMD_L;
                DavQLExpression *subtree =
                    left ? examineexpr->left : examineexpr->right;
                if (subtree) {
                    examineexpr = subtree;
                    dav_debug_ql_expr_print(examineexpr);
                } else {
                    printf("There is no %s subtree.\n",
                        left ? "left" : "right");
                }
            }
            break;
        case DQLD_CMD_H:
            printf(
                "\nCommands:\n"
                "p:   examine path\n"
                "ps:  print statement information\n"
                "q:   quit\n\n"
                "\nExpression examination:\n"
                "pe:  print expression information\n"
                "l:   enter left subtree\n"
                "r:   enter right subtree\n");
            break;
        default:
            printf("unknown command\n");
            break;
        }
    }
}

// ------------------------------------------------------------------------
//                         P A R S E R
// ------------------------------------------------------------------------

static UcxList* dav_parse_tokenize(sstr_t src) {
    UcxList *tokens = NULL;
    
    // Delimiters: whitespace and dead whitespace around commas
    sstr_t *token = NULL;
    for (size_t i = 0 ; i < src.length ; i++) {
        if (isspace(src.ptr[i])) {
            // add token before spaces to list (if any)
            if (token) {
                tokens = ucx_list_append(tokens, token);
                token = NULL;
            }
        } else if (src.ptr[i] == ',') {
            // add token before comma to list (if any)
            if (token) {
                tokens = ucx_list_append(tokens, token);
                token = NULL;
            }
            // add comma as token to list
            token = malloc(sizeof(sstr_t));
            token->ptr = src.ptr + i;
            token->length = 1;
            tokens = ucx_list_append(tokens, token);
            // set tokenizer ready to read more tokens
            token = NULL;
        } else {
            // if this is a new token, create memory for it
            if (!token) {
                token = malloc(sizeof(sstr_t));
                token->ptr = src.ptr + i;
                token->length = 0;
            }
            // extend token length when reading more bytes
            token->length++;
        }
    }
    
    if (token) {
        tokens = ucx_list_append(tokens, token);
    }
    
    return tokens;
}

/**
 * Creates an expression node for the specified source text.
 *
 * All members except the source text are zero initialized. No parsing of
 * the text is performed yet.
 *
 * NOTE(review): the allocation result is not checked.
 *
 * @param src the source text of the expression
 * @return a new expression which refers to (not copies) the source text
 */
static DavQLExpression* dav_parse_expression(sstr_t src) {
    DavQLExpression *node = calloc(1, sizeof *node);

    node->srctext = src;
    return node;
}

/**
 * Parses the tokens following the GET keyword.
 *
 * Every token before FROM, except the comma separators, is added to the
 * field list of the statement. The token following FROM becomes the path.
 * The contents of the WHERE and WITH clauses are not evaluated yet: one
 * token is consumed for each of them. The statement type is set to
 * DAVQL_ERROR when the tokens end before a path has been read or within
 * a WHERE or WITH clause.
 *
 * The token objects (but not the list) are freed by this function.
 *
 * @param stmt the statement to fill
 * @param tokens the tokens after the GET keyword (list of sstr_t pointers)
 */
static void dav_parse_get_statement(DavQLStatement *stmt, UcxList *tokens) {
    stmt->type = DAVQL_GET;

    /*
     *   10: field list
     *   20: FROM clause
     *  520: expecting WHERE or WITH clause
     *   30: WHERE clause
     *  530: expecting WITH clause
     *   40: WITH clause
     *  500: ready to quit
     *
     * Steps >= 500 are states in which the statement may end.
     */
    int step = 10;

    UCX_FOREACH(token, tokens) {
        sstr_t tokendata = *(sstr_t*)token->data;

        switch (step) {
        // optional clauses
        case 520:
            if (!sstrcasecmp(tokendata, S("where"))) {
                step = 30;
            }
            /* fallthrough: WITH is accepted in both states */
        case 530:
            if (!sstrcasecmp(tokendata, S("with"))) {
                step = 40;
            }
            break;
        // field list
        case 10:
            if (!sstrcasecmp(tokendata, S("from"))) {
                step = 20;
            } else if (tokendata.length == 1 && *(tokendata.ptr) == ',') {
                // the tokenizer emits separators as tokens,
                // they must not become fields
            } else {
                stmt->fields = ucx_list_append(stmt->fields,
                    dav_parse_expression(tokendata));
            }
            break;
        // from clause
        case 20:
            stmt->path = dav_parse_expression(tokendata);
            step = 520;
            break;
        // where clause
        case 30:
            step = 530;
            break;
        // with clause
        case 40:
            step = 500;
            break;
        // tokens after the end of the statement are ignored
        default:
            break;
        }

        // the expressions only keep a copy of the sstr_t,
        // the token object itself is not needed anymore
        free(token->data);
        token->data = NULL;
    }

    if (step < 500) {
        stmt->type = DAVQL_ERROR;
        // TODO: save parse error message
    }
}

/**
 * Parses the tokens following the SET keyword.
 *
 * Not implemented yet: the statement type is set to DAVQL_SET and the
 * token objects (but not the list) are freed without being evaluated.
 *
 * @param stmt the statement to fill
 * @param tokens the tokens after the SET keyword (list of sstr_t pointers)
 */
static void dav_parse_set_statement(DavQLStatement *stmt, UcxList *tokens) {
    stmt->type = DAVQL_SET;

    // just free the tokens, until the function is implemented
    UCX_FOREACH(token, tokens) {
        free(token->data);
    }
}

/**
 * Parses a DavQL statement.
 *
 * The trimmed source text is tokenized and the first token selects the
 * statement type (GET or SET, case insensitive). If the text contains no
 * tokens or starts with any other token, the type of the returned statement
 * is DAVQL_ERROR.
 *
 * The statement refers to the memory of srctext and does not copy it.
 *
 * @param srctext the source text of the statement
 * @return a new statement, or NULL if the statement cannot be allocated
 */
DavQLStatement* dav_parse_statement(sstr_t srctext) {
    DavQLStatement *stmt = calloc(1, sizeof(DavQLStatement));
    if (!stmt) {
        return NULL;
    }

    // default values
    stmt->type = -1;
    stmt->depth = SIZE_MAX;

    // save trimmed source text
    stmt->srctext = sstrtrim(srctext);

    // tokenization
    UcxList* tokens = dav_parse_tokenize(stmt->srctext);
    if (!tokens) {
        stmt->type = DAVQL_ERROR;
        return stmt;
    }

    // use first token to determine query type
    sstr_t keyword = *(sstr_t*)tokens->data;
    free(tokens->data);
    tokens = ucx_list_remove(tokens, tokens);
    if (!sstrcasecmp(keyword, S("get"))) {
        dav_parse_get_statement(stmt, tokens);
    } else if (!sstrcasecmp(keyword, S("set"))) {
        dav_parse_set_statement(stmt, tokens);
    } else {
        stmt->type = DAVQL_ERROR;
        // no statement parser consumes the remaining tokens,
        // so they must be freed here
        UCX_FOREACH(elm, tokens) {
            free(elm->data);
        }
    }
    // the token objects are freed at this point, only the list is left
    ucx_list_free(tokens);

    return stmt;
}

/**
 * Recursively frees an expression including both operand subtrees.
 *
 * @param expr the expression to free; NULL is accepted and ignored
 */
static void dav_free_expression(DavQLExpression *expr) {
    if (!expr) {
        return;
    }

    // missing operands are handled by the NULL check of the recursive call
    dav_free_expression(expr->left);
    dav_free_expression(expr->right);
    free(expr);
}

/**
 * Frees a statement including its field expressions, set value expressions,
 * path expression and where expression.
 *
 * @param stmt the statement to free; NULL is accepted and ignored
 */
void dav_free_statement(DavQLStatement *stmt) {
    if (!stmt) {
        return;
    }

    // the lists only own the nodes, the expressions are freed separately
    UCX_FOREACH(expr, stmt->fields) {
        dav_free_expression(expr->data);
    }
    ucx_list_free(stmt->fields);
    UCX_FOREACH(expr, stmt->setvalues) {
        dav_free_expression(expr->data);
    }
    ucx_list_free(stmt->setvalues);

    if (stmt->path) {
        dav_free_expression(stmt->path);
    }
    if (stmt->where) {
        dav_free_expression(stmt->where);
    }
    free(stmt);
}

mercurial