libidav/davqlparser.c

Sat, 02 May 2015 18:52:04 +0200

author
Mike Becker <universe@uap-core.de>
date
Sat, 02 May 2015 18:52:04 +0200
changeset 105
ee0de2b1872e
parent 103
b29692d5f7a7
child 106
9cec78f23cbf
permissions
-rw-r--r--

fixed control flow in int_cmd + committed testcase (enable with #define DO_THE_TEST)

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2015 Olaf Wintermann. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "davqlparser.h"
#include <ucx/utils.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define sfmtarg(s) ((int)(s).length), (s).ptr

// ------------------------------------------------------------------------
//                        D E B U G G E R
// ------------------------------------------------------------------------

/**
 * Translates a statement type into a printable name for the debugger.
 *
 * @param type the statement type
 * @return a static string: "ERROR", "GET", "SET", or "unknown" for any
 * other value
 */
static const char* _map_querytype(davqltype_t type) {
    if (type == DAVQL_ERROR) {
        return "ERROR";
    }
    if (type == DAVQL_GET) {
        return "GET";
    }
    if (type == DAVQL_SET) {
        return "SET";
    }
    return "unknown";
}

/**
 * Translates an expression type into a printable name for the debugger.
 *
 * @param type the expression type
 * @return a static string naming the type, or "unknown" if the value
 * is not handled here
 */
static const char* _map_exprtype(davqlexprtype_t type) {
    const char *label = "unknown";
    switch(type) {
    case DAVQL_UNDEFINED_TYP:
        label = "undefined";
        break;
    case DAVQL_NUMBER:
        label = "NUMBER";
        break;
    case DAVQL_STRING:
        label = "STRING";
        break;
    case DAVQL_TIMESTAMP:
        label = "TIMESTAMP";
        break;
    case DAVQL_IDENTIFIER:
        label = "IDENTIFIER";
        break;
    case DAVQL_UNARY:
        label = "UNARY";
        break;
    case DAVQL_BINARY:
        label = "BINARY";
        break;
    case DAVQL_LOGICAL:
        label = "LOGICAL";
        break;
    case DAVQL_FUNCCALL:
        label = "FUNCCALL";
        break;
    default:
        break;
    }
    return label;
}

/**
 * Describes the kind of special field found at the start of a field list.
 *
 * @param info 0 for no special field, 1 for the wildcard, 2 for
 * "resource data only"
 * @return a static description string; any other value of info yields
 * "with mysterious identifier"
 */
static const char* _map_specialfield(int info) {
    static const char* const descriptions[] = {
        "",
        "with wildcard",
        "(resource data only)"
    };
    if (info < 0 || info > 2) {
        return "with mysterious identifier";
    }
    return descriptions[info];
}

/**
 * Translates an operator into its printable symbol or keyword.
 *
 * @param op the operator
 * @return a static string for the operator, or "unknown" if the value
 * is not handled here
 */
static const char* _map_operator(davqloperator_t op) {
    // deliberately a switch and not a lookup array, because the numeric
    // enum values may change
    const char *symbol;
    switch(op) {
    case DAVQL_NOOP:   symbol = "no operator"; break;
    case DAVQL_ADD:    symbol = "+"; break;
    case DAVQL_SUB:    symbol = "-"; break;
    case DAVQL_MUL:    symbol = "*"; break;
    case DAVQL_DIV:    symbol = "/"; break;
    case DAVQL_AND:    symbol = "&"; break;
    case DAVQL_OR:     symbol = "|"; break;
    case DAVQL_XOR:    symbol = "^"; break;
    case DAVQL_NEG:    symbol = "~"; break;
    case DAVQL_NOT:    symbol = "NOT"; break;
    case DAVQL_LAND:   symbol = "AND"; break;
    case DAVQL_LOR:    symbol = "OR"; break;
    case DAVQL_LXOR:   symbol = "XOR"; break;
    case DAVQL_EQ:     symbol = "="; break;
    case DAVQL_NEQ:    symbol = "!="; break;
    case DAVQL_LT:     symbol = "<"; break;
    case DAVQL_GT:     symbol = ">"; break;
    case DAVQL_LE:     symbol = "<="; break;
    case DAVQL_GE:     symbol = ">="; break;
    case DAVQL_LIKE:   symbol = "LIKE"; break;
    case DAVQL_UNLIKE: symbol = "UNLIKE"; break;
    default:           symbol = "unknown"; break;
    }
    return symbol;
}

/**
 * Prints the names of all fields of a statement on one line.
 *
 * The names are separated by ", ". The separator is only written between
 * two names, so no backspace characters are needed to erase a trailing
 * separator (those would end up as garbage in redirected output and would
 * erase part of the label when the field list is empty).
 *
 * @param stmt the statement whose field names shall be printed
 */
static void dav_debug_ql_fnames_print(DavQLStatement *stmt) {
    printf("Field names: ");
    UCX_FOREACH(field, stmt->fields) {
        DavQLField *f = field->data;
        printf("%.*s%s", sfmtarg(f->name), field->next ? ", " : "");
    }
    printf("\n");
}

/**
 * Prints a summary of a statement: source text, type, field count,
 * field names, path, presence of a where clause, depth, order by
 * criteria and, if set, the error code and message.
 *
 * @param stmt the statement to print
 */
static void dav_debug_ql_stmt_print(DavQLStatement *stmt) {
    // A first field that is an identifier starting with '*' or '-' is
    // treated as special field and is not counted as a regular field
    int special = 0;
    if (stmt->fields) {
        DavQLField *first = (DavQLField*)stmt->fields->data;
        if (first->expr->type == DAVQL_IDENTIFIER) {
            char marker = first->expr->srctext.ptr[0];
            if (marker == '*') {
                special = 1;
            } else if (marker == '-') {
                special = 2;
            }
        }
    }
    size_t fieldcount = ucx_list_size(stmt->fields);
    if (special != 0) {
        fieldcount--;
    }

    // Basic information
    printf("Statement: %.*s\nType: %s\nField count: %zu %s\n",
        sfmtarg(stmt->srctext),
        _map_querytype(stmt->type),
        fieldcount,
        _map_specialfield(special));

    dav_debug_ql_fnames_print(stmt);

    const char *haswhere = stmt->where ? "yes" : "no";
    printf("Path: %.*s\nHas where clause: %s\n",
        sfmtarg(stmt->path),
        haswhere);

    // WITH attributes
    if (stmt->depth == DAV_DEPTH_INFINITY) {
        printf("Depth: infinity\n");
    } else if (stmt->depth == DAV_DEPTH_PLACEHOLDER) {
        printf("Depth: placeholder\n");
    } else {
        printf("Depth: %d\n", stmt->depth);
    }

    // order by clause
    printf("Order by: ");
    if (!stmt->orderby) {
        printf("nothing\n");
    } else {
        UCX_FOREACH(elem, stmt->orderby) {
            DavQLOrderCriterion *criterion = elem->data;
            const char *direction = criterion->descending ? "desc" : "asc";
            // the last criterion terminates the line
            const char *separator = elem->next ? ", " : "\n";
            printf("%.*s %s%s", sfmtarg(criterion->column->srctext),
                direction, separator);
        }
    }

    // error messages
    if (stmt->errorcode) {
        printf("\nError code: %d\nError: %s\n",
            stmt->errorcode, stmt->errormessage);
    }
}

/**
 * Checks whether an expression is currently selected in the debugger and
 * prints a hint if not.
 *
 * @param expr the currently selected expression (may be NULL)
 * @return 1 if expr is not NULL, 0 otherwise
 */
static int dav_debug_ql_expr_selected(DavQLExpression *expr) {
    if (expr) {
        return 1;
    }
    printf("Currently no expression selected.\n");
    return 0;
}

/**
 * Prints source text, type and operator of an expression and, if it has at
 * least one child, the source text of both children.
 *
 * If expr is NULL only a hint is printed.
 *
 * @param expr the expression to print (may be NULL)
 */
static void dav_debug_ql_expr_print(DavQLExpression *expr) {
    if (!dav_debug_ql_expr_selected(expr)) {
        return;
    }

    printf(
        "Text: %.*s\nType: %s\nOperator: %s\n",
        sfmtarg(expr->srctext),
        _map_exprtype(expr->type),
        _map_operator(expr->op));

    if (!expr->left && !expr->right) {
        return;
    }

    // a missing child is shown as "(empty)"
    sstr_t empty = ST("(empty)");
    sstr_t lefttext = expr->left ? expr->left->srctext : empty;
    sstr_t righttext = expr->right ? expr->right->srctext : empty;
    printf("Left hand: %.*s\nRight hand: %.*s\n",
        sfmtarg(lefttext),
        sfmtarg(righttext));
}

/*
 * Command codes of the interactive DavQL debugger.
 * They are returned by dav_debug_ql_command() and dispatched in
 * dav_debug_statement(). The values of F and O are also stored as the
 * identifier of the clause that is currently examined.
 */
// q: quit, ps / pe / pf: print statement / expression / field names
#define DQLD_CMD_Q     0
#define DQLD_CMD_PS    1
#define DQLD_CMD_PE    2
#define DQLD_CMD_PF    3
// f / w / o: examine field list / where clause / order by clause
#define DQLD_CMD_F    10
#define DQLD_CMD_W    11
#define DQLD_CMD_O    12
// l / r: enter left / right subtree, n / p: next / previous list element
#define DQLD_CMD_L    21
#define DQLD_CMD_R    22
#define DQLD_CMD_N    23
#define DQLD_CMD_P    24
// h: print help
#define DQLD_CMD_H   100

/**
 * Prints the prompt and reads one debugger command line from stdin.
 *
 * Lines that do not fit into the internal buffer are consumed completely,
 * so the surplus characters are not interpreted as a further command.
 *
 * @return the DQLD_CMD_* code of the entered command, DQLD_CMD_Q if stdin
 * has reached end of file or cannot be read, or -1 if the input is not a
 * known command
 */
static int dav_debug_ql_command(void) {
    static const struct {
        const char *input;
        int code;
    } commands[] = {
        {"q\n",  DQLD_CMD_Q},
        {"ps\n", DQLD_CMD_PS},
        {"pe\n", DQLD_CMD_PE},
        {"pf\n", DQLD_CMD_PF},
        {"l\n",  DQLD_CMD_L},
        {"r\n",  DQLD_CMD_R},
        {"h\n",  DQLD_CMD_H},
        {"f\n",  DQLD_CMD_F},
        {"w\n",  DQLD_CMD_W},
        {"o\n",  DQLD_CMD_O},
        {"n\n",  DQLD_CMD_N},
        {"p\n",  DQLD_CMD_P}
    };

    printf("> ");

    char buffer[8];
    if (!fgets(buffer, sizeof(buffer), stdin)) {
        // nothing was read, the buffer content is indeterminate:
        // leave the debugger instead of prompting forever
        return DQLD_CMD_Q;
    }

    // discard remaining chars of an overlong line
    if (!strchr(buffer, '\n')) {
        int chr;
        // the assignment must be parenthesized, otherwise chr would
        // receive the result of the comparison and never equal EOF
        while ((chr = fgetc(stdin)) != '\n' && chr != EOF);
    }

    for (size_t i = 0 ; i < sizeof(commands)/sizeof(commands[0]) ; i++) {
        if (!strcmp(buffer, commands[i].input)) {
            return commands[i].code;
        }
    }

    return -1;
}

/**
 * Runs the interactive DavQL debugger for a statement.
 *
 * The statement summary is printed first. If the statement carries an error
 * code, the function returns right after that. Otherwise commands are read
 * from stdin until the quit command is entered. The commands allow to
 * select the field list, the where clause or the order by clause, to step
 * through list elements and to descend into the left or right subtree of
 * the selected expression.
 *
 * @param stmt the statement to debug; if NULL, a message is written to
 * stderr and the function returns
 */
void dav_debug_statement(DavQLStatement *stmt) {
    if (!stmt) {
        fprintf(stderr, "Debug DavQLStatement failed: null pointer\n");
        return;
    }

    printf("Starting DavQL debugger (type 'h' for help)...\n\n");
    dav_debug_ql_stmt_print(stmt);
    
    if (stmt->errorcode) {
        return;
    }
    
    // expression that is currently examined
    DavQLExpression *examineexpr = NULL;
    // list element the examined expression belongs to (field / order by)
    UcxList *examineelem = NULL;
    // DQLD_CMD_F or DQLD_CMD_O while a list is examined, 0 otherwise
    int examineclause = 0;
    
    while(1) {
        int cmd = dav_debug_ql_command();
        switch (cmd) {
        case DQLD_CMD_Q: return;
        case DQLD_CMD_PS: dav_debug_ql_stmt_print(stmt); break;
        case DQLD_CMD_PE: dav_debug_ql_expr_print(examineexpr); break;
        case DQLD_CMD_PF: dav_debug_ql_fnames_print(stmt); break;
        case DQLD_CMD_F:
            // only (re)select the first field when switching the clause
            if (examineclause != DQLD_CMD_F) {
                examineclause = DQLD_CMD_F;
                examineelem = stmt->fields;
                examineexpr = stmt->fields ?
                    ((DavQLField*)stmt->fields->data)->expr : NULL;
                dav_debug_ql_expr_print(examineexpr);
            }
            break;
        case DQLD_CMD_W:
            // the where clause is a single expression, not a list
            examineclause = 0; examineelem = NULL;
            examineexpr = stmt->where;
            dav_debug_ql_expr_print(examineexpr);
            break;
        case DQLD_CMD_O:
            // only (re)select the first criterion when switching the clause
            if (examineclause != DQLD_CMD_O) {
                examineclause = DQLD_CMD_O;
                examineelem = stmt->orderby;
                examineexpr = stmt->orderby ?
                    ((DavQLOrderCriterion*)stmt->orderby->data)->column : NULL;
                dav_debug_ql_expr_print(examineexpr);
            }
            break;
        case DQLD_CMD_N:
        case DQLD_CMD_P:
            if (examineelem) {
                UcxList *newelem = (cmd == DQLD_CMD_N ?
                    examineelem->next : examineelem->prev);
                if (newelem) {
                    examineelem = newelem;
                    if (examineclause == DQLD_CMD_O) {
                        examineexpr = ((DavQLOrderCriterion*)
                            examineelem->data)->column;
                    } else if (examineclause == DQLD_CMD_F) {
                        examineexpr = ((DavQLField*)examineelem->data)->expr;
                    } else {
                        printf("Examining unknown clause type.\n");
                    }
                    dav_debug_ql_expr_print(examineexpr);
                } else {
                    printf("Reached end of list.\n");
                }
            } else {
                printf("Currently not examining an expression list.\n");
            }
            break;
        case DQLD_CMD_L:
            if (dav_debug_ql_expr_selected(examineexpr)) {
                if (examineexpr->left) {
                    examineexpr = examineexpr->left;
                    dav_debug_ql_expr_print(examineexpr);
                } else {
                    printf("There is no left subtree.\n");
                }
            }
            break;
        case DQLD_CMD_R:
            if (dav_debug_ql_expr_selected(examineexpr)) {
                if (examineexpr->right) {
                    examineexpr = examineexpr->right;
                    dav_debug_ql_expr_print(examineexpr);
                } else {
                    printf("There is no right subtree.\n");
                }
            }
            break;
        case DQLD_CMD_H:
            printf(
                "\nCommands:\n"
                "ps:  print statement information\n"
                "o:   examine order by clause\n"
                "f:   examine field list\n"
                "pf:  print field names\n"
                "w:   examine where clause\n"
                "n:   examine next expression "
                    "(in order by clause or field list)\n"
                "p:   examine previous expression "
                    "(in order by clause or field list)\n"
                "q:   quit\n\n"
                "\nExpression examination:\n"
                "pe:  print expression information\n"
                "l:   enter left subtree\n"
                "r:   enter right subtree\n");
            break;
        default: printf("unknown command\n");
        }
    }
}

// ------------------------------------------------------------------------
//                         P A R S E R
// ------------------------------------------------------------------------

/*
 * Error message templates of the parser.
 * Templates containing three "%.*s" specifiers are used with
 * dav_error_in_context(), which supplies the previous, the current and the
 * next token (in this order); the "[->]" marks the current token.
 */
#define _unexpected_end_msg "unexpected end of statement"
#define _invalid_msg "invalid statement"
#define _unexpected_token "unexpected token (%.*s [->]%.*s %.*s)"
#define _expected_token "expected token '%s' before '%.*s'"
#define _expected_by "expected 'by' after 'order' (order [->]%.*s)"
#define _missing_fmtspec "format specifier missing (%.*s [->]%.*s %.*s)"
#define _invalid_fmtspec "invalid format specifier (%.*s [->]%.*s %.*s)"
#define _unknown_fmtspec "unknown format specifier (%.*s [->]%.*s %.*s)"
#define _missing_quote "missing closing quote symbol (%.*s)"
#define _parser_state "parser reached invalid state"
#define _unknown_attribute "unknown attribute '%.*s'"
#define _duplicated_attribute "duplicated attribute '%.*s'"
#define _invalid_depth "invalid depth"
#define _invalid_path "invalid path"

#define _identifier_expected "identifier expected (%.*s [->]%.*s %.*s)"
#define _idornum_expected "identifier or number expected (%.*s [->]%.*s %.*s)"
#define _idorstr_expected "identifier or string expected (%.*s [->]%.*s %.*s)"
#define _idorts_expected "identifier or timestamp expected (%.*s [->]%.*s %.*s)"

// accesses the token text stored as data of a token list element
#define token_sstr(listelem) ((sstr_t*)(listelem)->data)

/**
 * Stores an error in the statement whose message shows the offending token
 * together with its neighbors.
 *
 * The message is formatted from errormsg with three "%.*s" arguments: the
 * previous token, the token itself and the next token. A missing neighbor
 * is replaced by an empty string.
 *
 * @param errorcode the error code to store in the statement
 * @param errormsg the format string for the message
 * @param stmt the statement receiving error code and message
 * @param token the list element of the offending token
 */
static void dav_error_in_context(int errorcode, const char *errormsg,
        DavQLStatement *stmt, UcxList *token) {
    sstr_t emptystring = ST("");
    sstr_t before = token->prev ? *token_sstr(token->prev) : emptystring;
    sstr_t current = *token_sstr(token);
    sstr_t after = token->next ? *token_sstr(token->next) : emptystring;

    stmt->errorcode = errorcode;
    stmt->errormessage = ucx_sprintf(errormsg,
        sfmtarg(before), sfmtarg(current), sfmtarg(after)).ptr;
}

// special symbols are single tokens - the % sign MUST NOT be a special symbol
static const char *special_token_symbols = ",()+-*/&|^~=!<>";

/**
 * Splits the source text of a statement into a list of tokens.
 *
 * Tokens are separated by white space. Every special symbol is a token of
 * its own. A sequence enclosed in single quotes or backticks is one token
 * that includes the enclosing quote characters; the respective other quote
 * character may occur inside of it. A quoted sequence that is not closed
 * extends to the end of the source text.
 *
 * The tokens do not own any text, they point into the memory of src.
 *
 * @param src the source text
 * @return a list of allocated sstr_t tokens or NULL, if there are no tokens
 */
static UcxList* dav_parse_tokenize(sstr_t src) {
    UcxList *tokens = NULL;
    
    sstr_t *token = NULL;
    char insequence = '\0';
    for (size_t i = 0 ; i < src.length ; i++) {
        char c = src.ptr[i];
        // quoted strings / identifiers are a single token
        if (c == '\'' || c == '`') {
            if (c == insequence) {
                // add quoted token to list
                token->length++;
                tokens = ucx_list_append(tokens, token);
                token = NULL;
                insequence = '\0';
            } else if (insequence == '\0') {
                insequence = c;
                // always create new token for quoted strings
                if (token) {
                    tokens = ucx_list_append(tokens, token);
                }
                token = malloc(sizeof(sstr_t));
                token->ptr = src.ptr + i;
                token->length = 1;
            } else {
                // add other kind of quotes to token
                token->length++;
            }
        } else if (insequence) {
            token->length++;
        } else if (isspace((unsigned char)c)) {
            // the cast is required, because passing a negative char value
            // (e.g. a non-ASCII byte) to isspace() is undefined behavior

            // add token before spaces to list (if any)
            if (token) {
                tokens = ucx_list_append(tokens, token);
                token = NULL;
            }
        } else if (strchr(special_token_symbols, c)) {
            // add token before special symbol to list (if any)
            if (token) {
                tokens = ucx_list_append(tokens, token);
                token = NULL;
            }
            // add special symbol as single token to list
            token = malloc(sizeof(sstr_t));
            token->ptr = src.ptr + i;
            token->length = 1;
            tokens = ucx_list_append(tokens, token);
            // set tokenizer ready to read more tokens
            token = NULL;
        } else {
            // if this is a new token, create memory for it
            if (!token) {
                token = malloc(sizeof(sstr_t));
                token->ptr = src.ptr + i;
                token->length = 0;
            }
            // extend token length when reading more bytes
            token->length++;
        }
    }
    
    // add the token that is terminated by the end of the source text
    if (token) {
        tokens = ucx_list_append(tokens, token);
    }
    
    return tokens;
}

/**
 * Creates an expression from a sequence of tokens.
 *
 * A single token is classified as string (single quotes), number (leading
 * digit), format specifier (%d, %s, %t) or identifier (anything else,
 * including backtick quoted text). Enclosing quotes are removed from the
 * source text of the expression. Problems are reported via error code and
 * message of the statement; the expression is returned nevertheless.
 *
 * For multiple tokens only the source text of the expression is determined
 * yet. The expression tree is not built (see TODO below).
 *
 * @param stmt the statement that receives error information
 * @param starttoken the list element of the first token
 * @param n the number of tokens the expression consists of
 * @return the allocated expression or NULL, if n is zero
 */
static DavQLExpression* dav_parse_expression(
        DavQLStatement* stmt, UcxList* starttoken, size_t n) {
    if (n == 0) {
        return NULL;
    }
    
    DavQLExpression *expr = calloc(1, sizeof(DavQLExpression));
    
    // set pointer for source text
    expr->srctext.ptr = token_sstr(starttoken)->ptr;
    
    // special case - only one token
    if (n == 1) {
        expr->srctext.length = token_sstr(starttoken)->length;
        char firstchar = expr->srctext.ptr[0];
        char lastchar = expr->srctext.ptr[expr->srctext.length-1];
        if (firstchar == '\'') {
            expr->type = DAVQL_STRING;
        } else if (isdigit((unsigned char)firstchar)) {
            // cast: negative char values are undefined for isdigit()
            expr->type = DAVQL_NUMBER;
        } else if (firstchar == '%') {
            if (expr->srctext.length == 1) {
                dav_error_in_context(DAVQL_ERROR_MISSING_FMTSPEC,
                    _missing_fmtspec, stmt, starttoken);
            } else if (expr->srctext.length == 2) {
                switch (expr->srctext.ptr[1]) {
                case 'd': expr->type = DAVQL_NUMBER; break;
                case 's': expr->type = DAVQL_STRING; break;
                case 't': expr->type = DAVQL_TIMESTAMP; break;
                default:
                    dav_error_in_context(DAVQL_ERROR_UNKNOWN_FMTSPEC,
                        _unknown_fmtspec, stmt, starttoken);
                }
            } else {
                dav_error_in_context(DAVQL_ERROR_INVALID_FMTSPEC,
                        _invalid_fmtspec, stmt, starttoken);
            }
        } else {
            expr->type = DAVQL_IDENTIFIER;
        }
        // remove quotes (if any)
        if (firstchar == '\'' || firstchar == '`') {
            // a token of length one is a lone opening quote: first and last
            // char are the same byte then, so the length must be checked
            if (expr->srctext.length < 2 || lastchar != firstchar) {
                stmt->errorcode = DAVQL_ERROR_MISSING_QUOTE;
                stmt->errormessage =
                    ucx_sprintf(_missing_quote, sfmtarg(expr->srctext)).ptr;
            }
            expr->srctext.ptr++;
            if (expr->srctext.length > 2) {
                expr->srctext.length -= 2;
            } else {
                expr->srctext.length = 0;
            }
        }
    } else {
        UcxList* token = starttoken;
        
        // check, if first token is (
        // if so, verify that last token is ) and throw both away
        if (!sstrcmp(*token_sstr(token), S("("))) {
            UcxList *last = ucx_list_get(token, n-1);
            if (last && !sstrcmp(*token_sstr(last), S(")"))) {
                token = token->next;
                n -= 2;
            } else {
                // TODO: throw syntax error
            }
        }

        // process tokens
        // TODO: make it so
        // (for now the tokens are only skipped to find the last one)
        for (size_t i = 1 ; i < n ; i++) {
            token = token->next;
        }

        // compute length of source text (including delimiters)
        expr->srctext.length = token_sstr(token)->ptr +
            token_sstr(token)->length - expr->srctext.ptr;
    }
    
    return expr;
}

/**
 * Recursively frees an expression including its left and right subtree.
 *
 * The function is NULL-safe, because dav_parse_expression() may return NULL
 * and callers like dav_free_field() pass their expression unchecked.
 *
 * @param expr the expression to free (may be NULL)
 */
static void dav_free_expression(DavQLExpression *expr) {
    if (!expr) {
        return;
    }
    dav_free_expression(expr->left);
    dav_free_expression(expr->right);
    free(expr);
}
    
/*
 * Steps of the statement parser. The clause parsers return the step that
 * shall handle the next token.
 */
#define _step_fieldlist_    10 // field list
#define _step_FROM_         20 // FROM clause
#define _step_WITH_         30 // WITH clause
#define _step_WITHopt_     530 // expecting more WITH details or end
#define _step_WHERE_        40 // WHERE clause
#define _step_ORDER_BY_     50 // ORDER BY clause
#define _step_ORDER_BYopt_ 550 // expecting more ORDER BY details or end
#define _step_end_         500 // expect end

/*
 * State of the field list parser, kept between the calls of
 * dav_parse_fieldlist() for the single tokens.
 */
struct fieldlist_parser_state {
    // first token of the field expression that is currently collected
    UcxList *expr_firsttoken;
    // field created on keyword "as", waiting for its name
    DavQLField *currentfield;
    // number of tokens collected for the current field expression
    size_t expr_len;
    /*
     * 0: begin of field list - may encounter "*" or "-" special fields
     * 1: collect expression token
     *    switch to step 2 on keyword "as"
     *    expect "," or "from" only if expr_len is 1 (add to list and continue)
     * 2: expect one token (identifier) for as clause
     * 3: expect a ",": continue with step 1
     *    or a "from": leave field list parser
     * 4: expect end of field list (i.e. a "from" keyword)
     */
    int step;
};

/**
 * Frees a field together with the expression it refers to.
 *
 * @param field the field to free
 */
static void dav_free_field(DavQLField *field) {
    DavQLExpression *fieldexpr = field->expr;
    dav_free_expression(fieldexpr);
    free(field);
}

/**
 * Processes one token of the field list of a statement.
 *
 * The steps of the internal state machine are described at
 * struct fieldlist_parser_state. Completed fields are appended to the
 * field list of the statement.
 *
 * @param stmt the statement receiving the fields and error information
 * @param token the list element of the token to process
 * @param state the parser state, zero-initialized before the first token
 * @return _step_fieldlist_ if more field list tokens are expected,
 * _step_FROM_ after the "from" keyword, or 0 for most errors (error code
 * and message of the statement are set in any error case)
 */
static int dav_parse_fieldlist(DavQLStatement *stmt, UcxList *token,
        struct fieldlist_parser_state *state) {
    sstr_t tokendata = *token_sstr(token);

    _Bool fromkeyword = !sstrcasecmp(tokendata, S("from"));
    _Bool comma = !sstrcmp(tokendata, S(","));
    
    switch (state->step) {
    case 0:
        if (!sstrcmp(tokendata, S("*")) || !sstrcmp(tokendata, S("-"))) {
            DavQLField *field = malloc(sizeof(DavQLField));
            field->name = tokendata;
            field->expr = calloc(1, sizeof(DavQLExpression));
            field->expr->type = DAVQL_IDENTIFIER;
            field->expr->srctext = tokendata;
            stmt->fields = ucx_list_append(stmt->fields, field);
            
            if (tokendata.ptr[0] == '-') {
                // no further fields may follow, if dash symbol has been found
                state->step = 4;
            } else {
                state->step = 3;
            }
            return _step_fieldlist_;
        }
        // did not encounter special field, fall through to step 1
        state->step = 1;
        /* fallthrough */
    case 1:
        if (fromkeyword || comma) {
            // add possible identifier to list
            if (state->expr_firsttoken) {
                // TODO: skip comma in function call)
                if (state->expr_len > 1) {
                    stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
                    stmt->errormessage = ucx_sprintf(_expected_token,
                        "AS", sfmtarg(tokendata)).ptr;
                    return 0;
                }
                
                DavQLExpression *expr = dav_parse_expression(
                    stmt, state->expr_firsttoken, state->expr_len);
                
                if (expr->type != DAVQL_IDENTIFIER) {
                    dav_free_expression(expr);
                    stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
                    stmt->errormessage = ucx_sprintf(_expected_token,
                        "AS", sfmtarg(tokendata)).ptr;
                    return 0;
                } // TODO: do not allow identifier when wildcard is present
                
                DavQLField *field = malloc(sizeof(DavQLField));
                field->expr = expr;
                field->name = field->expr->srctext;
                stmt->fields = ucx_list_append(stmt->fields, field);
                
                state->expr_firsttoken = NULL;
                state->expr_len = 0;

                if (fromkeyword) {
                    return _step_FROM_;
                }
            } else {
                dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
                    _unexpected_token, stmt, token);
                return 0;
            }
        } else if (!sstrcasecmp(tokendata, S("as"))) {
            if (!state->expr_firsttoken) {
                // "as" without preceding expression: without this check
                // a field with a NULL expression would be created
                dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
                    _unexpected_token, stmt, token);
                return 0;
            }
            
            state->currentfield = malloc(sizeof(DavQLField));
            state->currentfield->expr = dav_parse_expression(
                stmt, state->expr_firsttoken, state->expr_len);
                
            state->expr_firsttoken = NULL;
            state->expr_len = 0;
                
            state->step = 2;
        } else {
            // collect tokens for field expression
            if (state->expr_firsttoken) {
                state->expr_len++;
            } else {
                state->expr_firsttoken = token;
                state->expr_len = 1;
            }
        }
        
        return _step_fieldlist_;
    case 2: {
        // the temporary expression is only used to classify the token,
        // its source text points into the statement and stays valid
        DavQLExpression *expr = dav_parse_expression(stmt, token, 1);
        if (expr->type == DAVQL_IDENTIFIER) {
            state->currentfield->name = expr->srctext;
            stmt->fields = ucx_list_append(stmt->fields, state->currentfield);
        } else {
            dav_free_field(state->currentfield);
            dav_error_in_context(DAVQL_ERROR_IDENTIFIER_EXPECTED,
                _identifier_expected, stmt, token);
        }
        // the field is either owned by the list now or has been freed
        state->currentfield = NULL;
        dav_free_expression(expr);
        state->step = 3;
        
        return _step_fieldlist_;
    }
    case 3:
        if (fromkeyword) {
            return _step_FROM_;
        } else if (comma) {
            state->step = 1;
            return _step_fieldlist_;
        } else {
            dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
                _unexpected_token, stmt, token);
            return 0;
        }
    case 4:
        if (fromkeyword) {
            return _step_FROM_;
        } else {
            stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
            stmt->errormessage = ucx_sprintf(_expected_token,
                "FROM", sfmtarg(tokendata)).ptr;
            return 0;
        }
    default:
        stmt->errorcode = DAVQL_ERROR_INVALID;
        stmt->errormessage = strdup(_parser_state);
        return 0;
    }
}

/**
 * Processes one token of the FROM clause.
 *
 * The keywords "with", "where" and "order" end the clause. Any other token
 * is part of the path: the first one must start with a slash or be the
 * placeholder "%s"; further tokens extend the path up to their end and
 * are only accepted, if the path starts with a slash.
 *
 * @param stmt the statement receiving the path and error information
 * @param token the list element of the token to process
 * @return the step of the clause that follows, or _step_FROM_ if the token
 * was treated as part of the path (also when the path is invalid)
 */
static int dav_parse_from(DavQLStatement *stmt, UcxList *token) {
    sstr_t tokendata = *token_sstr(token);

    // keywords that start the next clause
    if (!sstrcasecmp(tokendata, S("with"))) {
        return _step_WITH_;
    }
    if (!sstrcasecmp(tokendata, S("where"))) {
        return _step_WHERE_;
    }
    if (!sstrcasecmp(tokendata, S("order"))) {
        return _step_ORDER_BY_;
    }

    _Bool valid;
    if (!stmt->path.ptr) {
        // first token of the path
        valid = tokendata.ptr[0] == '/' || !sstrcmp(tokendata, S("%s"));
        if (valid) {
            stmt->path = tokendata;
        }
    } else {
        // continuation: stretch the path to the end of this token
        valid = stmt->path.ptr[0] == '/';
        if (valid) {
            char *end = tokendata.ptr+tokendata.length;
            stmt->path.length = end - stmt->path.ptr;
        }
    }

    if (!valid) {
        stmt->errorcode = DAVQL_ERROR_INVALID_PATH;
        stmt->errormessage = strdup(_invalid_path);
    }

    return _step_FROM_;
}

/*
 * State of the WITH clause parser, kept between the calls of
 * dav_parse_with_clause() for the single tokens.
 */
struct with_parser_state {
    /*
     * 0: key
     * 1: =
     * 2: value
     * 3: comma or new clause or end
     */
    int step;
    /*
     * 1: depth
     */
    int key;
    // bitwise OR of all keys encountered so far, used to detect duplicates
    int keymask;
};

/**
 * Processes one token of the WITH clause.
 *
 * The clause is a comma separated list of "key = value" attributes. The
 * only known key is "depth", whose value is "infinity", the placeholder
 * "%d" or a number. The steps of the internal state machine are described
 * at struct with_parser_state.
 *
 * @param stmt the statement receiving the attributes and error information
 * @param token the list element of the token to process
 * @param state the parser state, zero-initialized before the first token
 * @return _step_WITH_ if the clause must be continued, _step_WITHopt_ if
 * the clause may be continued or the statement may end, the step of the
 * following clause, or 0 for an unexpected token or an invalid state
 */
static int dav_parse_with_clause(DavQLStatement *stmt, UcxList *token,
        struct with_parser_state *state) {
    sstr_t tokendata = *token_sstr(token);

    switch (state->step) {
    case 0:
        if (!sstrcasecmp(tokendata, S("depth"))) {
            state->key = 1;
            state->step = 1;
            if (state->keymask & state->key) {
                stmt->errorcode = DAVQL_ERROR_DUPLICATED_ATTRIBUTE;
                stmt->errormessage = ucx_sprintf(_duplicated_attribute,
                    sfmtarg(tokendata)).ptr;
            } else {
                state->keymask |= state->key;
            }
        } else {
            stmt->errorcode = DAVQL_ERROR_UNKNOWN_ATTRIBUTE;
            stmt->errormessage = ucx_sprintf(_unknown_attribute,
                sfmtarg(tokendata)).ptr;
        }
        return _step_WITH_; // continue parsing WITH clause
    case 1:
        if (sstrcmp(tokendata, S("="))) {
            stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
            stmt->errormessage = ucx_sprintf(_expected_token,
                "=", sfmtarg(tokendata)).ptr;
        } else {
            state->step = 2;
        }
        return _step_WITH_; // continue parsing WITH clause
    case 2:
        switch (state->key) {
        case 1: /* depth */
            if (!sstrcasecmp(tokendata, S("infinity"))) {
                stmt->depth = DAV_DEPTH_INFINITY;
            } else {
                DavQLExpression *depthexpr =
                    dav_parse_expression(stmt, token, 1);
                
                if (depthexpr->type == DAVQL_NUMBER) {
                    if (depthexpr->srctext.ptr[0] == '%') {
                        stmt->depth = DAV_DEPTH_PLACEHOLDER;
                    } else {
                        // strtol() needs a zero terminated copy
                        sstr_t depthstr = depthexpr->srctext;
                        char *conv = malloc(depthstr.length+1);
                        char *chk;
                        memcpy(conv, depthstr.ptr, depthstr.length);
                        conv[depthstr.length] = '\0';
                        
                        // convert to long first and validate the range,
                        // otherwise a huge number would silently wrap
                        // to some other depth when it is narrowed
                        errno = 0;
                        long depth = strtol(conv, &chk, 10);
                        if (*chk || errno == ERANGE
                                || depth < -1 || depth > INT_MAX) {
                            stmt->errorcode = DAVQL_ERROR_INVALID_DEPTH;
                            stmt->errormessage = strdup(_invalid_depth);
                        } else {
                            stmt->depth = (int)depth;
                        }
                        free(conv);
                    }
                } else {
                    stmt->errorcode = DAVQL_ERROR_INVALID_DEPTH;
                    stmt->errormessage = strdup(_invalid_depth);
                }
                
                dav_free_expression(depthexpr);
            }
            break;
        }
        state->step = 3;
        return _step_WITHopt_; // continue parsing WITH clause
    case 3:
        // a with clause may be continued with a comma
        // or another clause may follow
        if (!sstrcmp(tokendata, S(","))) {
            state->step = 0; // reset clause parser
            return _step_WITH_;
        } else if (!sstrcasecmp(tokendata, S("where"))) {
            return _step_WHERE_;
        } else if (!sstrcasecmp(tokendata, S("order"))) {
            return _step_ORDER_BY_;
        } else {
            dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
                _unexpected_token, stmt, token);
            return 0;
        }
    default:
        stmt->errorcode = DAVQL_ERROR_INVALID;
        stmt->errormessage = strdup(_parser_state);
        return 0;
    }
}

/*
 * State of the ORDER BY clause parser, kept between the calls of
 * dav_parse_orderby_clause() for the single tokens.
 */
struct orderby_parser_state {
    /*
     * 0: expect by keyword
     * 1: expect identifier / number
     * 2: expect asc / desc or comma
     * 3: expect comma
     */
    int step;
    // criterion created in step 1; asc / desc in step 2 applies to it
    DavQLOrderCriterion *crit;
};

/**
 * Processes one token of the ORDER BY clause.
 *
 * The clause consists of the keyword "by" followed by a comma separated
 * list of criteria. A criterion is an identifier or a number, optionally
 * followed by "asc" or "desc". The steps of the internal state machine are
 * described at struct orderby_parser_state.
 *
 * @param stmt the statement receiving the criteria and error information
 * @param token the list element of the token to process
 * @param state the parser state, zero-initialized before the first token
 * @return _step_ORDER_BY_ if the clause must be continued,
 * _step_ORDER_BYopt_ if the clause may be continued or the statement may
 * end, 0 for an unexpected token, or _step_end_ for an unknown state
 */
static int dav_parse_orderby_clause(DavQLStatement *stmt, UcxList *token,
        struct orderby_parser_state *state) {
    
    sstr_t tokendata = *token_sstr(token);
    
    switch (state->step) {
    case 0:
        if (!sstrcasecmp(tokendata, S("by"))) {
            state->step++;
        } else {
            stmt->errorcode = DAVQL_ERROR_UNEXPECTED_TOKEN;
            stmt->errormessage = ucx_sprintf(_expected_by,
                sfmtarg(tokendata)).ptr;
        }
        return _step_ORDER_BY_;
    case 1:
        state->crit = malloc(sizeof(DavQLOrderCriterion));
        state->crit->column = dav_parse_expression(stmt, token, 1);
        state->crit->descending = 0;
        
        if (!state->crit->column || (
            state->crit->column->type != DAVQL_NUMBER &&
            state->crit->column->type != DAVQL_IDENTIFIER)) {
            // release the rejected criterion completely and do not keep
            // a pointer to the freed memory in the parser state
            if (state->crit->column) {
                dav_free_expression(state->crit->column);
            }
            free(state->crit);
            state->crit = NULL;
            dav_error_in_context(DAVQL_ERROR_IDORNUM_EXPECTED,
                _idornum_expected, stmt, token);
        } else {
            stmt->orderby = ucx_list_append(stmt->orderby, state->crit);
        }
        
        // continue parsing clause, if more tokens available
        state->step++;
        return _step_ORDER_BYopt_;
    case 2:
        if (!sstrcasecmp(tokendata, S("desc"))) {
            // crit is NULL, if the criterion has been rejected in step 1
            if (state->crit) {
                state->crit->descending = 1;
            }
        } else if (!sstrcasecmp(tokendata, S("asc"))) {
            if (state->crit) {
                state->crit->descending = 0;
            }
        } else if (!sstrcmp(tokendata, S(","))) {
            state->step = 1; // reset clause parser
            return _step_ORDER_BY_; // statement must not end now
        } else {
            dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
                _unexpected_token, stmt, token);
            return 0;
        }
        // continue parsing clause, if more tokens available
        // (advance the step, not the state pointer: after asc / desc only
        // a comma is allowed)
        state->step++;
        return _step_ORDER_BYopt_;
    case 3:
        if (!sstrcmp(tokendata, S(","))) {
            state->step = 1; // reset clause parser
            return _step_ORDER_BY_; // statement must not end now
        } else {
            dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
                _unexpected_token, stmt, token);
            return 0;
        }
    }
    
    return _step_end_;
}

/**
 * Releases an order criterion together with its column expression.
 * @param crit the criterion to free (must not be NULL)
 */
static void dav_free_order_criterion(DavQLOrderCriterion *crit) {
    // the column is expected to be set, but stay null-safe regardless
    if (crit->column != NULL) {
        dav_free_expression(crit->column);
    }

    free(crit);
}

/**
 * Semantic analysis of a get statement.
 *
 * Currently a placeholder that performs no checks.
 *
 * @param stmt the statement to analyze.
 */
static void dav_analyze_get_statement(DavQLStatement *stmt) {
    // TODO: make it so
    (void) stmt; // not used until the analysis is implemented
}


/**
 * Parser of a get statement.
 *
 * Feeds the tokens one by one to the clause parser selected by the current
 * step. Each clause parser returns the step to use for the next token.
 * Processing stops at the first token after which the statement carries an
 * error code. When all tokens were consumed without an error, a step smaller
 * than _step_end_ is reported as an unexpected end of the statement;
 * otherwise the semantic analysis is invoked.
 *
 * @param stmt the statement object that shall contain the syntax tree
 * @param tokens the token list
 */
static void dav_parse_get_statement(DavQLStatement *stmt, UcxList *tokens) {
    // zero-initialized state for every clause parser that keeps one
    struct fieldlist_parser_state fieldlist_state = {0};
    struct with_parser_state with_state = {0};
    struct orderby_parser_state orderby_state = {0};

    int next = _step_fieldlist_;

    stmt->type = DAVQL_GET;

    UCX_FOREACH(token, tokens) {
        switch (next) {
        case _step_fieldlist_:
            next = dav_parse_fieldlist(stmt, token, &fieldlist_state);
            break;
        case _step_FROM_:
            next = dav_parse_from(stmt, token);
            break;
        case _step_WITH_:
        case _step_WITHopt_:
            next = dav_parse_with_clause(stmt, token, &with_state);
            break;
        case _step_WHERE_:
            // TODO: implement
            next = _step_end_;
            break;
        case _step_ORDER_BY_:
        case _step_ORDER_BYopt_:
            next = dav_parse_orderby_clause(stmt, token, &orderby_state);
            break;
        case _step_end_:
            // the statement is already complete: too much input data
            dav_error_in_context(DAVQL_ERROR_UNEXPECTED_TOKEN,
                _unexpected_token, stmt, token);
            break;
        default:
            // a clause parser handed back a step this loop does not know
            stmt->errorcode = DAVQL_ERROR_INVALID;
            stmt->errormessage = strdup(_parser_state);
            break;
        }

        // stop at the first detected error
        if (stmt->errorcode) {
            break;
        }
    }

    if (stmt->errorcode) {
        return;
    }

    if (next < _step_end_) {
        // tokens ran out while more input was still required
        stmt->errorcode = DAVQL_ERROR_UNEXPECTED_END;
        stmt->errormessage = strdup(_unexpected_end_msg);
    } else {
        dav_analyze_get_statement(stmt);
    }
}

/**
 * Parser of a set statement.
 *
 * Only marks the statement as DAVQL_SET so far; the tokens are visited but
 * not evaluated yet.
 *
 * @param stmt the statement object that shall contain the syntax tree
 * @param tokens the token list
 */
static void dav_parse_set_statement(DavQLStatement *stmt, UcxList *tokens) {
    stmt->type = DAVQL_SET;

    UCX_FOREACH(elem, tokens) {
        sstr_t text = *token_sstr(elem);
        (void) text; // nothing is done with the token text yet
    }
}

/**
 * Parses a DavQL statement from its source text.
 *
 * The text is trimmed and tokenized. The first token selects the statement
 * parser: "get" or "set", compared case-insensitively. Any other first
 * token, or input that yields no tokens at all, produces a statement of
 * type DAVQL_ERROR with error code DAVQL_ERROR_INVALID.
 *
 * NOTE(review): the statement stores the result of sstrtrim(srctext);
 * presumably this references the caller's buffer instead of copying it, in
 * which case srctext must outlive the statement - confirm.
 *
 * @param srctext the source text of the statement
 * @return the parsed statement (check its errorcode), to be released with
 *         dav_free_statement(), or NULL if the statement object could not
 *         be allocated
 */
DavQLStatement* dav_parse_statement(sstr_t srctext) {
    DavQLStatement *stmt = calloc(1, sizeof *stmt);
    if (!stmt) {
        // without a statement object there is nowhere to report an error
        return NULL;
    }
    
    // default values
    stmt->type = -1;
    stmt->depth = 1;
    
    // save trimmed source text
    stmt->srctext = sstrtrim(srctext);
    
    // tokenization
    UcxList *tokens = dav_parse_tokenize(stmt->srctext);
    
    // becomes nonzero once a known statement keyword has been dispatched
    int recognized = 0;
    
    if (tokens) {
        // use first token to determine query type; copy it by value,
        // because its list element is released right away
        sstr_t keyword = *token_sstr(tokens);
        free(tokens->data);
        tokens = ucx_list_remove(tokens, tokens);
        
        if (!sstrcasecmp(keyword, S("get"))) {
            dav_parse_get_statement(stmt, tokens);
            recognized = 1;
        } else if (!sstrcasecmp(keyword, S("set"))) {
            dav_parse_set_statement(stmt, tokens);
            recognized = 1;
        }
        
        // free token data
        UCX_FOREACH(elem, tokens) {
            free(elem->data);
        }
        ucx_list_free(tokens);
    }
    
    if (!recognized) {
        // no tokens at all or an unknown statement keyword
        stmt->type = DAVQL_ERROR;
        stmt->errorcode = DAVQL_ERROR_INVALID;
        stmt->errormessage = strdup(_invalid_msg);
    }
    
    return stmt;
}

/**
 * Destroys a statement.
 *
 * Releases the field list, the where expression, the error message and the
 * order criteria before freeing the statement object itself.
 *
 * @param stmt the statement to free; NULL is accepted and ignored
 */
void dav_free_statement(DavQLStatement *stmt) {
    if (!stmt) {
        return;
    }
    
    UCX_FOREACH(expr, stmt->fields) {
        dav_free_field(expr->data);
    }
    ucx_list_free(stmt->fields);
    
    if (stmt->where) {
        dav_free_expression(stmt->where);
    }
    
    // free() ignores NULL, so no guard is needed for a missing message
    free(stmt->errormessage);
    
    UCX_FOREACH(crit, stmt->orderby) {
        dav_free_order_criterion(crit->data);
    }
    ucx_list_free(stmt->orderby);
    
    free(stmt);
}

mercurial