libidav/davqlparser.c

changeset 88
4d6b03bd7034
parent 87
ed21d95984bb
child 89
785f6007a0c1
equal deleted inserted replaced
87:ed21d95984bb 88:4d6b03bd7034
243 // ------------------------------------------------------------------------ 243 // ------------------------------------------------------------------------
244 244
245 #define _unexpected_end_msg "unexpected end of statement" 245 #define _unexpected_end_msg "unexpected end of statement"
246 #define _invalid_msg "invalid statement" 246 #define _invalid_msg "invalid statement"
247 #define _unexpected_token "unexpected token (%.*s [->]%.*s %.*s)" 247 #define _unexpected_token "unexpected token (%.*s [->]%.*s %.*s)"
248 #define _missing_quote "missing closing quote symbol (%.*s)"
248 249
249 static UcxList* dav_parse_tokenize(sstr_t src) { 250 static UcxList* dav_parse_tokenize(sstr_t src) {
250 UcxList *tokens = NULL; 251 UcxList *tokens = NULL;
251 252
252 // Delimiters: whitespace and dead whitespace around commas 253 // Delimiters: whitespace and dead whitespace around commas
253 sstr_t *token = NULL; 254 sstr_t *token = NULL;
255 char insequence = '\0';
254 for (size_t i = 0 ; i < src.length ; i++) { 256 for (size_t i = 0 ; i < src.length ; i++) {
255 if (isspace(src.ptr[i])) { 257 // quoted strings / identifiers are a single token
258 if (src.ptr[i] == '\'' || src.ptr[i] == '`') {
259 if (src.ptr[i] == insequence) {
260 // add quoted token to list
261 token->length++;
262 tokens = ucx_list_append(tokens, token);
263 token = NULL;
264 insequence = '\0';
265 } else if (insequence == '\0') {
266 insequence = src.ptr[i];
267 // always create new token for quoted strings
268 if (token) {
269 tokens = ucx_list_append(tokens, token);
270 }
271 token = malloc(sizeof(sstr_t));
272 token->ptr = src.ptr + i;
273 token->length = 1;
274 } else {
275 // add other kind of quotes to token
276 token->length++;
277 }
278 } else if (insequence) {
279 token->length++;
280 } else if (isspace(src.ptr[i])) {
256 // add token before spaces to list (if any) 281 // add token before spaces to list (if any)
257 if (token) { 282 if (token) {
258 tokens = ucx_list_append(tokens, token); 283 tokens = ucx_list_append(tokens, token);
259 token = NULL; 284 token = NULL;
260 } 285 }
285 310
286 if (token) { 311 if (token) {
287 tokens = ucx_list_append(tokens, token); 312 tokens = ucx_list_append(tokens, token);
288 } 313 }
289 314
290 // now find quotes and backsticks and merge enclosed tokens
291 // TODO: make it so or disable tokenization in such cases in above code
292
293 return tokens; 315 return tokens;
294 } 316 }
295 317
296 #define token_sstr(listelem) ((sstr_t*)(listelem)->data) 318 #define token_sstr(listelem) ((sstr_t*)(listelem)->data)
297 static DavQLExpression* dav_parse_expression(UcxList* starttoken, size_t n) { 319 static DavQLExpression* dav_parse_expression(
320 DavQLStatement* stmt, UcxList* starttoken, size_t n) {
298 if (n == 0) { 321 if (n == 0) {
299 return NULL; 322 return NULL;
300 } 323 }
301 324
302 DavQLExpression *expr = calloc(1, sizeof(DavQLExpression)); 325 DavQLExpression *expr = calloc(1, sizeof(DavQLExpression));
306 329
307 // special case - only one token 330 // special case - only one token
308 if (n == 1) { 331 if (n == 1) {
309 expr->srctext.length = token_sstr(starttoken)->length; 332 expr->srctext.length = token_sstr(starttoken)->length;
310 char firstchar = expr->srctext.ptr[0]; 333 char firstchar = expr->srctext.ptr[0];
334 char lastchar = expr->srctext.ptr[expr->srctext.length-1];
311 if (firstchar == '\'' || isdigit(firstchar)) { 335 if (firstchar == '\'' || isdigit(firstchar)) {
312 expr->type = DAVQL_LITERAL; 336 expr->type = DAVQL_LITERAL;
313 } else { 337 } else {
314 expr->type = DAVQL_IDENTIFIER; 338 expr->type = DAVQL_IDENTIFIER;
339 }
340 // remove quotes (if any)
341 if (firstchar == '\'' || firstchar == '`') {
342 if (lastchar != firstchar) {
343 stmt->errorcode = DAVQL_ERROR_MISSING_QUOTE;
344 stmt->errormessage =
345 ucx_sprintf(_missing_quote, sfmtarg(expr->srctext)).ptr;
346 }
347 expr->srctext.ptr++;
348 if (expr->srctext.length > 2) {
349 expr->srctext.length -= 2;
350 } else {
351 expr->srctext.length = 0;
352 }
315 } 353 }
316 } else { 354 } else {
317 UcxList* token = starttoken; 355 UcxList* token = starttoken;
318 356
319 // check, if first token is ( 357 // check, if first token is (
386 UcxList *exprstart = NULL; 424 UcxList *exprstart = NULL;
387 size_t exprlen = 0; 425 size_t exprlen = 0;
388 426
389 // Process tokens 427 // Process tokens
390 UCX_FOREACH(token, tokens) { 428 UCX_FOREACH(token, tokens) {
429 if (stmt->errorcode) {
430 ultrabreak: break;
431 }
432
391 sstr_t tokendata = *token_sstr(token); 433 sstr_t tokendata = *token_sstr(token);
392 434
393 switch (step) { 435 switch (step) {
394 // optional clauses 436 // optional clauses
395 case 520: 437 case 520:
400 case 530: 442 case 530:
401 if (!sstrcasecmp(tokendata, S("with"))) { 443 if (!sstrcasecmp(tokendata, S("with"))) {
402 step = 40; 444 step = 40;
403 } else { 445 } else {
404 dav_parse_unexpected_token(stmt, token); 446 dav_parse_unexpected_token(stmt, token);
405 step = 999; 447 goto ultrabreak;
406 } 448 }
407 break; 449 break;
408 // field list 450 // field list
409 case 10: { 451 case 10: {
410 _Bool fromkeyword = !sstrcasecmp(tokendata, S("from")); 452 _Bool fromkeyword = !sstrcasecmp(tokendata, S("from"));
411 if (fromkeyword || !sstrcmp(tokendata, S(","))) { 453 if (fromkeyword || !sstrcmp(tokendata, S(","))) {
412 if (exprstart) { 454 if (exprstart) {
413 stmt->fields = ucx_list_append(stmt->fields, 455 stmt->fields = ucx_list_append(stmt->fields,
414 dav_parse_expression(exprstart, exprlen)); 456 dav_parse_expression(stmt, exprstart, exprlen));
415 exprstart = NULL; 457 exprstart = NULL;
416 exprlen = 0; 458 exprlen = 0;
417 } else { 459 } else {
418 // TODO: throw syntax error 460 // TODO: throw syntax error
419 } 461 }
432 } 474 }
433 break; 475 break;
434 } 476 }
435 // from clause 477 // from clause
436 case 20: { 478 case 20: {
437 DavQLExpression *expr = dav_parse_expression(token, 1); 479 DavQLExpression *expr = dav_parse_expression(stmt, token, 1);
438 stmt->path = expr->srctext; 480 stmt->path = expr->srctext;
439 dav_free_expression(expr); 481 dav_free_expression(expr);
440 step = 520; 482 step = 520;
441 break; 483 break;
442 } 484 }

mercurial