243 // ------------------------------------------------------------------------ |
243 // ------------------------------------------------------------------------ |
244 |
244 |
245 #define _unexpected_end_msg "unexpected end of statement" |
245 #define _unexpected_end_msg "unexpected end of statement" |
246 #define _invalid_msg "invalid statement" |
246 #define _invalid_msg "invalid statement" |
247 #define _unexpected_token "unexpected token (%.*s [->]%.*s %.*s)" |
247 #define _unexpected_token "unexpected token (%.*s [->]%.*s %.*s)" |
|
248 #define _missing_quote "missing closing quote symbol (%.*s)" |
248 |
249 |
249 static UcxList* dav_parse_tokenize(sstr_t src) { |
250 static UcxList* dav_parse_tokenize(sstr_t src) { |
250 UcxList *tokens = NULL; |
251 UcxList *tokens = NULL; |
251 |
252 |
252 // Delimiters: whitespace and dead whitespace around commas |
253 // Delimiters: whitespace and dead whitespace around commas |
253 sstr_t *token = NULL; |
254 sstr_t *token = NULL; |
|
255 char insequence = '\0'; |
254 for (size_t i = 0 ; i < src.length ; i++) { |
256 for (size_t i = 0 ; i < src.length ; i++) { |
255 if (isspace(src.ptr[i])) { |
257 // quoted strings / identifiers are a single token |
|
258 if (src.ptr[i] == '\'' || src.ptr[i] == '`') { |
|
259 if (src.ptr[i] == insequence) { |
|
260 // add quoted token to list |
|
261 token->length++; |
|
262 tokens = ucx_list_append(tokens, token); |
|
263 token = NULL; |
|
264 insequence = '\0'; |
|
265 } else if (insequence == '\0') { |
|
266 insequence = src.ptr[i]; |
|
267 // always create new token for quoted strings |
|
268 if (token) { |
|
269 tokens = ucx_list_append(tokens, token); |
|
270 } |
|
271 token = malloc(sizeof(sstr_t)); |
|
272 token->ptr = src.ptr + i; |
|
273 token->length = 1; |
|
274 } else { |
|
275 // add other kind of quotes to token |
|
276 token->length++; |
|
277 } |
|
278 } else if (insequence) { |
|
279 token->length++; |
|
280 } else if (isspace(src.ptr[i])) { |
256 // add token before spaces to list (if any) |
281 // add token before spaces to list (if any) |
257 if (token) { |
282 if (token) { |
258 tokens = ucx_list_append(tokens, token); |
283 tokens = ucx_list_append(tokens, token); |
259 token = NULL; |
284 token = NULL; |
260 } |
285 } |
285 |
310 |
286 if (token) { |
311 if (token) { |
287 tokens = ucx_list_append(tokens, token); |
312 tokens = ucx_list_append(tokens, token); |
288 } |
313 } |
289 |
314 |
290 // now find quotes and backsticks and merge enclosed tokens |
|
291 // TODO: make it so or disable tokenization in such cases in above code |
|
292 |
|
293 return tokens; |
315 return tokens; |
294 } |
316 } |
295 |
317 |
296 #define token_sstr(listelem) ((sstr_t*)(listelem)->data) |
318 #define token_sstr(listelem) ((sstr_t*)(listelem)->data) |
297 static DavQLExpression* dav_parse_expression(UcxList* starttoken, size_t n) { |
319 static DavQLExpression* dav_parse_expression( |
|
320 DavQLStatement* stmt, UcxList* starttoken, size_t n) { |
298 if (n == 0) { |
321 if (n == 0) { |
299 return NULL; |
322 return NULL; |
300 } |
323 } |
301 |
324 |
302 DavQLExpression *expr = calloc(1, sizeof(DavQLExpression)); |
325 DavQLExpression *expr = calloc(1, sizeof(DavQLExpression)); |
306 |
329 |
307 // special case - only one token |
330 // special case - only one token |
308 if (n == 1) { |
331 if (n == 1) { |
309 expr->srctext.length = token_sstr(starttoken)->length; |
332 expr->srctext.length = token_sstr(starttoken)->length; |
310 char firstchar = expr->srctext.ptr[0]; |
333 char firstchar = expr->srctext.ptr[0]; |
|
334 char lastchar = expr->srctext.ptr[expr->srctext.length-1]; |
311 if (firstchar == '\'' || isdigit(firstchar)) { |
335 if (firstchar == '\'' || isdigit(firstchar)) { |
312 expr->type = DAVQL_LITERAL; |
336 expr->type = DAVQL_LITERAL; |
313 } else { |
337 } else { |
314 expr->type = DAVQL_IDENTIFIER; |
338 expr->type = DAVQL_IDENTIFIER; |
|
339 } |
|
340 // remove quotes (if any) |
|
341 if (firstchar == '\'' || firstchar == '`') { |
|
342 if (lastchar != firstchar) { |
|
343 stmt->errorcode = DAVQL_ERROR_MISSING_QUOTE; |
|
344 stmt->errormessage = |
|
345 ucx_sprintf(_missing_quote, sfmtarg(expr->srctext)).ptr; |
|
346 } |
|
347 expr->srctext.ptr++; |
|
348 if (expr->srctext.length > 2) { |
|
349 expr->srctext.length -= 2; |
|
350 } else { |
|
351 expr->srctext.length = 0; |
|
352 } |
315 } |
353 } |
316 } else { |
354 } else { |
317 UcxList* token = starttoken; |
355 UcxList* token = starttoken; |
318 |
356 |
319 // check, if first token is ( |
357 // check, if first token is ( |
400 case 530: |
442 case 530: |
401 if (!sstrcasecmp(tokendata, S("with"))) { |
443 if (!sstrcasecmp(tokendata, S("with"))) { |
402 step = 40; |
444 step = 40; |
403 } else { |
445 } else { |
404 dav_parse_unexpected_token(stmt, token); |
446 dav_parse_unexpected_token(stmt, token); |
405 step = 999; |
447 goto ultrabreak; |
406 } |
448 } |
407 break; |
449 break; |
408 // field list |
450 // field list |
409 case 10: { |
451 case 10: { |
410 _Bool fromkeyword = !sstrcasecmp(tokendata, S("from")); |
452 _Bool fromkeyword = !sstrcasecmp(tokendata, S("from")); |
411 if (fromkeyword || !sstrcmp(tokendata, S(","))) { |
453 if (fromkeyword || !sstrcmp(tokendata, S(","))) { |
412 if (exprstart) { |
454 if (exprstart) { |
413 stmt->fields = ucx_list_append(stmt->fields, |
455 stmt->fields = ucx_list_append(stmt->fields, |
414 dav_parse_expression(exprstart, exprlen)); |
456 dav_parse_expression(stmt, exprstart, exprlen)); |
415 exprstart = NULL; |
457 exprstart = NULL; |
416 exprlen = 0; |
458 exprlen = 0; |
417 } else { |
459 } else { |
418 // TODO: throw syntax error |
460 // TODO: throw syntax error |
419 } |
461 } |