better and better and better iso8601 parser

Sun, 18 Oct 2015 12:44:57 +0200

author
Mike Becker <universe@uap-core.de>
date
Sun, 18 Oct 2015 12:44:57 +0200
changeset 177
3c0734eeab33
parent 176
747f3796eddd
child 178
e137883e620f

better and better and better iso8601 parser

libidav/utils.c file | annotate | diff | comparison | revisions
--- a/libidav/utils.c	Thu Oct 15 19:04:49 2015 +0200
+++ b/libidav/utils.c	Sun Oct 18 12:44:57 2015 +0200
@@ -57,116 +57,102 @@
 #include "crypto.h"
 #include "webdav.h"
 
-static int parse_iso8601(char *str, time_t *result) {
+static size_t extractval(sstr_t str, char *result, char delim) { // copies the digits of str into result, skipping delim; returns the number of digits
+    size_t n = 0;
+    for(size_t i = 0; i < str.length ; i++) {
+        if(isdigit(str.ptr[i])) {
+            result[n++] = str.ptr[i]; // no bounds check: result must have room for str.length + 1 chars
+        } else if(str.ptr[i] != delim) {
+            return 0; // any character that is neither digit nor delim aborts; result is NOT terminated on this path
+        }
+    }
+    result[n] = '\0';
+    return n;
+}
+
+static int parse_iso8601(char *iso8601str) { // returns the computed time value, or 0 on any parse failure; NOTE(review): declared int although the value comes from mktime()
 
     // safety
-    if(!str || !result) {
-        return 1;
+    if(!iso8601str) {
+        return 0;
     }
     
+    // local vars
     struct tm tparts;
     memset(&tparts, 0, sizeof(struct tm));
-    *result = 0;
     long val;
-
-    // skip leading spaces
-    while(isspace(*str)) {
-        str++;
-    }
-
-    // ensure we have numeric values
-    if(!isdigit(*str)) {
-        return 1;
-    }
-
-    // starting parsing the year
-    val = strtoul(str, &str, 10);
+    char conv[16]; // scratch buffer for the digit strings produced by extractval
     
-    if(*str == '-') {
-        // month (and day) seem to be dash separated
-        tparts.tm_year = val - 1900;
-        str++;
-        tparts.tm_mon = strtoul(str, &str, 10) - 1;
+    // work on the trimmed string
+    sstr_t date = sstrtrim(sstr(iso8601str));
 
-        if(*str++ != '-') {
-            return 1;
-        }
-
-        tparts.tm_mday = strtoul(str, &str, 10);
-    } else {
-        // year, month, day was parsed as one big integer
-        tparts.tm_mday = val % 100;
-        tparts.tm_mon = (val % 10000) / 100 - 1;
-        tparts.tm_year = val / 10000 - 1900;
+    sstr_t time = sstrchr(date, 'T'); // presumably the substring starting at 'T', empty if absent — sstrchr is defined outside this diff
+    if(time.length == 0) {
+        return 0; // the 'T' separator is mandatory
     }
-
-    // time separator
-    if(*str != 'T') {
-        return 1;
-    }
-    str++;
-
-    // ensure we have numeric values (unsigned)
-    if(!isdigit(*str)) {
-        return 1;
+    date.length = time.ptr - date.ptr; // date now covers only the part before 'T'
+    time.ptr++; time.length--; // step over the 'T' itself
+    
+    sstr_t tzinfo;
+    if((tzinfo = sstrchr(time, 'Z')).length > 0 || // the first of 'Z', '+', '-' found in the time part marks the zone suffix
+        (tzinfo = sstrchr(time, '+')).length > 0 ||
+        (tzinfo = sstrchr(time, '-')).length > 0) {
+        
+        time.length = tzinfo.ptr - time.ptr; // cut the zone suffix off the time part
     }
 
-    // start parsing the hour
-    val = strtoul(str, &str, 10);
-    if(*str == ':') {
-        // minutes (and seconds) are separated by colon
-        tparts.tm_hour = val;
-        str++;
-        tparts.tm_min = strtoul(str, &str, 10);
-
-        if(*str++ != ':') {
-            return 1;
-        }
+    // parse date
+    if((date.length != 8 && date.length != 10) // 8 chars (digits only) or 10 chars (with two dashes)
+            || extractval(date, conv , '-') != 8) { // exactly eight digits are required in either form
+        return 0;
+    }
+    val = atol(conv);
+    if(val < 19000000L) { // rejects years before 1900, so tm_year below cannot become negative
+        return 0;
+    }
+    tparts.tm_mday = val % 100;
+    tparts.tm_mon = (val % 10000) / 100 - 1; // struct tm months are zero-based
+    tparts.tm_year = val / 10000 - 1900; // struct tm years count from 1900
+    
+    // parse time and skip possible fractional seconds
+    sstr_t frac;
+    if((frac = sstrchr(time, '.')).length > 0 ||
+        (frac = sstrchr(time, ',')).length > 0) {
+        time.length = frac.ptr - time.ptr; // the fraction is dropped, not evaluated
+    }
+    if((time.length != 6 && time.length != 8) // 6 chars (digits only) or 8 chars (with two colons)
+            || extractval(time, conv , ':') != 6) {
+        return 0;
+    }
+    val = atol(conv);
+    tparts.tm_sec = val % 100;
+    tparts.tm_min = (val % 10000) / 100;
+    tparts.tm_hour = val / 10000;
 
-        tparts.tm_sec = strtoul(str, &str, 10);
-    } else {
-        // minutes (and seconds) are one big integer
-        tparts.tm_sec = val % 100;
-        tparts.tm_min = (val % 10000) / 100;
-        tparts.tm_hour = val / 10000;
-    }
-
-    // parse fractional seconds, but skip them (we return a time_t)
-    if(*str == ',' || *str == '.') {
-        do {
-            str++;
-        } while(isdigit(*str));
-    }
 
     // parse time zone (if any)
-    if(*str == 'Z') {
-        str++;
-        *result = mktime(&tparts) - timezone;
-    } else if (*str == '+' || *str == '-') {
-        int sign = (*str == '+') ? -1 : 1;
-
-        val = strtoul(str + 1, &str, 10);
-
-        if (*str == ':') {
-            val = 60 * val + strtoul(str + 1, &str, 10);
-        } else {
-            val = 60 * (val / 100) + (val % 100);
-        }
-
-        *result = mktime(&tparts) - timezone + (time_t) (60 * val * sign);
-    } else {
+    if(tzinfo.length == 0) {
         // local time
         tparts.tm_isdst = -1;
-        *result = mktime(&tparts);
-    }
+        return mktime(&tparts); // no zone suffix: the fields are handed to mktime as they are
+    } else if(!sstrcmp(tzinfo, S("Z"))) {
+        return mktime(&tparts) - timezone; // NOTE(review): 'timezone' is presumably the POSIX global (seconds west of UTC) — confirm
+    } else if(tzinfo.ptr[0] == '+' || tzinfo.ptr[0] == '-') {
+        int sign = (tzinfo.ptr[0] == '+') ? -1 : 1; // inverted: a '+' offset is subtracted from the result, a '-' offset is added
 
-    // skip trailing spaces
-    while(isspace(*str)) {
-        str++;
+        if(tzinfo.length > 6) { // at most the sign plus five characters, e.g. "+hh:mm"
+            return 0;
+        } else {
+            tzinfo.ptr++; tzinfo.length--; // step over the sign
+            extractval(tzinfo, conv, ':'); // NOTE(review): return value is not checked; on failure conv is left unterminated at the new position and may still hold digits from the time part
+            val = atol(conv);
+            val = 60 * (val / 100) + (val % 100); // hhmm -> minutes
+
+            return mktime(&tparts) - timezone + (time_t) (60 * val * sign); // offset converted from minutes to seconds
+        }
+    } else {
+        return 0;
     }
-
-    // string must be zero terminated, no further characters may follow
-    return *str != '\0'; // return zero on success
 }
 
 
@@ -177,12 +163,7 @@
         return 0;
     }
     
-    time_t result;
-    if(!parse_iso8601(str, &result)) {
-        return result;
-    } else {
-        return 0;
-    }
+    return parse_iso8601(str);
 }
 
 time_t util_parse_lastmodified(char *str) {

mercurial