add util_parse_uri default tip

Sat, 21 Feb 2026 19:36:15 +0100

author
Olaf Wintermann <olaf.wintermann@gmail.com>
date
Sat, 21 Feb 2026 19:36:15 +0100
changeset 693
b26cae13b79a
parent 692
32faa1d6a744

add util_parse_uri

src/server/test/main.c file | annotate | diff | comparison | revisions
src/server/test/uri.c file | annotate | diff | comparison | revisions
src/server/test/uri.h file | annotate | diff | comparison | revisions
src/server/util/util.c file | annotate | diff | comparison | revisions
src/server/util/util.h file | annotate | diff | comparison | revisions
--- a/src/server/test/main.c	Sat Feb 21 14:40:03 2026 +0100
+++ b/src/server/test/main.c	Sat Feb 21 19:36:15 2026 +0100
@@ -85,6 +85,8 @@
     cx_test_register(suite, test_util_uri_escape_space);
     cx_test_register(suite, test_util_uri_escape_latin);
     cx_test_register(suite, test_util_uri_escape_kanji);
+    cx_test_register(suite, test_util_parse_uri);
+    cx_test_register(suite, test_util_parse_uri_error);
     cx_test_register(suite, test_pblock_iterator);
     
     // httpparser tests
--- a/src/server/test/uri.c	Sat Feb 21 14:40:03 2026 +0100
+++ b/src/server/test/uri.c	Sat Feb 21 19:36:15 2026 +0100
@@ -106,3 +106,87 @@
     }
 }
 
+CX_TEST(test_util_parse_uri) { // valid URIs: checks the components util_parse_uri reports for each
+    CX_TEST_DO {
+        WSUri uri; // reused as the output of every case below
+        CX_TEST_ASSERT(util_parse_uri("http://example.com", &uri)); // host only: no path, default http port
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.host, uri.hostlen), "example.com"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.scheme, uri.schemelen), "http"));
+        CX_TEST_ASSERT(uri.pathlen == 0);
+        CX_TEST_ASSERT(uri.scheme_num == WS_URI_HTTP);
+        CX_TEST_ASSERT(uri.port == 80);
+        // https with a path: default https port
+        CX_TEST_ASSERT(util_parse_uri("https://unixwork.de/path/", &uri));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.host, uri.hostlen), "unixwork.de"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.scheme, uri.schemelen), "https"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.path, uri.pathlen), "/path/"));
+        CX_TEST_ASSERT(uri.scheme_num == WS_URI_HTTPS);
+        CX_TEST_ASSERT(uri.port == 443);
+        // root path only (the port is not checked in this case)
+        CX_TEST_ASSERT(util_parse_uri("https://code.unixwork.de/", &uri));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.host, uri.hostlen), "code.unixwork.de"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.scheme, uri.schemelen), "https"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.path, uri.pathlen), "/"));
+        CX_TEST_ASSERT(uri.scheme_num == WS_URI_HTTPS);
+        // explicit port without a path: the path is expected to be empty
+        CX_TEST_ASSERT(util_parse_uri("http://pkg.unixwork.de:8080", &uri));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.host, uri.hostlen), "pkg.unixwork.de"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.scheme, uri.schemelen), "http"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.path, uri.pathlen), ""));
+        CX_TEST_ASSERT(uri.port == 8080);
+        CX_TEST_ASSERT(uri.scheme_num == WS_URI_HTTP);
+        // explicit port followed by a root path
+        CX_TEST_ASSERT(util_parse_uri("https://pkg.unixwork.eu:8443/", &uri));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.host, uri.hostlen), "pkg.unixwork.eu"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.scheme, uri.schemelen), "https"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.path, uri.pathlen), "/"));
+        CX_TEST_ASSERT(uri.port == 8443);
+        CX_TEST_ASSERT(uri.scheme_num == WS_URI_HTTPS);
+        // bracketed IPv6 literal: the host is reported without the brackets
+        CX_TEST_ASSERT(util_parse_uri("http://[::1]", &uri));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.host, uri.hostlen), "::1"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.scheme, uri.schemelen), "http"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.path, uri.pathlen), ""));
+        CX_TEST_ASSERT(uri.port == 80);
+        CX_TEST_ASSERT(uri.scheme_num == WS_URI_HTTP);
+        // IPv6 literal with an explicit port
+        CX_TEST_ASSERT(util_parse_uri("http://[fe80::1ff:fe23:4567:890a]:8081", &uri));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.host, uri.hostlen), "fe80::1ff:fe23:4567:890a"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.scheme, uri.schemelen), "http"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.path, uri.pathlen), ""));
+        CX_TEST_ASSERT(uri.port == 8081);
+        CX_TEST_ASSERT(uri.scheme_num == WS_URI_HTTP);
+        // IPv6 literal with a dotted IPv4 part, explicit port and path
+        CX_TEST_ASSERT(util_parse_uri("http://[::ffff:0.0.0.0]:8082/ipv6/path/", &uri));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.host, uri.hostlen), "::ffff:0.0.0.0"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.scheme, uri.schemelen), "http"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.path, uri.pathlen), "/ipv6/path/"));
+        CX_TEST_ASSERT(uri.port == 8082);
+        CX_TEST_ASSERT(uri.scheme_num == WS_URI_HTTP);
+        // IPv6 literal with a path but no port: default http port
+        CX_TEST_ASSERT(util_parse_uri("http://[::ffff:0.0.0.1]/ipv6/without/port/", &uri));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.host, uri.hostlen), "::ffff:0.0.0.1"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.scheme, uri.schemelen), "http"));
+        CX_TEST_ASSERT(!cx_strcmp(cx_strn(uri.path, uri.pathlen), "/ipv6/without/port/"));
+        CX_TEST_ASSERT(uri.port == 80);
+        CX_TEST_ASSERT(uri.scheme_num == WS_URI_HTTP);
+    }
+}
+
+CX_TEST(test_util_parse_uri_error) { // malformed URIs: util_parse_uri is expected to return 0 for each
+    CX_TEST_DO {
+        WSUri uri;
+        CX_TEST_ASSERT(!util_parse_uri("", &uri)); // empty string
+        CX_TEST_ASSERT(!util_parse_uri("http", &uri)); // no "://" separator
+        CX_TEST_ASSERT(!util_parse_uri("https://", &uri)); // nothing after the scheme separator
+        CX_TEST_ASSERT(!util_parse_uri("http://host:invalidport", &uri)); // port is not a number
+        CX_TEST_ASSERT(!util_parse_uri("http://host:01:02", &uri)); // second port separator
+        CX_TEST_ASSERT(!util_parse_uri("http:///", &uri)); // empty host
+        CX_TEST_ASSERT(!util_parse_uri("http://[::1", &uri)); // missing closing bracket
+        CX_TEST_ASSERT(!util_parse_uri("http://[::1]test", &uri)); // closing bracket not followed by ':' or '/'
+        CX_TEST_ASSERT(!util_parse_uri("http://[:[:1]", &uri)); // '[' inside the brackets
+        CX_TEST_ASSERT(!util_parse_uri("http://[hello-world]", &uri)); // '-' is not accepted inside the brackets
+        CX_TEST_ASSERT(!util_parse_uri("http://host[]", &uri)); // brackets inside a plain hostname
+        CX_TEST_ASSERT(!util_parse_uri("http://localhost:9999999", &uri)); // port value too large
+    }
+}
--- a/src/server/test/uri.h	Sat Feb 21 14:40:03 2026 +0100
+++ b/src/server/test/uri.h	Sat Feb 21 19:36:15 2026 +0100
@@ -43,6 +43,9 @@
 CX_TEST(test_util_uri_escape_latin);
 CX_TEST(test_util_uri_escape_kanji);
 
+CX_TEST(test_util_parse_uri);
+CX_TEST(test_util_parse_uri_error);
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/server/util/util.c	Sat Feb 21 14:40:03 2026 +0100
+++ b/src/server/util/util.c	Sat Feb 21 19:36:15 2026 +0100
@@ -1173,3 +1173,132 @@
     }
     return 0;
 }
+
+/* -------------------------- util_parse_uri -------------------------- */
+
+// Parses a NUL-terminated uri string into result.
+// Returns 1 on success and 0 if uri is NULL or is rejected by
+// util_parse_uri_len, which does the actual parsing.
+NSAPI_PUBLIC int util_parse_uri(const char *uri, WSUri *result) {
+    return uri ? util_parse_uri_len(uri, strlen(uri), result) : 0;
+}
+ 
+/*
+ * Parses "scheme://host[:port][/path]" from the first length bytes of uri.
+ * Nothing is copied: scheme, host and path in result point into uri and are
+ * not NUL-terminated. A bracketed IPv6 host is stored without its brackets.
+ * Without an explicit port, http/https get 80/443 and other schemes 0.
+ * Returns 1 on success, 0 on an invalid uri (result may be partially set).
+ */
+NSAPI_PUBLIC int util_parse_uri_len(const char *uri, size_t length, WSUri *result) {
+    if(!uri || !result || length == 0) {
+        return 0;
+    }
+    memset(result, 0, sizeof(WSUri));
+    
+    // get uri scheme
+    int scheme = -1;
+    size_t scheme_len = length;
+    short port = 0;
+    size_t i = 0;
+    for(;i<length;i++) {
+        if(uri[i] == ':') {
+            if(i+3 < length && uri[i+1] == '/' && uri[i+2] == '/') {
+                cxstring scheme_s = cx_strn(uri, i);
+                scheme_len = i;
+                if(!cx_strcmp(scheme_s, "http")) {
+                    scheme = WS_URI_HTTP;
+                    port = 80;
+                } else if(!cx_strcmp(scheme_s, "https")) {
+                    scheme = WS_URI_HTTPS;
+                    port = 443;
+                } else {
+                    scheme = WS_URI_OTHER;
+                }
+                i += 3;
+                break;
+            } else {
+                return 0; // invalid uri
+            }
+        }
+    }
+    result->scheme_num = scheme;
+    result->scheme = uri;
+    result->schemelen = scheme_len;
+    
+    if(i == length) {
+        return 0; // invalid uri: no "://" found
+    }
+    
+    // get host
+    const char *port_str = NULL;
+    const char *path = NULL;
+    int is_ipv6_addr = 0;
+    size_t host_start = i;
+    if(uri[i] == '[') {
+        host_start++;
+        is_ipv6_addr = 1;
+        i++;
+    }
+    for(;i<length;i++) {
+        char c = uri[i];
+        if(is_ipv6_addr) {
+            if(c == ']') {
+                // end of ipv6 address
+                result->host = uri + host_start;
+                result->hostlen = i - host_start;
+                is_ipv6_addr = 0;
+                // the next character must be a port or path separator
+                if(i+1 < length) {
+                    c = uri[i+1];
+                    if(c != '/' && c != ':') {
+                        return 0; // invalid uri
+                    }
+                }
+            } else if(c != ':' && c != '.' && c != '%' && !isalnum((unsigned char)c)) {
+                // the cast avoids passing a negative char value to isalnum()
+                return 0; // invalid ipv6 address
+            }
+        } else if(c == ':') {
+            if(port_str) {
+                return 0; // error: port was already specified
+            }
+            if(!result->host) {
+                result->host = uri + host_start;
+                result->hostlen = i - host_start;
+            }
+            // get port
+            port_str = uri + i + 1;
+        } else if(c == '/') {
+            path = uri + i;
+            break;
+        } else if(c == '[' || c == ']') {
+            return 0; // invalid hostname
+        }
+    }
+    if(is_ipv6_addr) {
+        return 0; // ipv6 address was not terminated
+    }
+    
+    if(port_str) {
+        // NOTE(review): port is a short, so ports above SHRT_MAX presumably fail
+        // in cx_strtos - confirm; accepting them needs a wider WSUri.port
+        size_t port_len = uri + i - port_str;
+        cxstring port_s = cx_strn(port_str, port_len);
+        if(cx_strtos(port_s, &port, 10) || port < 0) {
+            return 0; // invalid port (conversion failed or negative value)
+        }
+    }
+    if(!result->host) {
+        result->host = uri + host_start;
+        result->hostlen = i - host_start;
+    }
+    if(result->hostlen == 0) {
+        return 0; // empty host
+    }
+    
+    result->path = path;
+    result->pathlen = path ? (size_t)(uri + length - path) : 0;
+    result->port = port;
+    return 1;
+}
--- a/src/server/util/util.h	Sat Feb 21 14:40:03 2026 +0100
+++ b/src/server/util/util.h	Sat Feb 21 19:36:15 2026 +0100
@@ -129,6 +129,29 @@
 
 NSAPI_PUBLIC void INTutil_uri_parse(char *uri);
 
+// new util_parse_uri
+enum WSUriScheme { // scheme classification stored in WSUri.scheme_num
+    WS_URI_OTHER = 0, // any scheme other than "http" or "https"
+    WS_URI_HTTP, // "http", default port 80
+    WS_URI_HTTPS // "https", default port 443
+};
+
+typedef struct WSUri { // output of util_parse_uri(_len); pointers refer into the parsed uri, nothing is copied
+    const char *scheme; // start of the uri; not NUL-terminated, use schemelen
+    size_t schemelen; // number of bytes before "://"
+    const char *host; // host name or IPv6 address without the enclosing brackets; not NUL-terminated
+    size_t hostlen; // length of host in bytes
+    const char *path; // first '/' after host/port, or NULL if the uri has no path
+    size_t pathlen; // bytes from path to the end of the uri, 0 without a path
+    enum WSUriScheme scheme_num; // scheme as enum value
+    short port; // explicit port, else 80 (http) / 443 (https) / 0 (other schemes)
+} WSUri;
+
+NSAPI_PUBLIC int util_parse_uri(const char *uri, WSUri *result); // uri is NUL-terminated; returns 1 on success, 0 on error
+NSAPI_PUBLIC int util_parse_uri_len(const char *uri, size_t length, WSUri *result); // same, parsing the first length bytes of uri
+// end util_parse_uri
+
+
 #ifdef XP_WIN32
 NSAPI_PUBLIC int INTutil_uri_unescape_and_normalize(pool_handle_t *pool, char *s, char *unnormalized);
 #endif /* XP_WIN32 */

mercurial