Sat, 04 Jun 2022 19:27:25 +0200
port uri.cpp to uri.c
src/server/util/uri.c | file | annotate | diff | comparison | revisions | |
src/server/util/uri.cpp | file | annotate | diff | comparison | revisions | |
src/server/util/util.h | file | annotate | diff | comparison | revisions |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/server/util/uri.c Sat Jun 04 19:27:25 2022 +0200 @@ -0,0 +1,556 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * + * THE BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef XP_WIN32 +#define _MBCS +#include <windows.h> +#include <mbctype.h> +#endif + +#include "util.h" +#include "pool.h" +//include "frame/conf_api.h" +//include "support/stringvalue.h" + +#ifdef XP_WIN32 +static PRBool _getfullpathname = -1; +#endif /* XP_WIN32 */ + +/* --------------------------- util_uri_is_evil --------------------------- */ + +static inline int allow_dbcs_uri() +{ + /* + static int flagDbcsUri = -1; + if (flagDbcsUri == -1) { + flagDbcsUri = StringValue::getBoolean(conf_findGlobal("DbcsUri")); + } + return flagDbcsUri; + */ + return PR_FALSE; +} + +#ifdef XP_WIN32 +void set_fullpathname(PRBool b) +{ + _getfullpathname = b; +} +#endif /*XP_WIN32*/ + +NSAPI_PUBLIC int util_uri_is_evil_internal(const char *t, int allow_tilde, int allow_dot_dir) +{ +#ifdef XP_WIN32 + int flagDbcsUri = allow_dbcs_uri(); +#endif // XP_WIN32 + PRBool flagEmptySegment = PR_FALSE; + int x; + + for (x = 0; t[x]; ++x) { + if (t[x] == '/') { + if (flagEmptySegment) + return 1; // "/;a/b" +#ifdef XP_WIN32 + if (t[x+1] == '/' && x != 0) +#else + if (t[x+1] == '/') +#endif + return 1; + if (t[x+1] == ';') + flagEmptySegment = PR_TRUE; // "/;a/b" is evil, "/a/;b" is not + if (t[x+1] == '.') { + /* "." at end of line is always prohibited */ + if (t[x+2] == '\0') + return 1; + + /* "." as a path segment is prohibited conditionally */ + if (!allow_dot_dir && (t[x+2] == '/' || t[x+2] == ';')) + return 1; + + /* ".." as a path segment is always prohibited */ + if (t[x+2] == '.' && (t[x+3] == '/' || t[x+3] == ';' || t[x+3] == '\0')) + return 1; + } + } +#ifdef XP_WIN32 + // Don't allow '~' in the filename. On some filesystems a long name + // (e.g. longfilename.htm) can be accessed using '~' bypassing any ACL + // checks (e.g. longfi~1.htm). + if (!allow_tilde && (t[x] == '~')) { + return 1; + } + + // Do not allow ':' apart from drive letter. Windows filestream + // will treat filename::$DATA as a plain file & display content. + // So block it to prevent source viewing vulnerability. + if ((t[x] == ':') && x > 1) { + return 1; + } + + // On NT, the directory "abc...." is the same as "abc" + // The only cheap way to catch this globally is to disallow + // names with the trailing "."s. Hopefully this is not over + // restrictive. + // Also trailing spaces in names can wreak havoc on ACL checks + // and name resolution. Therefore, ban them on the end of a + // name. + if (((t[x] == '.') || (t[x] == ' ')) && + ((t[x+1] == ';') || (t[x+1] == '/') || (t[x+1] == '\0'))) + { + return 1; + } + + // Skip past the second byte of two byte DBCS characters. Bug 353999 + if (flagDbcsUri && t[x+1] && IsDBCSLeadByte(t[x])) x++; +#endif // XP_WIN32 + } + return 0; +} + +NSAPI_PUBLIC int util_uri_is_evil(const char *t) +{ + return util_uri_is_evil_internal(t, 0, 0); +} + + +/* -------------------- util_uri_unescape_and_normalize -------------------- */ + +#ifdef XP_WIN32 +/* The server calls this function to unescape the URI and also normalize + * the uri. Normalizing the uri converts all "\" characters in the URI + * and pathinfo portion to "/". Does not touch "\" in query strings. + */ +NSAPI_PUBLIC +int util_uri_unescape_and_normalize(pool_handle_t *pool, char *s, char *unnormalized) +{ + if(!(util_uri_unescape_strict(s))) + return 0; + + if (unnormalized) strcpy(unnormalized, s); + + if (_getfullpathname == -1) + _getfullpathname = (_getmbcp() != 0); + + /* Get canonical filename Bugid: 4672869 */ + if(_getfullpathname && strcmp(s, "*") && (*s == '/' ) ) { + char *pzAbsPath = NULL; + int pathlen = 0; + int len = 0; + int ret = 0; + if(!(pzAbsPath = util_canonicalize_uri(pool, s, strlen(s), NULL))) { + //Error canonicalizing; possibly pointing out of docroot + return 0; + } + char *pzPath = (char *)MALLOC(MAX_PATH + 1); /* reserved byte for trailing slash */ + char *pzFilename = NULL; + + /* If required length of the buffer(pzPath) is more than the allocated one i.e. MAX_PATH(neglecting the reserved byte for trailing slash), return BAD REQUEST. This will happen if length of uri is more than the specified uri length(257) for MBCS windows */ + if(!(ret = GetFullPathName(pzAbsPath, MAX_PATH, pzPath, &pzFilename)) || ( ret > MAX_PATH)){ + FREE(pzAbsPath); + FREE(pzPath); + return 0; + } + len = strlen(pzAbsPath); + pathlen = strlen( pzPath ); + + /* GetFullPathName behaves differently in case of WINNT and WIN2K */ + /* o/p string doesn't contain the trailing slash in case of WINNT */ + /* if i/p is /foo/, we get o/p as c:\foo instead of c:\foo\ */ + /* Checking if i/p has trailing slash and o/p doesn't have, then */ + /* adding slash */ + if ( pzAbsPath[len-1] == '/' && pzPath[pathlen-1] != '\\') + strcat( pzPath, "\\"); + FREE(pzAbsPath); + pzFilename = strchr(pzPath, '\\'); + if(!pzFilename) { + FREE(pzPath); + return 0; + } + strcpy(s, pzFilename); + FREE(pzPath); + } + + util_uri_normalize_slashes(s); + + return 1; +} +#endif /* XP_WIN32 */ + + +/* ---------------------- util_uri_normalize_slashes ---------------------- */ + +void util_uri_normalize_slashes(char *s) +{ +#ifdef XP_WIN32 + int flagDbcsUri = allow_dbcs_uri(); + + while (*s) { + if (*s == '\\') { + // Normalize '\\' to '/' + *s = '/'; + } else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) { + // Skip past two byte DBCS characters. Bug 353999 + s++; + } + s++; + } +#endif +} + + +/* --------------------------- util_uri_escape ---------------------------- */ +/* +NSAPI_PUBLIC char *util_uri_escape(char *od, const char *s) +{ + int flagDbcsUri = allow_dbcs_uri(); + char *d; + + if (!od) + od = (char *) MALLOC((strlen(s)*3) + 1); + d = od; + + while (*s) { + if (strchr("% ?#:+&*\"'<>\r\n", *s)) { + util_sprintf(d, "%%%02x", (unsigned char)*s); + ++s; d += 3; + } +#ifdef XP_WIN32 + else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) +#else + // Treat any character with the high bit set as a DBCS lead byte + else if (flagDbcsUri && s[1] && (s[0] & 0x80)) +#endif + { + // Escape the second byte of DBCS characters. The first byte will + // have been escaped already. IE translates all unescaped '\\'s + // into '/'. + // Bug 353999 + util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]); + s += 2; d += 6; + } + else if (0x80 & *s) { + util_sprintf(d, "%%%02x", (unsigned char)*s); + ++s; d += 3; + } else { + *d++ = *s++; + } + } + *d = '\0'; + return od; +} +*/ + + +/* --------------------------- util_url_escape ---------------------------- */ +/* +NSAPI_PUBLIC char *util_url_escape(char *od, const char *s) +{ + int flagDbcsUri = allow_dbcs_uri(); + char *d; + + if (!od) + od = (char *) MALLOC((strlen(s)*3) + 1); + d = od; + + while (*s) { + if (strchr("% +*\"'<>\r\n", *s)) { + util_sprintf(d, "%%%02x", (unsigned char)*s); + ++s; d += 3; + } +#ifdef XP_WIN32 + else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) +#else + // Treat any character with the high bit set as a DBCS lead byte + else if (flagDbcsUri && s[1] && (s[0] & 0x80)) +#endif + { + // Escape the second byte of DBCS characters. The first byte will + // have been escaped already. IE translates all unescaped '\\'s + // into '/'. + // Bug 353999 + util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]); + s += 2; d += 6; + } + else if (0x80 & *s) { + util_sprintf(d, "%%%02x", (unsigned char)*s); + ++s; d += 3; + } else { + *d++ = *s++; + } + } + *d = '\0'; + return od; +} +*/ + +/* ------------------------- util_uri_strip_params ------------------------- */ + +NSAPI_PUBLIC char* util_uri_strip_params(char *uri) +{ + // As per RFC2396, URI path segments can contain parameters beginning with + // ';'. These parameters must be removed from the ppath. Bug 418271 + char* out; + if((out = strchr(uri, ';'))) { + char* in = out; + while (*in) { + if (*in == ';') { + // Skip past parameter + do in++; while (*in && *in != '/'); + } else { + // Copy non-parameter path data + *out++ = *in++; + } + } + *out = 0; + } + return uri; +} + + +/* ------------------------ util_canonicalize_uri ------------------------- */ + +/* + * rewrite rules: + * // -> '/' + * /./ -> '/' + * /.\0 -> '/' + * /foo/../ -> '/' + * /foo/..\0 -> '/' + * + * Allocate a new string, as otherwise replacing in-line would impact the + * RequestURI, i.e. original URI in the request. + * Some guidelines in: http://www.ietf.org/rfc/rfc2396.txt + * Uniform Resource Identifiers (URI): Generic Syntax + */ +NSAPI_PUBLIC char* util_canonicalize_uri(pool_handle_t *pool, const char *uri, int len, int *pcanonlen) +{ + PRBool success = PR_TRUE; + const char *in_ptr = uri; + int in = 0; + int in_len = len; + + //PR_ASSERT(uri != NULL); // TODO + + char* canonPath = (char *)pool_malloc(pool, in_len+1); + char* out_ptr = canonPath; + + if (!canonPath) { + success = PR_FALSE; + goto done; + } + + + /* in goes from 0 .. sURIPath.len-1; out_ptr points to + * space where next char from input would be copied to + */ + while (in < in_len) { + + /* If the character isn't '/' then copy it out and move on*/ + if (in_ptr[0] != '/') { + *out_ptr++ = *in_ptr++; + in++; + continue; + } + + /* found '/' and reached end of sURIPath, done */ + if (in+1 >= in_len) { + *out_ptr++ = *in_ptr++; + in++; + break; + } + + /* we have '/' and there are more chars in the string */ + switch(in_ptr[1]) { + case '/': + /* '//' => '/' */ + in_ptr++; + in++; + break; + + case '.': + /* we have "/." so far */ + if (in+2 >= in_len) { + /* the string ends after this; basically ignore '.' + * make sure the ending / is transferred to output. + */ + *out_ptr++ = *in_ptr++; + goto done; + } + + /* more chars after "/."; see if it is a '/' */ + if (in_ptr[2] == '/') { + /* in deed, compact "/./" => "/"; */ + in_ptr += 2; + in += 2; + break; + } + + if (in_ptr[2] != '.') { + /* "/.x" where x is not '.'; copy as is */ + *out_ptr++ = *in_ptr++; + in++; + break; + } + + /* we have "/.." so far. see if we have either string + * ending after this or '/' following. + */ + if (in+3 < in_len && in_ptr[3] != '/' && in_ptr[3] != ';') { + /* we have "/..x" here; so copy as is */ + *out_ptr++ = *in_ptr++; + in++; + } + else { + /* we have "foo/../" or "foo/.." at the end; */ + if (out_ptr == canonPath) { + /* oops, we found "/../" pointing out of docroot */ + success = PR_FALSE; + goto done; + } + + /* remove the previous segment in the output */ + for (out_ptr--; + out_ptr != canonPath && out_ptr[0] != '/'; + out_ptr--); /* Empty Loop */ + + /* point to '/' if the last segment ended with .. then + * leave the '/' before the previous segment. + */ + if(in+3 == in_len) + out_ptr++; + + /* skip the input as well */ + in_ptr += 3; + in += 3; + } + break; + + default: + /* If we already have '/' at out_ptr we donot need to copy */ + if (out_ptr == canonPath || *(out_ptr-1) != '/') + *out_ptr++ = *in_ptr; + in_ptr++; in++; + break; + } + } + +done: + int canonLen = 0; + + if (success) { + /* the path looks fine; return the canonicalized form */ + canonLen = (int) (out_ptr - canonPath); + canonPath[canonLen] = '\0'; + } else { + /* error canonicalizing */ + pool_free(pool, canonPath); + canonPath = NULL; + } + + if (pcanonlen) + *pcanonlen = canonLen; + + return canonPath; +} + + +/* ---------------------- util_canonicalize_redirect ---------------------- */ + +NSAPI_PUBLIC char* util_canonicalize_redirect(pool_handle_t *pool, const char *baseUri, const char *newUri) +{ + //PR_ASSERT(baseUri != NULL); // TODO + + if (*newUri == '/') + return util_canonicalize_uri(pool, newUri, strlen(newUri), NULL); + + int bLen = strlen(baseUri); + if (bLen > 0 && baseUri[bLen - 1] != '/') { + while (bLen > 0 && baseUri[bLen - 1] != '/') + bLen--; + } + + int pLen = strlen(newUri) + bLen + 1; // 1 for slash + char *pUri = (char *)pool_malloc(pool, pLen + 1); + if (!pUri) + return PR_FALSE; + + memcpy(pUri, baseUri, bLen); + pUri[bLen] = '/'; + strcpy(pUri + bLen + 1, newUri); + + char *rval = util_canonicalize_uri(pool, pUri, pLen, NULL); + pool_free(pool, pUri); + + return rval; +} + + +/* ------------------------ util_host_port_suffix ------------------------- */ + +NSAPI_PUBLIC const char *util_host_port_suffix(const char *h) +{ + /* Return a pointer to the colon preceding the port number in a hostname. + * + * util_host_port_suffix("foo.com:80") = ":80" + * util_host_port_suffix("foo.com") = NULL + * util_host_port_suffix("[::]:80") = ":80" + * util_host_port_suffix("[::]") = NULL + */ + + if (h == NULL) + return h; + + for (;;) { + /* Find end of host, beginning of ":port", or an IPv6 address */ + for (;;) { + char c = *h; + + if (c == '\0') + return NULL; /* end of host, no port found */ + + if (c == '/') + return NULL; /* end of host, no port found */ + + if (c == ':') + return h; /* found port */ + + if (c == '[') + break; /* skip IPv6 address */ + + h++; + } + + /* Skip IPv6 address */ + while (*h != '\0' && *h != ']') + h++; + } +}
--- a/src/server/util/uri.cpp Tue Aug 13 22:14:32 2019 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,561 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - * - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * - * THE BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of the nor the names of its contributors may be - * used to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifdef XP_WIN32 -#define _MBCS -#include <windows.h> -#include <mbctype.h> -#endif - -#include "util.h" -#include "pool.h" -//include "frame/conf_api.h" -//include "support/stringvalue.h" - -#ifdef XP_WIN32 -static PRBool _getfullpathname = -1; -#endif /* XP_WIN32 */ - -/* --------------------------- util_uri_is_evil --------------------------- */ - -static inline int allow_dbcs_uri() -{ - /* - static int flagDbcsUri = -1; - if (flagDbcsUri == -1) { - flagDbcsUri = StringValue::getBoolean(conf_findGlobal("DbcsUri")); - } - return flagDbcsUri; - */ - return PR_FALSE; -} - -#ifdef XP_WIN32 -void set_fullpathname(PRBool b) -{ - _getfullpathname = b; -} -#endif /*XP_WIN32*/ - -NSAPI_PUBLIC int util_uri_is_evil_internal(const char *t, int allow_tilde, int allow_dot_dir) -{ -#ifdef XP_WIN32 - int flagDbcsUri = allow_dbcs_uri(); -#endif // XP_WIN32 - PRBool flagEmptySegment = PR_FALSE; - register int x; - - for (x = 0; t[x]; ++x) { - if (t[x] == '/') { - if (flagEmptySegment) - return 1; // "/;a/b" -#ifdef XP_WIN32 - if (t[x+1] == '/' && x != 0) -#else - if (t[x+1] == '/') -#endif - return 1; - if (t[x+1] == ';') - flagEmptySegment = PR_TRUE; // "/;a/b" is evil, "/a/;b" is not - if (t[x+1] == '.') { - /* "." at end of line is always prohibited */ - if (t[x+2] == '\0') - return 1; - - /* "." as a path segment is prohibited conditionally */ - if (!allow_dot_dir && (t[x+2] == '/' || t[x+2] == ';')) - return 1; - - /* ".." as a path segment is always prohibited */ - if (t[x+2] == '.' && (t[x+3] == '/' || t[x+3] == ';' || t[x+3] == '\0')) - return 1; - } - } -#ifdef XP_WIN32 - // Don't allow '~' in the filename. On some filesystems a long name - // (e.g. longfilename.htm) can be accessed using '~' bypassing any ACL - // checks (e.g. longfi~1.htm). - if (!allow_tilde && (t[x] == '~')) { - return 1; - } - - // Do not allow ':' apart from drive letter. Windows filestream - // will treat filename::$DATA as a plain file & display content. - // So block it to prevent source viewing vulnerability. - if ((t[x] == ':') && x > 1) { - return 1; - } - - // On NT, the directory "abc...." is the same as "abc" - // The only cheap way to catch this globally is to disallow - // names with the trailing "."s. Hopefully this is not over - // restrictive. - // Also trailing spaces in names can wreak havoc on ACL checks - // and name resolution. Therefore, ban them on the end of a - // name. - if (((t[x] == '.') || (t[x] == ' ')) && - ((t[x+1] == ';') || (t[x+1] == '/') || (t[x+1] == '\0'))) - { - return 1; - } - - // Skip past the second byte of two byte DBCS characters. Bug 353999 - if (flagDbcsUri && t[x+1] && IsDBCSLeadByte(t[x])) x++; -#endif // XP_WIN32 - } - return 0; -} - -NSAPI_PUBLIC int util_uri_is_evil(const char *t) -{ - return util_uri_is_evil_internal(t, 0, 0); -} - - -/* -------------------- util_uri_unescape_and_normalize -------------------- */ - -#ifdef XP_WIN32 -/* The server calls this function to unescape the URI and also normalize - * the uri. Normalizing the uri converts all "\" characters in the URI - * and pathinfo portion to "/". Does not touch "\" in query strings. - */ -NSAPI_PUBLIC -int util_uri_unescape_and_normalize(pool_handle_t *pool, char *s, char *unnormalized) -{ - if(!(util_uri_unescape_strict(s))) - return 0; - - if (unnormalized) strcpy(unnormalized, s); - - if (_getfullpathname == -1) - _getfullpathname = (_getmbcp() != 0); - - /* Get canonical filename Bugid: 4672869 */ - if(_getfullpathname && strcmp(s, "*") && (*s == '/' ) ) { - char *pzAbsPath = NULL; - int pathlen = 0; - int len = 0; - int ret = 0; - if(!(pzAbsPath = util_canonicalize_uri(pool, s, strlen(s), NULL))) { - //Error canonicalizing; possibly pointing out of docroot - return 0; - } - char *pzPath = (char *)MALLOC(MAX_PATH + 1); /* reserved byte for trailing slash */ - char *pzFilename = NULL; - - /* If required length of the buffer(pzPath) is more than the allocated one i.e. MAX_PATH(neglecting the reserved byte for trailing slash), return BAD REQUEST. This will happen if length of uri is more than the specified uri length(257) for MBCS windows */ - if(!(ret = GetFullPathName(pzAbsPath, MAX_PATH, pzPath, &pzFilename)) || ( ret > MAX_PATH)){ - FREE(pzAbsPath); - FREE(pzPath); - return 0; - } - len = strlen(pzAbsPath); - pathlen = strlen( pzPath ); - - /* GetFullPathName behaves differently in case of WINNT and WIN2K */ - /* o/p string doesn't contain the trailing slash in case of WINNT */ - /* if i/p is /foo/, we get o/p as c:\foo instead of c:\foo\ */ - /* Checking if i/p has trailing slash and o/p doesn't have, then */ - /* adding slash */ - if ( pzAbsPath[len-1] == '/' && pzPath[pathlen-1] != '\\') - strcat( pzPath, "\\"); - FREE(pzAbsPath); - pzFilename = strchr(pzPath, '\\'); - if(!pzFilename) { - FREE(pzPath); - return 0; - } - strcpy(s, pzFilename); - FREE(pzPath); - } - - util_uri_normalize_slashes(s); - - return 1; -} -#endif /* XP_WIN32 */ - - -/* ---------------------- util_uri_normalize_slashes ---------------------- */ - -void util_uri_normalize_slashes(char *s) -{ -#ifdef XP_WIN32 - int flagDbcsUri = allow_dbcs_uri(); - - while (*s) { - if (*s == '\\') { - // Normalize '\\' to '/' - *s = '/'; - } else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) { - // Skip past two byte DBCS characters. Bug 353999 - s++; - } - s++; - } -#endif -} - - -/* --------------------------- util_uri_escape ---------------------------- */ -/* -NSAPI_PUBLIC char *util_uri_escape(char *od, const char *s) -{ - int flagDbcsUri = allow_dbcs_uri(); - char *d; - - if (!od) - od = (char *) MALLOC((strlen(s)*3) + 1); - d = od; - - while (*s) { - if (strchr("% ?#:+&*\"'<>\r\n", *s)) { - util_sprintf(d, "%%%02x", (unsigned char)*s); - ++s; d += 3; - } -#ifdef XP_WIN32 - else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) -#else - // Treat any character with the high bit set as a DBCS lead byte - else if (flagDbcsUri && s[1] && (s[0] & 0x80)) -#endif - { - // Escape the second byte of DBCS characters. The first byte will - // have been escaped already. IE translates all unescaped '\\'s - // into '/'. - // Bug 353999 - util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]); - s += 2; d += 6; - } - else if (0x80 & *s) { - util_sprintf(d, "%%%02x", (unsigned char)*s); - ++s; d += 3; - } else { - *d++ = *s++; - } - } - *d = '\0'; - return od; -} -*/ - - -/* --------------------------- util_url_escape ---------------------------- */ -/* -NSAPI_PUBLIC char *util_url_escape(char *od, const char *s) -{ - int flagDbcsUri = allow_dbcs_uri(); - char *d; - - if (!od) - od = (char *) MALLOC((strlen(s)*3) + 1); - d = od; - - while (*s) { - if (strchr("% +*\"'<>\r\n", *s)) { - util_sprintf(d, "%%%02x", (unsigned char)*s); - ++s; d += 3; - } -#ifdef XP_WIN32 - else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) -#else - // Treat any character with the high bit set as a DBCS lead byte - else if (flagDbcsUri && s[1] && (s[0] & 0x80)) -#endif - { - // Escape the second byte of DBCS characters. The first byte will - // have been escaped already. IE translates all unescaped '\\'s - // into '/'. - // Bug 353999 - util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]); - s += 2; d += 6; - } - else if (0x80 & *s) { - util_sprintf(d, "%%%02x", (unsigned char)*s); - ++s; d += 3; - } else { - *d++ = *s++; - } - } - *d = '\0'; - return od; -} -*/ - -/* ------------------------- util_uri_strip_params ------------------------- */ - -NSAPI_PUBLIC char* util_uri_strip_params(char *uri) -{ - // As per RFC2396, URI path segments can contain parameters beginning with - // ';'. These parameters must be removed from the ppath. Bug 418271 - char* out; - if((out = strchr(uri, ';'))) { - char* in = out; - while (*in) { - if (*in == ';') { - // Skip past parameter - do in++; while (*in && *in != '/'); - } else { - // Copy non-parameter path data - *out++ = *in++; - } - } - *out = 0; - } - return uri; -} - - -/* ------------------------ util_canonicalize_uri ------------------------- */ - -/* - * rewrite rules: - * // -> '/' - * /./ -> '/' - * /.\0 -> '/' - * /foo/../ -> '/' - * /foo/..\0 -> '/' - * - * Allocate a new string, as otherwise replacing in-line would impact the - * RequestURI, i.e. original URI in the request. - * Some guidelines in: http://www.ietf.org/rfc/rfc2396.txt - * Uniform Resource Identifiers (URI): Generic Syntax - */ -NSAPI_PUBLIC char* util_canonicalize_uri(pool_handle_t *pool, const char *uri, int len, int *pcanonlen) -{ - PRBool success = PR_TRUE; - const char *in_ptr = uri; - int in = 0; - int in_len = len; - - //PR_ASSERT(uri != NULL); // TODO - - char* canonPath = (char *)pool_malloc(pool, in_len+1); - char* out_ptr = canonPath; - - if (!canonPath) { - success = PR_FALSE; - goto done; - } - - - /* in goes from 0 .. sURIPath.len-1; out_ptr points to - * space where next char from input would be copied to - */ - while (in < in_len) { - - /* If the character isn't '/' then copy it out and move on*/ - if (in_ptr[0] != '/') { - *out_ptr++ = *in_ptr++; - in++; - continue; - } - - /* found '/' and reached end of sURIPath, done */ - if (in+1 >= in_len) { - *out_ptr++ = *in_ptr++; - in++; - break; - } - - /* we have '/' and there are more chars in the string */ - switch(in_ptr[1]) { - case '/': - /* '//' => '/' */ - in_ptr++; - in++; - break; - - case '.': - /* we have "/." so far */ - if (in+2 >= in_len) { - /* the string ends after this; basically ignore '.' - * make sure the ending / is transferred to output. - */ - *out_ptr++ = *in_ptr++; - goto done; - } - - /* more chars after "/."; see if it is a '/' */ - if (in_ptr[2] == '/') { - /* in deed, compact "/./" => "/"; */ - in_ptr += 2; - in += 2; - break; - } - - if (in_ptr[2] != '.') { - /* "/.x" where x is not '.'; copy as is */ - *out_ptr++ = *in_ptr++; - in++; - break; - } - - /* we have "/.." so far. see if we have either string - * ending after this or '/' following. - */ - if (in+3 < in_len && in_ptr[3] != '/' && in_ptr[3] != ';') { - /* we have "/..x" here; so copy as is */ - *out_ptr++ = *in_ptr++; - in++; - } - else { - /* we have "foo/../" or "foo/.." at the end; */ - if (out_ptr == canonPath) { - /* oops, we found "/../" pointing out of docroot */ - success = PR_FALSE; - goto done; - } - - /* remove the previous segment in the output */ - for (out_ptr--; - out_ptr != canonPath && out_ptr[0] != '/'; - out_ptr--); /* Empty Loop */ - - /* point to '/' if the last segment ended with .. then - * leave the '/' before the previous segment. - */ - if(in+3 == in_len) - out_ptr++; - - /* skip the input as well */ - in_ptr += 3; - in += 3; - } - break; - - default: - /* If we already have '/' at out_ptr we donot need to copy */ - if (out_ptr == canonPath || *(out_ptr-1) != '/') - *out_ptr++ = *in_ptr; - in_ptr++; in++; - break; - } - } - -done: - int canonLen = 0; - - if (success) { - /* the path looks fine; return the canonicalized form */ - canonLen = out_ptr - canonPath; - canonPath[canonLen] = '\0'; - } else { - /* error canonicalizing */ - pool_free(pool, canonPath); - canonPath = NULL; - } - - if (pcanonlen) - *pcanonlen = canonLen; - - return canonPath; -} - - -/* ---------------------- util_canonicalize_redirect ---------------------- */ - -NSAPI_PUBLIC char* util_canonicalize_redirect(pool_handle_t *pool, const char *baseUri, const char *newUri) -{ - //PR_ASSERT(baseUri != NULL); // TODO - - if (*newUri == '/') - return util_canonicalize_uri(pool, newUri, strlen(newUri), NULL); - - int bLen = strlen(baseUri); - if (bLen > 0 && baseUri[bLen - 1] != '/') { - while (bLen > 0 && baseUri[bLen - 1] != '/') - bLen--; - } - - int pLen = strlen(newUri) + bLen + 1; // 1 for slash - char *pUri = (char *)pool_malloc(pool, pLen + 1); - if (!pUri) - return PR_FALSE; - - memcpy(pUri, baseUri, bLen); - pUri[bLen] = '/'; - strcpy(pUri + bLen + 1, newUri); - - char *rval = util_canonicalize_uri(pool, pUri, pLen, NULL); - pool_free(pool, pUri); - - return rval; -} - - -/* ------------------------ util_host_port_suffix ------------------------- */ - -NSAPI_PUBLIC char *util_host_port_suffix(char *h) -{ - return (char *)util_host_port_suffix((const char *)h); -} - -const char *util_host_port_suffix(const char *h) -{ - /* Return a pointer to the colon preceding the port number in a hostname. - * - * util_host_port_suffix("foo.com:80") = ":80" - * util_host_port_suffix("foo.com") = NULL - * util_host_port_suffix("[::]:80") = ":80" - * util_host_port_suffix("[::]") = NULL - */ - - if (h == NULL) - return h; - - for (;;) { - /* Find end of host, beginning of ":port", or an IPv6 address */ - for (;;) { - register char c = *h; - - if (c == '\0') - return NULL; /* end of host, no port found */ - - if (c == '/') - return NULL; /* end of host, no port found */ - - if (c == ':') - return h; /* found port */ - - if (c == '[') - break; /* skip IPv6 address */ - - h++; - } - - /* Skip IPv6 address */ - while (*h != '\0' && *h != ']') - h++; - } -}