src/server/util/uri.c

Wed, 27 Nov 2024 23:00:07 +0100

author
Olaf Wintermann <olaf.wintermann@gmail.com>
date
Wed, 27 Nov 2024 23:00:07 +0100
changeset 563
6ca97c99173e
parent 386
b91f8efadb63
permissions
-rw-r--r--

add TODO to use a future ucx feature

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 *
 * THE BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the  nor the names of its contributors may be
 * used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef XP_WIN32
#define _MBCS
#include <windows.h>
#include <mbctype.h>
#endif

#include "util.h"
#include "pool.h"
//include "frame/conf_api.h"
//include "support/stringvalue.h"

#ifdef XP_WIN32
/*
 * Switch that decides whether util_uri_unescape_and_normalize() runs URIs
 * through GetFullPathName():
 *   -1        not decided yet; resolved on first use from _getmbcp() != 0
 *    0        step disabled
 *   non-zero  step enabled
 * set_fullpathname() overwrites the value directly.
 */
static PRBool _getfullpathname = -1;
#endif /* XP_WIN32 */

/* --------------------------- util_uri_is_evil --------------------------- */

/*
 * Report whether URIs may contain double-byte (DBCS) characters.
 *
 * The original implementation read the global "DbcsUri" configuration value
 * once (StringValue::getBoolean(conf_findGlobal("DbcsUri"))) and cached the
 * result.  That lookup is currently disabled -- its headers are commented out
 * at the top of this file -- so DBCS handling is unconditionally enabled.
 *
 * Returns PR_TRUE.
 */
static inline int allow_dbcs_uri(void)
{
    return PR_TRUE;
}

#ifdef XP_WIN32
/*
 * Explicitly enable (non-zero) or disable (zero) the GetFullPathName()
 * step of util_uri_unescape_and_normalize().  Once a value other than -1 is
 * stored, the lazy default derived from _getmbcp() is no longer computed.
 */
void set_fullpathname(PRBool b)
{
    _getfullpathname = b;
}
#endif  /*XP_WIN32*/

/*
 * Scan the (already unescaped) URI path `t` for constructs that must be
 * rejected.
 *
 *   t              NUL-terminated path to inspect
 *   allow_tilde    non-zero permits '~' (only consulted on Windows)
 *   allow_dot_dir  non-zero permits "." as an inner path segment
 *
 * Returns 1 if the path is considered evil, 0 otherwise.
 */
NSAPI_PUBLIC int util_uri_is_evil_internal(const char *t, int allow_tilde, int allow_dot_dir)
{
#ifdef XP_WIN32
    int dbcsEnabled = allow_dbcs_uri();
#endif // XP_WIN32
    PRBool sawParamOnlySegment = PR_FALSE;
    const char *p;

    for (p = t; *p; ++p) {
        if (*p == '/') {
            const char next = p[1];

            /* a segment consisting only of parameters was seen earlier and
             * is now followed by another segment: "/;a/b" */
            if (sawParamOnlySegment)
                return 1;

            /* empty segment "//" (a leading "//" is tolerated on Windows) */
#ifdef XP_WIN32
            if (next == '/' && p != t)
#else
            if (next == '/')
#endif
                return 1;

            if (next == ';') {
                /* "/;a/b" is evil, "/a/;b" is not: remember and decide at
                 * the next '/' */
                sawParamOnlySegment = PR_TRUE;
            } else if (next == '.') {
                /* next is non-NUL, so p[2] is still inside the string */
                const char afterDot = p[2];

                /* a trailing "/." is always prohibited */
                if (afterDot == '\0')
                    return 1;

                /* "." as a whole segment is prohibited unless allowed */
                if ((afterDot == '/' || afterDot == ';') && !allow_dot_dir)
                    return 1;

                /* ".." as a whole segment is always prohibited */
                if (afterDot == '.') {
                    const char afterDotDot = p[3];
                    if (afterDotDot == '/' || afterDotDot == ';' || afterDotDot == '\0')
                        return 1;
                }
            }
        }
#ifdef XP_WIN32
        // '~' lets short 8.3 names (longfi~1.htm) reach a file that has a
        // long name (longfilename.htm), bypassing ACL checks on some
        // filesystems.
        if ((*p == '~') && !allow_tilde) {
            return 1;
        }

        // ':' is only tolerated at index 0 or 1 (drive letter).  Anything
        // later could address a file stream such as filename::$DATA and
        // expose source code.
        if ((*p == ':') && (p - t) > 1) {
            return 1;
        }

        // On NT the directory "abc...." is the same as "abc", and trailing
        // spaces confuse ACL checks and name resolution.  Reject any name
        // that ends in '.' or ' '.
        if ((*p == '.') || (*p == ' ')) {
            const char next = p[1];
            if ((next == ';') || (next == '/') || (next == '\0')) {
                return 1;
            }
        }

        // Step over the trail byte of a two byte DBCS character so it is
        // never interpreted on its own.  Bug 353999
        if (dbcsEnabled && p[1] && IsDBCSLeadByte(*p)) {
            ++p;
        }
#endif // XP_WIN32
    }
    return 0;
}

/*
 * Strict variant of util_uri_is_evil_internal(): neither '~' nor "." path
 * segments are permitted.  Returns 1 if `t` is evil, 0 otherwise.
 */
NSAPI_PUBLIC int util_uri_is_evil(const char *t)
{
    const int allow_tilde = 0;
    const int allow_dot_dir = 0;

    return util_uri_is_evil_internal(t, allow_tilde, allow_dot_dir);
}


/* -------------------- util_uri_unescape_and_normalize -------------------- */

#ifdef XP_WIN32
/*
 * Unescape the URI in `s` in place and normalize it.  Normalizing converts
 * all "\" characters in the URI and pathinfo portion to "/".  Does not touch
 * "\" in query strings.
 *
 *   pool          pool used for the temporary canonical copy of the URI
 *   s             URI buffer, modified in place
 *   unnormalized  optional; receives a copy of the unescaped URI before any
 *                 normalization (the caller must provide enough space)
 *
 * When full-path processing is enabled (see _getfullpathname), URIs that
 * start with '/' are additionally canonicalized and passed through
 * GetFullPathName().
 *
 * Returns 1 on success.  Returns 0 if strict unescaping fails, if the URI
 * cannot be canonicalized, if memory is exhausted, or if GetFullPathName()
 * fails or needs more than MAX_PATH characters.
 */
NSAPI_PUBLIC
int util_uri_unescape_and_normalize(pool_handle_t *pool, char *s, char *unnormalized)
{
    if (!(util_uri_unescape_strict(s)))
        return 0;

    if (unnormalized)
        strcpy(unnormalized, s);

    /* decide lazily: only enabled by default for multibyte code pages */
    if (_getfullpathname == -1)
        _getfullpathname = (_getmbcp() != 0);

    /* Get canonical filename Bugid: 4672869 */
    if (_getfullpathname && strcmp(s, "*") && (*s == '/')) {
        char *pzAbsPath = NULL;
        char *pzPath = NULL;
        char *pzFilename = NULL;
        size_t pathlen = 0;
        size_t len = 0;
        int ret = 0;

        pzAbsPath = util_canonicalize_uri(pool, s, strlen(s), NULL);
        if (!pzAbsPath) {
            /* Error canonicalizing; possibly pointing out of docroot */
            return 0;
        }

        /* one byte is reserved for a trailing slash appended below */
        pzPath = (char *)MALLOC(MAX_PATH + 1);
        if (!pzPath) {
            /* pzAbsPath came from pool_malloc(pool, ...), so it has to go
             * back to the same pool */
            pool_free(pool, pzAbsPath);
            return 0;
        }

        /* If the required length of pzPath is more than MAX_PATH (not
         * counting the byte reserved for the trailing slash), reject the
         * request.  This happens if the URI is longer than the supported
         * URI length for MBCS Windows. */
        ret = GetFullPathName(pzAbsPath, MAX_PATH, pzPath, &pzFilename);
        if (!ret || (ret > MAX_PATH)) {
            pool_free(pool, pzAbsPath);
            FREE(pzPath);
            return 0;
        }
        len = strlen(pzAbsPath);
        pathlen = strlen(pzPath);

        /* GetFullPathName behaves differently on WINNT and WIN2K: on WINNT
         * the output has no trailing slash, i.e. "/foo/" yields "c:\foo"
         * instead of "c:\foo\".  Re-append the slash if the input had one
         * and the output lost it. */
        if (len > 0 && pathlen > 0 &&
            pzAbsPath[len - 1] == '/' && pzPath[pathlen - 1] != '\\')
            strcat(pzPath, "\\");
        pool_free(pool, pzAbsPath);

        /* drop everything before the first backslash (the drive prefix) */
        pzFilename = strchr(pzPath, '\\');
        if (!pzFilename) {
            FREE(pzPath);
            return 0;
        }
        /* NOTE(review): assumes the result is never longer than the buffer
         * behind s -- the size of s is not known here; confirm with callers */
        strcpy(s, pzFilename);
        FREE(pzPath);
    }

    util_uri_normalize_slashes(s);

    return 1;
}
#endif /* XP_WIN32 */


/* ---------------------- util_uri_normalize_slashes ---------------------- */

/*
 * Replace every '\\' in `s` with '/', in place.  The conversion is only
 * compiled in for Windows builds; elsewhere the string is left untouched.
 */
void util_uri_normalize_slashes(char *s)
{
#ifdef XP_WIN32
    const int dbcsEnabled = allow_dbcs_uri();
    char *p;

    for (p = s; *p; p++) {
        if (*p == '\\') {
            // Normalize '\\' to '/'
            *p = '/';
            continue;
        }
        if (dbcsEnabled && p[1] && IsDBCSLeadByte(p[0])) {
            // Step over the trail byte of a two byte DBCS character so a
            // 0x5c trail byte is not mistaken for '\\'.  Bug 353999
            p++;
        }
    }
#endif
}


/* --------------------------- util_uri_escape ---------------------------- */
/*
 * Shared worker for util_uri_escape() and util_url_escape().
 *
 * Copies `s` to `od`, replacing bytes with "%xx" (two lowercase hex digits):
 *   - every byte that occurs in `reserved`
 *   - every DBCS lead byte together with the byte that follows it
 *     (Bug 353999: IE translates unescaped '\\' into '/', and a DBCS trail
 *     byte may be '\\').  Outside Windows any byte with the high bit set
 *     counts as a lead byte.
 *   - every remaining byte with the high bit set
 *
 * If `od` is NULL a buffer of strlen(s)*3 + 1 bytes is obtained with MALLOC;
 * otherwise the caller has to supply at least that much space.
 *
 * Returns `od` (or the new buffer), or NULL if the allocation failed.
 */
static char *uri_escape_reserved(char *od, const char *s, const char *reserved)
{
    static const char hexdigits[] = "0123456789abcdef";
    const int flagDbcsUri = allow_dbcs_uri();
    char *d;

    if (!od) {
        /* worst case: every input byte expands to three output bytes */
        od = (char *) MALLOC((strlen(s) * 3) + 1);
        if (!od)
            return NULL;
    }
    d = od;

    while (*s) {
        int nescape; /* number of input bytes to percent-encode */

        if (strchr(reserved, *s)) {
            nescape = 1;
        }
#ifdef XP_WIN32
        else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0]))
#else
        // Treat any character with the high bit set as a DBCS lead byte
        else if (flagDbcsUri && s[1] && (s[0] & 0x80))
#endif
        {
            // Escape the lead byte and its trail byte as one unit
            nescape = 2;
        }
        else if (0x80 & *s) {
            nescape = 1;
        } else {
            nescape = 0;
        }

        if (nescape == 0) {
            *d++ = *s++;
            continue;
        }

        while (nescape-- > 0) {
            const unsigned char c = (unsigned char)*s++;
            *d++ = '%';
            *d++ = hexdigits[c >> 4];
            *d++ = hexdigits[c & 0x0f];
        }
    }
    *d = '\0';
    return od;
}

/*
 * Percent-encode `s` for use as a URI path.  In addition to bytes with the
 * high bit set, the characters  % space ? # : + & * " ' < > CR LF  are
 * escaped.
 *
 *   od  destination buffer of at least strlen(s)*3 + 1 bytes, or NULL to
 *       have one allocated with MALLOC
 *   s   NUL-terminated input
 *
 * Returns the destination buffer, or NULL if `od` was NULL and the
 * allocation failed.
 */
NSAPI_PUBLIC char *util_uri_escape(char *od, const char *s)
{
    return uri_escape_reserved(od, s, "% ?#:+&*\"'<>\r\n");
}


/* --------------------------- util_url_escape ---------------------------- */

/*
 * Percent-encode `s` for use as a complete URL.  Works like
 * util_uri_escape() but leaves '?', '#', ':' and '&' untouched; the
 * characters  % space + * " ' < > CR LF  and bytes with the high bit set
 * are escaped.
 *
 *   od  destination buffer of at least strlen(s)*3 + 1 bytes, or NULL to
 *       have one allocated with MALLOC
 *   s   NUL-terminated input
 *
 * Returns the destination buffer, or NULL if `od` was NULL and the
 * allocation failed.
 */
NSAPI_PUBLIC char *util_url_escape(char *od, const char *s)
{
    return uri_escape_reserved(od, s, "% +*\"'<>\r\n");
}


/* ------------------------- util_uri_strip_params ------------------------- */

/*
 * Remove path parameters from `uri` in place and return `uri`.
 *
 * As per RFC2396, URI path segments can contain parameters beginning with
 * ';'.  Everything from a ';' up to (but not including) the next '/' or the
 * end of the string is dropped.  Bug 418271
 */
NSAPI_PUBLIC char* util_uri_strip_params(char *uri)
{
    char *dst = strchr(uri, ';');
    const char *src;

    /* nothing to strip */
    if (!dst)
        return uri;

    src = dst;
    while (*src) {
        if (*src != ';') {
            /* ordinary path data: keep it */
            *dst++ = *src++;
            continue;
        }

        /* step over the ';' and the parameter that follows it */
        for (++src; *src && *src != '/'; ++src)
            ;
    }
    *dst = 0;

    return uri;
}


/* ------------------------ util_canonicalize_uri ------------------------- */

/*
 * Produce a canonical copy of the first `len` bytes of `uri`.
 *
 * rewrite rules:
 *   //                       ->  '/'
 *   /./                      ->  '/'
 *   /.\0                     ->  '/'
 *   /foo/../                 ->  '/'
 *   /foo/..\0                ->  '/'
 *
 * A ".." segment is recognized when it is followed by '/', by ';' or by the
 * end of the input.
 *
 * Allocate a new string, as otherwise replacing in-line would impact the
 * RequestURI, i.e. original URI in the request.
 * Some guidelines in: http://www.ietf.org/rfc/rfc2396.txt
 *      Uniform Resource Identifiers (URI): Generic Syntax
 *
 * Parameters:
 *   pool       pool passed to pool_malloc()/pool_free() for the result
 *   uri        input path; bytes beyond `len` are never examined
 *   len        number of bytes of `uri` to process
 *   pcanonlen  optional; receives the length of the result, or 0 on failure
 *
 * Returns the NUL-terminated canonical path in a buffer of len + 1 bytes
 * obtained from pool_malloc(), or NULL if that allocation fails or if a ".."
 * segment is found while the output is still empty (the path would point
 * out of the docroot).
 */
NSAPI_PUBLIC char* util_canonicalize_uri(pool_handle_t *pool, const char *uri, int len, int *pcanonlen)
{
    PRBool success = PR_TRUE;
    const char *in_ptr = uri;
    int in = 0;
    int in_len = len;

    //PR_ASSERT(uri != NULL); // TODO

    /* the output never grows beyond the input, so len + 1 bytes suffice */
    char* canonPath = (char *)pool_malloc(pool, in_len+1);
    char* out_ptr = canonPath;

    if (!canonPath) {
        success = PR_FALSE;
        goto done;
    }


    /* in goes from 0 .. in_len-1 and always equals in_ptr - uri;
     * out_ptr points to the space where the next char from the input
     * would be copied to
     */
    while (in < in_len) {

        /* If the character isn't '/' then copy it out and move on */
        if (in_ptr[0] != '/') {
            *out_ptr++ = *in_ptr++;
            in++;
            continue;
        }

        /* found '/' as the very last input character: copy it and stop */
        if (in+1 >= in_len) {
            *out_ptr++ = *in_ptr++;
            in++;
            break;
        }

        /* we have '/' and there are more chars in the string */
        switch(in_ptr[1]) {
        case '/':
            /*  '//' => '/': drop this slash, the next one is handled in the
             *  following iteration  */
            in_ptr++;
            in++;
            break;

        case '.':
            /* we have "/." so far */
            if (in+2 >= in_len) {
                /*  the string ends after this; basically ignore '.'
                 *  make sure the ending / is transferred to output.
                 */
                *out_ptr++ = *in_ptr++;
                goto done;
            }

            /* more chars after "/."; see if it is a '/' */
            if (in_ptr[2] == '/') {
                /* indeed, compact "/./" => "/"; the second '/' is handled
                 * in the following iteration */
                in_ptr += 2;
                in += 2;
                break;
            }

            if (in_ptr[2] != '.') {
                /* "/.x" where x is not '.'; copy the '/' as is, the rest
                 * follows through the plain-character path above */
                *out_ptr++ = *in_ptr++;
                in++;
                break;
            }

            /* we have "/.." so far. see if we have either the string
             * ending after this or '/' (or ';') following.
             */
            if (in+3 < in_len && in_ptr[3] != '/' && in_ptr[3] != ';') {
                /* we have "/..x" here; so copy as is */
                *out_ptr++ = *in_ptr++;
                in++;
            }
            else {
                /* we have "foo/../" or "foo/.." at the end; */
                if (out_ptr == canonPath) {
                    /* oops, we found "/../" pointing out of docroot */
                    success = PR_FALSE;
                    goto done;
                }

                /* remove the previous segment in the output: walk back to
                 * the '/' that starts it, or to the start of the buffer */
                for (out_ptr--;
                     out_ptr != canonPath && out_ptr[0] != '/';
                     out_ptr--); /* Empty Loop */

                /* out_ptr now points to that '/'; if the input ends with
                 * ".." then keep the '/' before the previous segment,
                 * otherwise it is overwritten by what follows.
                 */
                if(in+3 == in_len)
                    out_ptr++;

                /* skip the "/.." in the input as well */
                in_ptr += 3;
                in += 3;
            }
            break;

        default:
            /* If we already have '/' at out_ptr we do not need to copy */
            if (out_ptr == canonPath || *(out_ptr-1) != '/')
                *out_ptr++ = *in_ptr;
            in_ptr++; in++;
            break;
        }
    }

done:

    if (success) {
        /* the path looks fine; terminate and return the canonicalized form */
        unsigned canonLen = (unsigned) (out_ptr - canonPath);
        canonPath[canonLen] = '\0';
        if (pcanonlen) *pcanonlen = (int) canonLen;
    } else {
        /* error canonicalizing; this path is also taken with canonPath ==
         * NULL when the allocation above failed */
        pool_free(pool, canonPath);
        canonPath = NULL;
        if (pcanonlen) *pcanonlen = 0;
    }

    return canonPath;
}


/* ---------------------- util_canonicalize_redirect ---------------------- */

/*
 * Resolve the redirect target `newUri` against `baseUri` and canonicalize
 * the result with util_canonicalize_uri().
 *
 *   pool     pool used for the temporary buffer and for the result
 *   baseUri  URI the redirect is relative to
 *   newUri   redirect target
 *
 * If `newUri` starts with '/', it is canonicalized on its own.  Otherwise it
 * is appended to the directory part of `baseUri` (everything up to and
 * including the last '/'); e.g. base "/a/b" and target "c" give "/a/c".
 *
 * Returns the canonical URI allocated from `pool`, or NULL if memory could
 * not be allocated or canonicalization failed.
 */
NSAPI_PUBLIC char* util_canonicalize_redirect(pool_handle_t *pool, const char *baseUri, const char *newUri)
{
    //PR_ASSERT(baseUri != NULL); // TODO

    int nLen = strlen(newUri);

    if (*newUri == '/')
        return util_canonicalize_uri(pool, newUri, nLen, NULL);

    /* cut baseUri back to its last '/'; nothing is kept if it has none */
    int bLen = strlen(baseUri);
    while (bLen > 0 && baseUri[bLen - 1] != '/')
        bLen--;

    int pLen = nLen + bLen + 1; // 1 for slash
    char *pUri = (char *)pool_malloc(pool, pLen + 1);
    if (!pUri)
        return NULL;

    /* base directory + '/' + target; a doubled slash is collapsed by the
     * canonicalization below */
    memcpy(pUri, baseUri, bLen);
    pUri[bLen] = '/';
    memcpy(pUri + bLen + 1, newUri, nLen + 1); /* includes the NUL */

    char *rval = util_canonicalize_uri(pool, pUri, pLen, NULL);
    pool_free(pool, pUri);

    return rval;
}


/* ------------------------ util_host_port_suffix ------------------------- */

/*
 * Return a pointer to the colon preceding the port number in a hostname,
 * or NULL if there is none (or if `h` is NULL).
 *
 * util_host_port_suffix("foo.com:80") = ":80"
 * util_host_port_suffix("foo.com") = NULL
 * util_host_port_suffix("[::]:80") = ":80"
 * util_host_port_suffix("[::]") = NULL
 */
NSAPI_PUBLIC const char *util_host_port_suffix(const char *h)
{
    int inBrackets = 0; /* non-zero while inside a "[...]" IPv6 literal */
    const char *p;

    if (h == NULL)
        return NULL;

    for (p = h; *p != '\0'; p++) {
        if (inBrackets) {
            /* colons inside the IPv6 address are not port separators */
            if (*p == ']')
                inBrackets = 0;
            continue;
        }

        switch (*p) {
        case '/':
            return NULL; /* end of host, no port found */
        case ':':
            return p; /* found port */
        case '[':
            inBrackets = 1; /* skip IPv6 address */
            break;
        default:
            break;
        }
    }

    return NULL; /* end of host, no port found */
}

mercurial