uri.c - UNIXwork Code

1 /* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 * 4 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 5 * 6 * THE BSD LICENSE 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * Neither the name of the nor the names of its contributors may be 18 * used to endorse or promote products derived from this software without 19 * specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 25 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 28 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 29 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 30 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 31 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifdef XP_WIN32 35 #define _MBCS 36 #include <windows.h> 37 #include <mbctype.h> 38 #endif 39 40 #include "util.h" 41 #include "pool.h" 42 //include "frame/conf_api.h" 43 //include "support/stringvalue.h" 44 45 #ifdef XP_WIN32 46 static PRBool _getfullpathname = -1; 47 #endif /* XP_WIN32 */ 48 49 /* --------------------------- util_uri_is_evil --------------------------- */ 50 51 static inline int allow_dbcs_uri() 52 { 53 /* 54 static int flagDbcsUri = -1; 55 if (flagDbcsUri == -1) { 56 flagDbcsUri = StringValue::getBoolean(conf_findGlobal("DbcsUri")); 57 } 58 return flagDbcsUri; 59 */ 60 return PR_TRUE; 61 } 62 63 #ifdef XP_WIN32 64 void set_fullpathname(PRBool b) 65 { 66 _getfullpathname = b; 67 } 68 #endif /*XP_WIN32*/ 69 70 NSAPI_PUBLIC int util_uri_is_evil_internal(const char *t, int allow_tilde, int allow_dot_dir) 71 { 72 #ifdef XP_WIN32 73 int flagDbcsUri = allow_dbcs_uri(); 74 #endif // XP_WIN32 75 PRBool flagEmptySegment = PR_FALSE; 76 int x; 77 78 for (x = 0; t[x]; ++x) { 79 if (t[x] == '/') { 80 if (flagEmptySegment) 81 return 1; // "/;a/b" 82 #ifdef XP_WIN32 83 if (t[x+1] == '/' && x != 0) 84 #else 85 if (t[x+1] == '/') 86 #endif 87 return 1; 88 if (t[x+1] == ';') 89 flagEmptySegment = PR_TRUE; // "/;a/b" is evil, "/a/;b" is not 90 if (t[x+1] == '.') { 91 /* "." at end of line is always prohibited */ 92 if (t[x+2] == '\0') 93 return 1; 94 95 /* "." as a path segment is prohibited conditionally */ 96 if (!allow_dot_dir && (t[x+2] == '/' || t[x+2] == ';')) 97 return 1; 98 99 /* ".." as a path segment is always prohibited */ 100 if (t[x+2] == '.' && (t[x+3] == '/' || t[x+3] == ';' || t[x+3] == '\0')) 101 return 1; 102 } 103 } 104 #ifdef XP_WIN32 105 // Don't allow '~' in the filename. On some filesystems a long name 106 // (e.g. longfilename.htm) can be accessed using '~' bypassing any ACL 107 // checks (e.g. longfi~1.htm). 108 if (!allow_tilde && (t[x] == '~')) { 109 return 1; 110 } 111 112 // Do not allow ':' apart from drive letter. Windows filestream 113 // will treat filename::$DATA as a plain file & display content. 114 // So block it to prevent source viewing vulnerability. 115 if ((t[x] == ':') && x > 1) { 116 return 1; 117 } 118 119 // On NT, the directory "abc...." is the same as "abc" 120 // The only cheap way to catch this globally is to disallow 121 // names with the trailing "."s. Hopefully this is not over 122 // restrictive. 123 // Also trailing spaces in names can wreak havoc on ACL checks 124 // and name resolution. Therefore, ban them on the end of a 125 // name. 126 if (((t[x] == '.') || (t[x] == ' ')) && 127 ((t[x+1] == ';') || (t[x+1] == '/') || (t[x+1] == '\0'))) 128 { 129 return 1; 130 } 131 132 // Skip past the second byte of two byte DBCS characters. Bug 353999 133 if (flagDbcsUri && t[x+1] && IsDBCSLeadByte(t[x])) x++; 134 #endif // XP_WIN32 135 } 136 return 0; 137 } 138 139 NSAPI_PUBLIC int util_uri_is_evil(const char *t) 140 { 141 return util_uri_is_evil_internal(t, 0, 0); 142 } 143 144 145 /* -------------------- util_uri_unescape_and_normalize -------------------- */ 146 147 #ifdef XP_WIN32 148 /* The server calls this function to unescape the URI and also normalize 149 * the uri. Normalizing the uri converts all "\" characters in the URI 150 * and pathinfo portion to "/". Does not touch "\" in query strings. 151 */ 152 NSAPI_PUBLIC 153 int util_uri_unescape_and_normalize(pool_handle_t *pool, char *s, char *unnormalized) 154 { 155 if(!(util_uri_unescape_strict(s))) 156 return 0; 157 158 if (unnormalized) strcpy(unnormalized, s); 159 160 if (_getfullpathname == -1) 161 _getfullpathname = (_getmbcp() != 0); 162 163 /* Get canonical filename Bugid: 4672869 */ 164 if(_getfullpathname && strcmp(s, "*") && (*s == '/' ) ) { 165 char *pzAbsPath = NULL; 166 int pathlen = 0; 167 int len = 0; 168 int ret = 0; 169 if(!(pzAbsPath = util_canonicalize_uri(pool, s, strlen(s), NULL))) { 170 //Error canonicalizing; possibly pointing out of docroot 171 return 0; 172 } 173 char *pzPath = (char *)MALLOC(MAX_PATH + 1); /* reserved byte for trailing slash */ 174 char *pzFilename = NULL; 175 176 /* If required length of the buffer(pzPath) is more than the allocated one i.e. MAX_PATH(neglecting the reserved byte for trailing slash), return BAD REQUEST. This will happen if length of uri is more than the specified uri length(257) for MBCS windows */ 177 if(!(ret = GetFullPathName(pzAbsPath, MAX_PATH, pzPath, &pzFilename)) || ( ret > MAX_PATH)){ 178 FREE(pzAbsPath); 179 FREE(pzPath); 180 return 0; 181 } 182 len = strlen(pzAbsPath); 183 pathlen = strlen( pzPath ); 184 185 /* GetFullPathName behaves differently in case of WINNT and WIN2K */ 186 /* o/p string doesn't contain the trailing slash in case of WINNT */ 187 /* if i/p is /foo/, we get o/p as c:\foo instead of c:\foo\ */ 188 /* Checking if i/p has trailing slash and o/p doesn't have, then */ 189 /* adding slash */ 190 if ( pzAbsPath[len-1] == '/' && pzPath[pathlen-1] != '\\') 191 strcat( pzPath, "\\"); 192 FREE(pzAbsPath); 193 pzFilename = strchr(pzPath, '\\'); 194 if(!pzFilename) { 195 FREE(pzPath); 196 return 0; 197 } 198 strcpy(s, pzFilename); 199 FREE(pzPath); 200 } 201 202 util_uri_normalize_slashes(s); 203 204 return 1; 205 } 206 #endif /* XP_WIN32 */ 207 208 209 /* ---------------------- util_uri_normalize_slashes ---------------------- */ 210 211 void util_uri_normalize_slashes(char *s) 212 { 213 #ifdef XP_WIN32 214 int flagDbcsUri = allow_dbcs_uri(); 215 216 while (*s) { 217 if (*s == '\\') { 218 // Normalize '\\' to '/' 219 *s = '/'; 220 } else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) { 221 // Skip past two byte DBCS characters. Bug 353999 222 s++; 223 } 224 s++; 225 } 226 #endif 227 } 228 229 230 /* --------------------------- util_uri_escape ---------------------------- */ 231 NSAPI_PUBLIC char *util_uri_escape(char *od, const char *s) 232 { 233 int flagDbcsUri = allow_dbcs_uri(); 234 char *d; 235 236 if (!od) 237 od = (char *) MALLOC((strlen(s)*3) + 1); 238 d = od; 239 240 while (*s) { 241 if (strchr("% ?#:+&*\"''<>\r\n", *s)) { 242 util_sprintf(d, "%%%02x", (unsigned char)*s); 243 ++s; d += 3; 244 } 245 #ifdef XP_WIN32 246 else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) 247 #else 248 // Treat any character with the high bit set as a DBCS lead byte 249 else if (flagDbcsUri && s[1] && (s[0] & 0x80)) 250 #endif 251 { 252 // Escape the second byte of DBCS characters. The first byte will 253 // have been escaped already. IE translates all unescaped '\\'s 254 // into '/'. 255 // Bug 353999 256 util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]); 257 s += 2; d += 6; 258 } 259 else if (0x80 & *s) { 260 util_sprintf(d, "%%%02x", (unsigned char)*s); 261 ++s; d += 3; 262 } else { 263 *d++ = *s++; 264 } 265 } 266 *d = '\0'; 267 return od; 268 } 269 270 271 /* --------------------------- util_url_escape ---------------------------- */ 272 273 NSAPI_PUBLIC char *util_url_escape(char *od, const char *s) 274 { 275 int flagDbcsUri = allow_dbcs_uri(); 276 char *d; 277 278 if (!od) 279 od = (char *) MALLOC((strlen(s)*3) + 1); 280 d = od; 281 282 while (*s) { 283 if (strchr("% +*\"''<>\r\n", *s)) { 284 util_sprintf(d, "%%%02x", (unsigned char)*s); 285 ++s; d += 3; 286 } 287 #ifdef XP_WIN32 288 else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) 289 #else 290 // Treat any character with the high bit set as a DBCS lead byte 291 else if (flagDbcsUri && s[1] && (s[0] & 0x80)) 292 #endif 293 { 294 // Escape the second byte of DBCS characters. The first byte will 295 // have been escaped already. IE translates all unescaped '\\'s 296 // into '/'. 297 // Bug 353999 298 util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]); 299 s += 2; d += 6; 300 } 301 else if (0x80 & *s) { 302 util_sprintf(d, "%%%02x", (unsigned char)*s); 303 ++s; d += 3; 304 } else { 305 *d++ = *s++; 306 } 307 } 308 *d = '\0'; 309 return od; 310 } 311 312 313 /* ------------------------- util_uri_strip_params ------------------------- */ 314 315 NSAPI_PUBLIC char* util_uri_strip_params(char *uri) 316 { 317 // As per RFC2396, URI path segments can contain parameters beginning with 318 // ';'. These parameters must be removed from the ppath. Bug 418271 319 char* out; 320 if((out = strchr(uri, ';'))) { 321 char* in = out; 322 while (*in) { 323 if (*in == ';') { 324 // Skip past parameter 325 do in++; while (*in && *in != '/'); 326 } else { 327 // Copy non-parameter path data 328 *out++ = *in++; 329 } 330 } 331 *out = 0; 332 } 333 return uri; 334 } 335 336 337 /* ------------------------ util_canonicalize_uri ------------------------- */ 338 339 /* 340 * rewrite rules: 341 * // -> '/' 342 * /./ -> '/' 343 * /.\0 -> '/' 344 * /foo/../ -> '/' 345 * /foo/..\0 -> '/' 346 * 347 * Allocate a new string, as otherwise replacing in-line would impact the 348 * RequestURI, i.e. original URI in the request. 349 * Some guidelines in: http://www.ietf.org/rfc/rfc2396.txt 350 * Uniform Resource Identifiers (URI): Generic Syntax 351 */ 352 NSAPI_PUBLIC char* util_canonicalize_uri(pool_handle_t *pool, const char *uri, int len, int *pcanonlen) 353 { 354 PRBool success = PR_TRUE; 355 const char *in_ptr = uri; 356 int in = 0; 357 int in_len = len; 358 359 //PR_ASSERT(uri != NULL); // TODO 360 361 char* canonPath = (char *)pool_malloc(pool, in_len+1); 362 char* out_ptr = canonPath; 363 364 if (!canonPath) { 365 success = PR_FALSE; 366 goto done; 367 } 368 369 370 /* in goes from 0 .. sURIPath.len-1; out_ptr points to 371 * space where next char from input would be copied to 372 */ 373 while (in < in_len) { 374 375 /* If the character isn't '/' then copy it out and move on*/ 376 if (in_ptr[0] != '/') { 377 *out_ptr++ = *in_ptr++; 378 in++; 379 continue; 380 } 381 382 /* found '/' and reached end of sURIPath, done */ 383 if (in+1 >= in_len) { 384 *out_ptr++ = *in_ptr++; 385 in++; 386 break; 387 } 388 389 /* we have '/' and there are more chars in the string */ 390 switch(in_ptr[1]) { 391 case '/': 392 /* '//' => '/' */ 393 in_ptr++; 394 in++; 395 break; 396 397 case '.': 398 /* we have "/." so far */ 399 if (in+2 >= in_len) { 400 /* the string ends after this; basically ignore '.' 401 * make sure the ending / is transferred to output. 402 */ 403 *out_ptr++ = *in_ptr++; 404 goto done; 405 } 406 407 /* more chars after "/."; see if it is a '/' */ 408 if (in_ptr[2] == '/') { 409 /* in deed, compact "/./" => "/"; */ 410 in_ptr += 2; 411 in += 2; 412 break; 413 } 414 415 if (in_ptr[2] != '.') { 416 /* "/.x" where x is not '.'; copy as is */ 417 *out_ptr++ = *in_ptr++; 418 in++; 419 break; 420 } 421 422 /* we have "/.." so far. see if we have either string 423 * ending after this or '/' following. 424 */ 425 if (in+3 < in_len && in_ptr[3] != '/' && in_ptr[3] != ';') { 426 /* we have "/..x" here; so copy as is */ 427 *out_ptr++ = *in_ptr++; 428 in++; 429 } 430 else { 431 /* we have "foo/../" or "foo/.." at the end; */ 432 if (out_ptr == canonPath) { 433 /* oops, we found "/../" pointing out of docroot */ 434 success = PR_FALSE; 435 goto done; 436 } 437 438 /* remove the previous segment in the output */ 439 for (out_ptr--; 440 out_ptr != canonPath && out_ptr[0] != '/'; 441 out_ptr--); /* Empty Loop */ 442 443 /* point to '/' if the last segment ended with .. then 444 * leave the '/' before the previous segment. 445 */ 446 if(in+3 == in_len) 447 out_ptr++; 448 449 /* skip the input as well */ 450 in_ptr += 3; 451 in += 3; 452 } 453 break; 454 455 default: 456 /* If we already have '/' at out_ptr we donot need to copy */ 457 if (out_ptr == canonPath || *(out_ptr-1) != '/') 458 *out_ptr++ = *in_ptr; 459 in_ptr++; in++; 460 break; 461 } 462 } 463 464 done: 465 466 if (success) { 467 /* the path looks fine; return the canonicalized form */ 468 unsigned canonLen = (unsigned) (out_ptr - canonPath); 469 canonPath[canonLen] = '\0'; 470 if (pcanonlen) *pcanonlen = (int) canonLen; 471 } else { 472 /* error canonicalizing */ 473 pool_free(pool, canonPath); 474 canonPath = NULL; 475 if (pcanonlen) *pcanonlen = 0; 476 } 477 478 return canonPath; 479 } 480 481 482 /* ---------------------- util_canonicalize_redirect ---------------------- */ 483 484 NSAPI_PUBLIC char* util_canonicalize_redirect(pool_handle_t *pool, const char *baseUri, const char *newUri) 485 { 486 //PR_ASSERT(baseUri != NULL); // TODO 487 488 if (*newUri == '/') 489 return util_canonicalize_uri(pool, newUri, strlen(newUri), NULL); 490 491 int bLen = strlen(baseUri); 492 if (bLen > 0 && baseUri[bLen - 1] != '/') { 493 while (bLen > 0 && baseUri[bLen - 1] != '/') 494 bLen--; 495 } 496 497 int pLen = strlen(newUri) + bLen + 1; // 1 for slash 498 char *pUri = (char *)pool_malloc(pool, pLen + 1); 499 if (!pUri) 500 return PR_FALSE; 501 502 memcpy(pUri, baseUri, bLen); 503 pUri[bLen] = '/'; 504 strcpy(pUri + bLen + 1, newUri); 505 506 char *rval = util_canonicalize_uri(pool, pUri, pLen, NULL); 507 pool_free(pool, pUri); 508 509 return rval; 510 } 511 512 513 /* ------------------------ util_host_port_suffix ------------------------- */ 514 515 NSAPI_PUBLIC const char *util_host_port_suffix(const char *h) 516 { 517 /* Return a pointer to the colon preceding the port number in a hostname. 518 * 519 * util_host_port_suffix("foo.com:80") = ":80" 520 * util_host_port_suffix("foo.com") = NULL 521 * util_host_port_suffix("[::]:80") = ":80" 522 * util_host_port_suffix("[::]") = NULL 523 */ 524 525 if (h == NULL) 526 return h; 527 528 for (;;) { 529 /* Find end of host, beginning of ":port", or an IPv6 address */ 530 for (;;) { 531 char c = *h; 532 533 if (c == '\0') 534 return NULL; /* end of host, no port found */ 535 536 if (c == '/') 537 return NULL; /* end of host, no port found */ 538 539 if (c == ':') 540 return h; /* found port */ 541 542 if (c == '[') 543 break; /* skip IPv6 address */ 544 545 h++; 546 } 547 548 /* Skip IPv6 address */ 549 while (*h != '\0' && *h != ']') 550 h++; 551 } 552 } 553

UNIXworkcode

UNIXwork`code`