uri.cpp - UNIXwork Code

1 /* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 * 4 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 5 * 6 * THE BSD LICENSE 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * Neither the name of the nor the names of its contributors may be 18 * used to endorse or promote products derived from this software without 19 * specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 25 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 28 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 29 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 30 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 31 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifdef XP_WIN32 35 #define _MBCS 36 #include <windows.h> 37 #include <mbctype.h> 38 #endif 39 40 #include "util.h" 41 #include "pool.h" 42 //include "frame/conf_api.h" 43 //include "support/stringvalue.h" 44 45 #ifdef XP_WIN32 46 static PRBool _getfullpathname = -1; 47 #endif /* XP_WIN32 */ 48 49 /* --------------------------- util_uri_is_evil --------------------------- */ 50 51 static inline int allow_dbcs_uri() 52 { 53 /* 54 static int flagDbcsUri = -1; 55 if (flagDbcsUri == -1) { 56 flagDbcsUri = StringValue::getBoolean(conf_findGlobal("DbcsUri")); 57 } 58 return flagDbcsUri; 59 */ 60 return PR_FALSE; 61 } 62 63 #ifdef XP_WIN32 64 void set_fullpathname(PRBool b) 65 { 66 _getfullpathname = b; 67 } 68 #endif /*XP_WIN32*/ 69 70 NSAPI_PUBLIC int util_uri_is_evil_internal(const char *t, int allow_tilde, int allow_dot_dir) 71 { 72 #ifdef XP_WIN32 73 int flagDbcsUri = allow_dbcs_uri(); 74 #endif // XP_WIN32 75 PRBool flagEmptySegment = PR_FALSE; 76 register int x; 77 78 for (x = 0; t[x]; ++x) { 79 if (t[x] == '/') { 80 if (flagEmptySegment) 81 return 1; // "/;a/b" 82 #ifdef XP_WIN32 83 if (t[x+1] == '/' && x != 0) 84 #else 85 if (t[x+1] == '/') 86 #endif 87 return 1; 88 if (t[x+1] == ';') 89 flagEmptySegment = PR_TRUE; // "/;a/b" is evil, "/a/;b" is not 90 if (t[x+1] == '.') { 91 /* "." at end of line is always prohibited */ 92 if (t[x+2] == '\0') 93 return 1; 94 95 /* "." as a path segment is prohibited conditionally */ 96 if (!allow_dot_dir && (t[x+2] == '/' || t[x+2] == ';')) 97 return 1; 98 99 /* ".." as a path segment is always prohibited */ 100 if (t[x+2] == '.' && (t[x+3] == '/' || t[x+3] == ';' || t[x+3] == '\0')) 101 return 1; 102 } 103 } 104 #ifdef XP_WIN32 105 // Don't allow '~' in the filename. On some filesystems a long name 106 // (e.g. longfilename.htm) can be accessed using '~' bypassing any ACL 107 // checks (e.g. longfi~1.htm). 108 if (!allow_tilde && (t[x] == '~')) { 109 return 1; 110 } 111 112 // Do not allow ':' apart from drive letter. Windows filestream 113 // will treat filename::$DATA as a plain file & display content. 114 // So block it to prevent source viewing vulnerability. 115 if ((t[x] == ':') && x > 1) { 116 return 1; 117 } 118 119 // On NT, the directory "abc...." is the same as "abc" 120 // The only cheap way to catch this globally is to disallow 121 // names with the trailing "."s. Hopefully this is not over 122 // restrictive. 123 // Also trailing spaces in names can wreak havoc on ACL checks 124 // and name resolution. Therefore, ban them on the end of a 125 // name. 126 if (((t[x] == '.') || (t[x] == ' ')) && 127 ((t[x+1] == ';') || (t[x+1] == '/') || (t[x+1] == '\0'))) 128 { 129 return 1; 130 } 131 132 // Skip past the second byte of two byte DBCS characters. Bug 353999 133 if (flagDbcsUri && t[x+1] && IsDBCSLeadByte(t[x])) x++; 134 #endif // XP_WIN32 135 } 136 return 0; 137 } 138 139 NSAPI_PUBLIC int util_uri_is_evil(const char *t) 140 { 141 return util_uri_is_evil_internal(t, 0, 0); 142 } 143 144 145 /* -------------------- util_uri_unescape_and_normalize -------------------- */ 146 147 #ifdef XP_WIN32 148 /* The server calls this function to unescape the URI and also normalize 149 * the uri. Normalizing the uri converts all "\" characters in the URI 150 * and pathinfo portion to "/". Does not touch "\" in query strings. 151 */ 152 NSAPI_PUBLIC 153 int util_uri_unescape_and_normalize(pool_handle_t *pool, char *s, char *unnormalized) 154 { 155 if(!(util_uri_unescape_strict(s))) 156 return 0; 157 158 if (unnormalized) strcpy(unnormalized, s); 159 160 if (_getfullpathname == -1) 161 _getfullpathname = (_getmbcp() != 0); 162 163 /* Get canonical filename Bugid: 4672869 */ 164 if(_getfullpathname && strcmp(s, "*") && (*s == '/' ) ) { 165 char *pzAbsPath = NULL; 166 int pathlen = 0; 167 int len = 0; 168 int ret = 0; 169 if(!(pzAbsPath = util_canonicalize_uri(pool, s, strlen(s), NULL))) { 170 //Error canonicalizing; possibly pointing out of docroot 171 return 0; 172 } 173 char *pzPath = (char *)MALLOC(MAX_PATH + 1); /* reserved byte for trailing slash */ 174 char *pzFilename = NULL; 175 176 /* If required length of the buffer(pzPath) is more than the allocated one i.e. MAX_PATH(neglecting the reserved byte for trailing slash), return BAD REQUEST. This will happen if length of uri is more than the specified uri length(257) for MBCS windows */ 177 if(!(ret = GetFullPathName(pzAbsPath, MAX_PATH, pzPath, &pzFilename)) || ( ret > MAX_PATH)){ 178 FREE(pzAbsPath); 179 FREE(pzPath); 180 return 0; 181 } 182 len = strlen(pzAbsPath); 183 pathlen = strlen( pzPath ); 184 185 /* GetFullPathName behaves differently in case of WINNT and WIN2K */ 186 /* o/p string doesn't contain the trailing slash in case of WINNT */ 187 /* if i/p is /foo/, we get o/p as c:\foo instead of c:\foo\ */ 188 /* Checking if i/p has trailing slash and o/p doesn't have, then */ 189 /* adding slash */ 190 if ( pzAbsPath[len-1] == '/' && pzPath[pathlen-1] != '\\') 191 strcat( pzPath, "\\"); 192 FREE(pzAbsPath); 193 pzFilename = strchr(pzPath, '\\'); 194 if(!pzFilename) { 195 FREE(pzPath); 196 return 0; 197 } 198 strcpy(s, pzFilename); 199 FREE(pzPath); 200 } 201 202 util_uri_normalize_slashes(s); 203 204 return 1; 205 } 206 #endif /* XP_WIN32 */ 207 208 209 /* ---------------------- util_uri_normalize_slashes ---------------------- */ 210 211 void util_uri_normalize_slashes(char *s) 212 { 213 #ifdef XP_WIN32 214 int flagDbcsUri = allow_dbcs_uri(); 215 216 while (*s) { 217 if (*s == '\\') { 218 // Normalize '\\' to '/' 219 *s = '/'; 220 } else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) { 221 // Skip past two byte DBCS characters. Bug 353999 222 s++; 223 } 224 s++; 225 } 226 #endif 227 } 228 229 230 /* --------------------------- util_uri_escape ---------------------------- */ 231 /* 232 NSAPI_PUBLIC char *util_uri_escape(char *od, const char *s) 233 { 234 int flagDbcsUri = allow_dbcs_uri(); 235 char *d; 236 237 if (!od) 238 od = (char *) MALLOC((strlen(s)*3) + 1); 239 d = od; 240 241 while (*s) { 242 if (strchr("% ?#:+&*\"'<>\r\n", *s)) { 243 util_sprintf(d, "%%%02x", (unsigned char)*s); 244 ++s; d += 3; 245 } 246 #ifdef XP_WIN32 247 else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) 248 #else 249 // Treat any character with the high bit set as a DBCS lead byte 250 else if (flagDbcsUri && s[1] && (s[0] & 0x80)) 251 #endif 252 { 253 // Escape the second byte of DBCS characters. The first byte will 254 // have been escaped already. IE translates all unescaped '\\'s 255 // into '/'. 256 // Bug 353999 257 util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]); 258 s += 2; d += 6; 259 } 260 else if (0x80 & *s) { 261 util_sprintf(d, "%%%02x", (unsigned char)*s); 262 ++s; d += 3; 263 } else { 264 *d++ = *s++; 265 } 266 } 267 *d = '\0'; 268 return od; 269 } 270 */ 271 272 273 /* --------------------------- util_url_escape ---------------------------- */ 274 /* 275 NSAPI_PUBLIC char *util_url_escape(char *od, const char *s) 276 { 277 int flagDbcsUri = allow_dbcs_uri(); 278 char *d; 279 280 if (!od) 281 od = (char *) MALLOC((strlen(s)*3) + 1); 282 d = od; 283 284 while (*s) { 285 if (strchr("% +*\"'<>\r\n", *s)) { 286 util_sprintf(d, "%%%02x", (unsigned char)*s); 287 ++s; d += 3; 288 } 289 #ifdef XP_WIN32 290 else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) 291 #else 292 // Treat any character with the high bit set as a DBCS lead byte 293 else if (flagDbcsUri && s[1] && (s[0] & 0x80)) 294 #endif 295 { 296 // Escape the second byte of DBCS characters. The first byte will 297 // have been escaped already. IE translates all unescaped '\\'s 298 // into '/'. 299 // Bug 353999 300 util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]); 301 s += 2; d += 6; 302 } 303 else if (0x80 & *s) { 304 util_sprintf(d, "%%%02x", (unsigned char)*s); 305 ++s; d += 3; 306 } else { 307 *d++ = *s++; 308 } 309 } 310 *d = '\0'; 311 return od; 312 } 313 */ 314 315 /* ------------------------- util_uri_strip_params ------------------------- */ 316 317 NSAPI_PUBLIC char* util_uri_strip_params(char *uri) 318 { 319 // As per RFC2396, URI path segments can contain parameters beginning with 320 // ';'. These parameters must be removed from the ppath. Bug 418271 321 char* out; 322 if((out = strchr(uri, ';'))) { 323 char* in = out; 324 while (*in) { 325 if (*in == ';') { 326 // Skip past parameter 327 do in++; while (*in && *in != '/'); 328 } else { 329 // Copy non-parameter path data 330 *out++ = *in++; 331 } 332 } 333 *out = 0; 334 } 335 return uri; 336 } 337 338 339 /* ------------------------ util_canonicalize_uri ------------------------- */ 340 341 /* 342 * rewrite rules: 343 * // -> '/' 344 * /./ -> '/' 345 * /.\0 -> '/' 346 * /foo/../ -> '/' 347 * /foo/..\0 -> '/' 348 * 349 * Allocate a new string, as otherwise replacing in-line would impact the 350 * RequestURI, i.e. original URI in the request. 351 * Some guidelines in: http://www.ietf.org/rfc/rfc2396.txt 352 * Uniform Resource Identifiers (URI): Generic Syntax 353 */ 354 NSAPI_PUBLIC char* util_canonicalize_uri(pool_handle_t *pool, const char *uri, int len, int *pcanonlen) 355 { 356 PRBool success = PR_TRUE; 357 const char *in_ptr = uri; 358 int in = 0; 359 int in_len = len; 360 361 //PR_ASSERT(uri != NULL); // TODO 362 363 char* canonPath = (char *)pool_malloc(pool, in_len+1); 364 char* out_ptr = canonPath; 365 366 if (!canonPath) { 367 success = PR_FALSE; 368 goto done; 369 } 370 371 372 /* in goes from 0 .. sURIPath.len-1; out_ptr points to 373 * space where next char from input would be copied to 374 */ 375 while (in < in_len) { 376 377 /* If the character isn't '/' then copy it out and move on*/ 378 if (in_ptr[0] != '/') { 379 *out_ptr++ = *in_ptr++; 380 in++; 381 continue; 382 } 383 384 /* found '/' and reached end of sURIPath, done */ 385 if (in+1 >= in_len) { 386 *out_ptr++ = *in_ptr++; 387 in++; 388 break; 389 } 390 391 /* we have '/' and there are more chars in the string */ 392 switch(in_ptr[1]) { 393 case '/': 394 /* '//' => '/' */ 395 in_ptr++; 396 in++; 397 break; 398 399 case '.': 400 /* we have "/." so far */ 401 if (in+2 >= in_len) { 402 /* the string ends after this; basically ignore '.' 403 * make sure the ending / is transferred to output. 404 */ 405 *out_ptr++ = *in_ptr++; 406 goto done; 407 } 408 409 /* more chars after "/."; see if it is a '/' */ 410 if (in_ptr[2] == '/') { 411 /* in deed, compact "/./" => "/"; */ 412 in_ptr += 2; 413 in += 2; 414 break; 415 } 416 417 if (in_ptr[2] != '.') { 418 /* "/.x" where x is not '.'; copy as is */ 419 *out_ptr++ = *in_ptr++; 420 in++; 421 break; 422 } 423 424 /* we have "/.." so far. see if we have either string 425 * ending after this or '/' following. 426 */ 427 if (in+3 < in_len && in_ptr[3] != '/' && in_ptr[3] != ';') { 428 /* we have "/..x" here; so copy as is */ 429 *out_ptr++ = *in_ptr++; 430 in++; 431 } 432 else { 433 /* we have "foo/../" or "foo/.." at the end; */ 434 if (out_ptr == canonPath) { 435 /* oops, we found "/../" pointing out of docroot */ 436 success = PR_FALSE; 437 goto done; 438 } 439 440 /* remove the previous segment in the output */ 441 for (out_ptr--; 442 out_ptr != canonPath && out_ptr[0] != '/'; 443 out_ptr--); /* Empty Loop */ 444 445 /* point to '/' if the last segment ended with .. then 446 * leave the '/' before the previous segment. 447 */ 448 if(in+3 == in_len) 449 out_ptr++; 450 451 /* skip the input as well */ 452 in_ptr += 3; 453 in += 3; 454 } 455 break; 456 457 default: 458 /* If we already have '/' at out_ptr we donot need to copy */ 459 if (out_ptr == canonPath || *(out_ptr-1) != '/') 460 *out_ptr++ = *in_ptr; 461 in_ptr++; in++; 462 break; 463 } 464 } 465 466 done: 467 int canonLen = 0; 468 469 if (success) { 470 /* the path looks fine; return the canonicalized form */ 471 canonLen = out_ptr - canonPath; 472 canonPath[canonLen] = '\0'; 473 } else { 474 /* error canonicalizing */ 475 pool_free(pool, canonPath); 476 canonPath = NULL; 477 } 478 479 if (pcanonlen) 480 *pcanonlen = canonLen; 481 482 return canonPath; 483 } 484 485 486 /* ---------------------- util_canonicalize_redirect ---------------------- */ 487 488 NSAPI_PUBLIC char* util_canonicalize_redirect(pool_handle_t *pool, const char *baseUri, const char *newUri) 489 { 490 //PR_ASSERT(baseUri != NULL); // TODO 491 492 if (*newUri == '/') 493 return util_canonicalize_uri(pool, newUri, strlen(newUri), NULL); 494 495 int bLen = strlen(baseUri); 496 if (bLen > 0 && baseUri[bLen - 1] != '/') { 497 while (bLen > 0 && baseUri[bLen - 1] != '/') 498 bLen--; 499 } 500 501 int pLen = strlen(newUri) + bLen + 1; // 1 for slash 502 char *pUri = (char *)pool_malloc(pool, pLen + 1); 503 if (!pUri) 504 return PR_FALSE; 505 506 memcpy(pUri, baseUri, bLen); 507 pUri[bLen] = '/'; 508 strcpy(pUri + bLen + 1, newUri); 509 510 char *rval = util_canonicalize_uri(pool, pUri, pLen, NULL); 511 pool_free(pool, pUri); 512 513 return rval; 514 } 515 516 517 /* ------------------------ util_host_port_suffix ------------------------- */ 518 519 NSAPI_PUBLIC char *util_host_port_suffix(char *h) 520 { 521 return (char *)util_host_port_suffix((const char *)h); 522 } 523 524 const char *util_host_port_suffix(const char *h) 525 { 526 /* Return a pointer to the colon preceding the port number in a hostname. 527 * 528 * util_host_port_suffix("foo.com:80") = ":80" 529 * util_host_port_suffix("foo.com") = NULL 530 * util_host_port_suffix("[::]:80") = ":80" 531 * util_host_port_suffix("[::]") = NULL 532 */ 533 534 if (h == NULL) 535 return h; 536 537 for (;;) { 538 /* Find end of host, beginning of ":port", or an IPv6 address */ 539 for (;;) { 540 register char c = *h; 541 542 if (c == '\0') 543 return NULL; /* end of host, no port found */ 544 545 if (c == '/') 546 return NULL; /* end of host, no port found */ 547 548 if (c == ':') 549 return h; /* found port */ 550 551 if (c == '[') 552 break; /* skip IPv6 address */ 553 554 h++; 555 } 556 557 /* Skip IPv6 address */ 558 while (*h != '\0' && *h != ']') 559 h++; 560 } 561 } 562

UNIXworkcode

UNIXwork`code`