1 /* |
|
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
|
3 * |
|
4 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
|
5 * |
|
6 * THE BSD LICENSE |
|
7 * |
|
8 * Redistribution and use in source and binary forms, with or without |
|
9 * modification, are permitted provided that the following conditions are met: |
|
10 * |
|
11 * Redistributions of source code must retain the above copyright notice, this |
|
12 * list of conditions and the following disclaimer. |
|
13 * Redistributions in binary form must reproduce the above copyright notice, |
|
14 * this list of conditions and the following disclaimer in the documentation |
|
15 * and/or other materials provided with the distribution. |
|
16 * |
|
17 * Neither the name of the nor the names of its contributors may be |
|
18 * used to endorse or promote products derived from this software without |
|
19 * specific prior written permission. |
|
20 * |
|
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
|
25 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
26 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
27 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
|
28 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|
29 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
|
30 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
|
31 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
32 */ |
|
33 |
|
34 #ifdef XP_WIN32 |
|
35 #define _MBCS |
|
36 #include <windows.h> |
|
37 #include <mbctype.h> |
|
38 #endif |
|
39 |
|
40 #include "util.h" |
|
41 #include "pool.h" |
|
42 //include "frame/conf_api.h" |
|
43 //include "support/stringvalue.h" |
|
44 |
|
#ifdef XP_WIN32
/*
 * Tri-state switch deciding whether util_uri_unescape_and_normalize()
 * passes the URI through GetFullPathName():
 *   -1     not decided yet; resolved on first use from (_getmbcp() != 0)
 *    0     the GetFullPathName() step is skipped
 *   other  the GetFullPathName() step is performed
 * set_fullpathname() can overwrite the value explicitly.
 */
static PRBool _getfullpathname = -1;
#endif /* XP_WIN32 */
|
48 |
|
49 /* --------------------------- util_uri_is_evil --------------------------- */ |
|
50 |
|
51 static inline int allow_dbcs_uri() |
|
52 { |
|
53 /* |
|
54 static int flagDbcsUri = -1; |
|
55 if (flagDbcsUri == -1) { |
|
56 flagDbcsUri = StringValue::getBoolean(conf_findGlobal("DbcsUri")); |
|
57 } |
|
58 return flagDbcsUri; |
|
59 */ |
|
60 return PR_TRUE; |
|
61 } |
|
62 |
|
#ifdef XP_WIN32
/*
 * Explicitly set the file-level _getfullpathname switch.
 *
 * b  value stored verbatim into _getfullpathname.
 */
void set_fullpathname(PRBool b)
{
    _getfullpathname = b;
}
#endif /* XP_WIN32 */
|
69 |
|
70 NSAPI_PUBLIC int util_uri_is_evil_internal(const char *t, int allow_tilde, int allow_dot_dir) |
|
71 { |
|
72 #ifdef XP_WIN32 |
|
73 int flagDbcsUri = allow_dbcs_uri(); |
|
74 #endif // XP_WIN32 |
|
75 PRBool flagEmptySegment = PR_FALSE; |
|
76 register int x; |
|
77 |
|
78 for (x = 0; t[x]; ++x) { |
|
79 if (t[x] == '/') { |
|
80 if (flagEmptySegment) |
|
81 return 1; // "/;a/b" |
|
82 #ifdef XP_WIN32 |
|
83 if (t[x+1] == '/' && x != 0) |
|
84 #else |
|
85 if (t[x+1] == '/') |
|
86 #endif |
|
87 return 1; |
|
88 if (t[x+1] == ';') |
|
89 flagEmptySegment = PR_TRUE; // "/;a/b" is evil, "/a/;b" is not |
|
90 if (t[x+1] == '.') { |
|
91 /* "." at end of line is always prohibited */ |
|
92 if (t[x+2] == '\0') |
|
93 return 1; |
|
94 |
|
95 /* "." as a path segment is prohibited conditionally */ |
|
96 if (!allow_dot_dir && (t[x+2] == '/' || t[x+2] == ';')) |
|
97 return 1; |
|
98 |
|
99 /* ".." as a path segment is always prohibited */ |
|
100 if (t[x+2] == '.' && (t[x+3] == '/' || t[x+3] == ';' || t[x+3] == '\0')) |
|
101 return 1; |
|
102 } |
|
103 } |
|
104 #ifdef XP_WIN32 |
|
105 // Don't allow '~' in the filename. On some filesystems a long name |
|
106 // (e.g. longfilename.htm) can be accessed using '~' bypassing any ACL |
|
107 // checks (e.g. longfi~1.htm). |
|
108 if (!allow_tilde && (t[x] == '~')) { |
|
109 return 1; |
|
110 } |
|
111 |
|
112 // Do not allow ':' apart from drive letter. Windows filestream |
|
113 // will treat filename::$DATA as a plain file & display content. |
|
114 // So block it to prevent source viewing vulnerability. |
|
115 if ((t[x] == ':') && x > 1) { |
|
116 return 1; |
|
117 } |
|
118 |
|
119 // On NT, the directory "abc...." is the same as "abc" |
|
120 // The only cheap way to catch this globally is to disallow |
|
121 // names with the trailing "."s. Hopefully this is not over |
|
122 // restrictive. |
|
123 // Also trailing spaces in names can wreak havoc on ACL checks |
|
124 // and name resolution. Therefore, ban them on the end of a |
|
125 // name. |
|
126 if (((t[x] == '.') || (t[x] == ' ')) && |
|
127 ((t[x+1] == ';') || (t[x+1] == '/') || (t[x+1] == '\0'))) |
|
128 { |
|
129 return 1; |
|
130 } |
|
131 |
|
132 // Skip past the second byte of two byte DBCS characters. Bug 353999 |
|
133 if (flagDbcsUri && t[x+1] && IsDBCSLeadByte(t[x])) x++; |
|
134 #endif // XP_WIN32 |
|
135 } |
|
136 return 0; |
|
137 } |
|
138 |
|
139 NSAPI_PUBLIC int util_uri_is_evil(const char *t) |
|
140 { |
|
141 return util_uri_is_evil_internal(t, 0, 0); |
|
142 } |
|
143 |
|
144 |
|
145 /* -------------------- util_uri_unescape_and_normalize -------------------- */ |
|
146 |
|
#ifdef XP_WIN32
/*
 * Unescape a URI in place and normalize it.
 *
 * Normalizing converts every "\" in the URI and pathinfo portion to "/";
 * per the original note, "\" in query strings is not touched.
 *
 * pool          pool handed to util_canonicalize_uri()
 * s             URI buffer; unescaped and rewritten in place
 * unnormalized  optional; when non-NULL it receives a copy of the unescaped
 *               (but not yet normalized) URI. The caller must provide a
 *               buffer large enough for that copy.
 *
 * Returns 1 on success. Returns 0 when strict unescaping fails, when the
 * URI cannot be canonicalized (e.g. it points out of the docroot), when
 * GetFullPathName() fails or needs more than MAX_PATH characters, when the
 * scratch buffer cannot be allocated, or when the resolved path contains
 * no backslash.
 */
NSAPI_PUBLIC
int util_uri_unescape_and_normalize(pool_handle_t *pool, char *s, char *unnormalized)
{
    if (!util_uri_unescape_strict(s))
        return 0;

    if (unnormalized)
        strcpy(unnormalized, s);

    /* First use: enable the GetFullPathName() step only when a multibyte
     * code page is active. */
    if (_getfullpathname == -1)
        _getfullpathname = (_getmbcp() != 0);

    /* Get canonical filename Bugid: 4672869
     * (a URI that starts with '/' can never be the literal "*", so no
     * separate check for "*" is needed). */
    if (_getfullpathname && *s == '/') {
        char *pzAbsPath = util_canonicalize_uri(pool, s, strlen(s), NULL);
        if (!pzAbsPath) {
            /* Error canonicalizing; possibly pointing out of docroot */
            return 0;
        }

        /* MAX_PATH characters plus one reserved byte for a trailing slash */
        char *pzPath = (char *)MALLOC(MAX_PATH + 1);
        if (!pzPath) {
            /* NOTE(review): pzAbsPath comes from util_canonicalize_uri()
             * (pool_malloc) but is released with FREE, as the original
             * code did - confirm the two are compatible. */
            FREE(pzAbsPath);
            return 0;
        }

        char *pzFilename = NULL;

        /* If the required length exceeds MAX_PATH (ignoring the reserved
         * byte), report BAD REQUEST. This happens when the URI is longer
         * than the supported URI length for MBCS windows. */
        int ret = GetFullPathName(pzAbsPath, MAX_PATH, pzPath, &pzFilename);
        if (!ret || ret > MAX_PATH) {
            FREE(pzAbsPath);
            FREE(pzPath);
            return 0;
        }

        int len = strlen(pzAbsPath);
        int pathlen = strlen(pzPath);

        /* GetFullPathName behaves differently on WINNT and WIN2K: on WINNT
         * the output drops a trailing slash ("/foo/" gives "c:\foo" rather
         * than "c:\foo\"). Restore it when the input had one and the
         * output does not. */
        if (pzAbsPath[len - 1] == '/' && pzPath[pathlen - 1] != '\\')
            strcat(pzPath, "\\");
        FREE(pzAbsPath);

        /* Keep everything from the first backslash on, i.e. drop the
         * drive prefix that GetFullPathName() put in front. */
        pzFilename = strchr(pzPath, '\\');
        if (!pzFilename) {
            FREE(pzPath);
            return 0;
        }
        strcpy(s, pzFilename);
        FREE(pzPath);
    }

    util_uri_normalize_slashes(s);

    return 1;
}
#endif /* XP_WIN32 */
|
207 |
|
208 |
|
209 /* ---------------------- util_uri_normalize_slashes ---------------------- */ |
|
210 |
|
/*
 * Rewrite every '\\' in s to '/' in place.
 *
 * Only XP_WIN32 builds do any work; elsewhere the function leaves s
 * untouched. When DBCS URIs are allowed, a DBCS lead byte and the byte
 * following it are stepped over together so that a trail byte that happens
 * to equal '\\' is not rewritten (Bug 353999).
 */
void util_uri_normalize_slashes(char *s)
{
#ifdef XP_WIN32
    const int dbcsEnabled = allow_dbcs_uri();

    for (char *p = s; *p != '\0'; ++p) {
        if (*p == '\\') {
            /* Normalize '\\' to '/' */
            *p = '/';
            continue;
        }

        /* Two-byte DBCS character: skip the trail byte as well. */
        if (dbcsEnabled && p[1] != '\0' && IsDBCSLeadByte(p[0]))
            ++p;
    }
#endif
}
|
228 |
|
229 |
|
230 /* --------------------------- util_uri_escape ---------------------------- */ |
|
231 NSAPI_PUBLIC char *util_uri_escape(char *od, const char *s) |
|
232 { |
|
233 int flagDbcsUri = allow_dbcs_uri(); |
|
234 char *d; |
|
235 |
|
236 if (!od) |
|
237 od = (char *) MALLOC((strlen(s)*3) + 1); |
|
238 d = od; |
|
239 |
|
240 while (*s) { |
|
241 if (strchr("% ?#:+&*\"'<>\r\n", *s)) { |
|
242 util_sprintf(d, "%%%02x", (unsigned char)*s); |
|
243 ++s; d += 3; |
|
244 } |
|
245 #ifdef XP_WIN32 |
|
246 else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) |
|
247 #else |
|
248 // Treat any character with the high bit set as a DBCS lead byte |
|
249 else if (flagDbcsUri && s[1] && (s[0] & 0x80)) |
|
250 #endif |
|
251 { |
|
252 // Escape the second byte of DBCS characters. The first byte will |
|
253 // have been escaped already. IE translates all unescaped '\\'s |
|
254 // into '/'. |
|
255 // Bug 353999 |
|
256 util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]); |
|
257 s += 2; d += 6; |
|
258 } |
|
259 else if (0x80 & *s) { |
|
260 util_sprintf(d, "%%%02x", (unsigned char)*s); |
|
261 ++s; d += 3; |
|
262 } else { |
|
263 *d++ = *s++; |
|
264 } |
|
265 } |
|
266 *d = '\0'; |
|
267 return od; |
|
268 } |
|
269 |
|
270 |
|
271 /* --------------------------- util_url_escape ---------------------------- */ |
|
272 |
|
273 NSAPI_PUBLIC char *util_url_escape(char *od, const char *s) |
|
274 { |
|
275 int flagDbcsUri = allow_dbcs_uri(); |
|
276 char *d; |
|
277 |
|
278 if (!od) |
|
279 od = (char *) MALLOC((strlen(s)*3) + 1); |
|
280 d = od; |
|
281 |
|
282 while (*s) { |
|
283 if (strchr("% +*\"'<>\r\n", *s)) { |
|
284 util_sprintf(d, "%%%02x", (unsigned char)*s); |
|
285 ++s; d += 3; |
|
286 } |
|
287 #ifdef XP_WIN32 |
|
288 else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0])) |
|
289 #else |
|
290 // Treat any character with the high bit set as a DBCS lead byte |
|
291 else if (flagDbcsUri && s[1] && (s[0] & 0x80)) |
|
292 #endif |
|
293 { |
|
294 // Escape the second byte of DBCS characters. The first byte will |
|
295 // have been escaped already. IE translates all unescaped '\\'s |
|
296 // into '/'. |
|
297 // Bug 353999 |
|
298 util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]); |
|
299 s += 2; d += 6; |
|
300 } |
|
301 else if (0x80 & *s) { |
|
302 util_sprintf(d, "%%%02x", (unsigned char)*s); |
|
303 ++s; d += 3; |
|
304 } else { |
|
305 *d++ = *s++; |
|
306 } |
|
307 } |
|
308 *d = '\0'; |
|
309 return od; |
|
310 } |
|
311 |
|
312 |
|
313 /* ------------------------- util_uri_strip_params ------------------------- */ |
|
314 |
|
315 NSAPI_PUBLIC char* util_uri_strip_params(char *uri) |
|
316 { |
|
317 // As per RFC2396, URI path segments can contain parameters beginning with |
|
318 // ';'. These parameters must be removed from the ppath. Bug 418271 |
|
319 char* out; |
|
320 if((out = strchr(uri, ';'))) { |
|
321 char* in = out; |
|
322 while (*in) { |
|
323 if (*in == ';') { |
|
324 // Skip past parameter |
|
325 do in++; while (*in && *in != '/'); |
|
326 } else { |
|
327 // Copy non-parameter path data |
|
328 *out++ = *in++; |
|
329 } |
|
330 } |
|
331 *out = 0; |
|
332 } |
|
333 return uri; |
|
334 } |
|
335 |
|
336 |
|
337 /* ------------------------ util_canonicalize_uri ------------------------- */ |
|
338 |
|
339 /* |
|
340 * rewrite rules: |
|
341 * // -> '/' |
|
342 * /./ -> '/' |
|
343 * /.\0 -> '/' |
|
344 * /foo/../ -> '/' |
|
345 * /foo/..\0 -> '/' |
|
346 * |
|
347 * Allocate a new string, as otherwise replacing in-line would impact the |
|
348 * RequestURI, i.e. original URI in the request. |
|
349 * Some guidelines in: http://www.ietf.org/rfc/rfc2396.txt |
|
350 * Uniform Resource Identifiers (URI): Generic Syntax |
|
351 */ |
|
352 NSAPI_PUBLIC char* util_canonicalize_uri(pool_handle_t *pool, const char *uri, int len, int *pcanonlen) |
|
353 { |
|
354 PRBool success = PR_TRUE; |
|
355 const char *in_ptr = uri; |
|
356 int in = 0; |
|
357 int in_len = len; |
|
358 |
|
359 //PR_ASSERT(uri != NULL); // TODO |
|
360 |
|
361 char* canonPath = (char *)pool_malloc(pool, in_len+1); |
|
362 char* out_ptr = canonPath; |
|
363 |
|
364 if (!canonPath) { |
|
365 success = PR_FALSE; |
|
366 goto done; |
|
367 } |
|
368 |
|
369 |
|
370 /* in goes from 0 .. sURIPath.len-1; out_ptr points to |
|
371 * space where next char from input would be copied to |
|
372 */ |
|
373 while (in < in_len) { |
|
374 |
|
375 /* If the character isn't '/' then copy it out and move on*/ |
|
376 if (in_ptr[0] != '/') { |
|
377 *out_ptr++ = *in_ptr++; |
|
378 in++; |
|
379 continue; |
|
380 } |
|
381 |
|
382 /* found '/' and reached end of sURIPath, done */ |
|
383 if (in+1 >= in_len) { |
|
384 *out_ptr++ = *in_ptr++; |
|
385 in++; |
|
386 break; |
|
387 } |
|
388 |
|
389 /* we have '/' and there are more chars in the string */ |
|
390 switch(in_ptr[1]) { |
|
391 case '/': |
|
392 /* '//' => '/' */ |
|
393 in_ptr++; |
|
394 in++; |
|
395 break; |
|
396 |
|
397 case '.': |
|
398 /* we have "/." so far */ |
|
399 if (in+2 >= in_len) { |
|
400 /* the string ends after this; basically ignore '.' |
|
401 * make sure the ending / is transferred to output. |
|
402 */ |
|
403 *out_ptr++ = *in_ptr++; |
|
404 goto done; |
|
405 } |
|
406 |
|
407 /* more chars after "/."; see if it is a '/' */ |
|
408 if (in_ptr[2] == '/') { |
|
409 /* in deed, compact "/./" => "/"; */ |
|
410 in_ptr += 2; |
|
411 in += 2; |
|
412 break; |
|
413 } |
|
414 |
|
415 if (in_ptr[2] != '.') { |
|
416 /* "/.x" where x is not '.'; copy as is */ |
|
417 *out_ptr++ = *in_ptr++; |
|
418 in++; |
|
419 break; |
|
420 } |
|
421 |
|
422 /* we have "/.." so far. see if we have either string |
|
423 * ending after this or '/' following. |
|
424 */ |
|
425 if (in+3 < in_len && in_ptr[3] != '/' && in_ptr[3] != ';') { |
|
426 /* we have "/..x" here; so copy as is */ |
|
427 *out_ptr++ = *in_ptr++; |
|
428 in++; |
|
429 } |
|
430 else { |
|
431 /* we have "foo/../" or "foo/.." at the end; */ |
|
432 if (out_ptr == canonPath) { |
|
433 /* oops, we found "/../" pointing out of docroot */ |
|
434 success = PR_FALSE; |
|
435 goto done; |
|
436 } |
|
437 |
|
438 /* remove the previous segment in the output */ |
|
439 for (out_ptr--; |
|
440 out_ptr != canonPath && out_ptr[0] != '/'; |
|
441 out_ptr--); /* Empty Loop */ |
|
442 |
|
443 /* point to '/' if the last segment ended with .. then |
|
444 * leave the '/' before the previous segment. |
|
445 */ |
|
446 if(in+3 == in_len) |
|
447 out_ptr++; |
|
448 |
|
449 /* skip the input as well */ |
|
450 in_ptr += 3; |
|
451 in += 3; |
|
452 } |
|
453 break; |
|
454 |
|
455 default: |
|
456 /* If we already have '/' at out_ptr we donot need to copy */ |
|
457 if (out_ptr == canonPath || *(out_ptr-1) != '/') |
|
458 *out_ptr++ = *in_ptr; |
|
459 in_ptr++; in++; |
|
460 break; |
|
461 } |
|
462 } |
|
463 |
|
464 done: |
|
465 int canonLen = 0; |
|
466 |
|
467 if (success) { |
|
468 /* the path looks fine; return the canonicalized form */ |
|
469 canonLen = out_ptr - canonPath; |
|
470 canonPath[canonLen] = '\0'; |
|
471 } else { |
|
472 /* error canonicalizing */ |
|
473 pool_free(pool, canonPath); |
|
474 canonPath = NULL; |
|
475 } |
|
476 |
|
477 if (pcanonlen) |
|
478 *pcanonlen = canonLen; |
|
479 |
|
480 return canonPath; |
|
481 } |
|
482 |
|
483 |
|
484 /* ---------------------- util_canonicalize_redirect ---------------------- */ |
|
485 |
|
486 NSAPI_PUBLIC char* util_canonicalize_redirect(pool_handle_t *pool, const char *baseUri, const char *newUri) |
|
487 { |
|
488 //PR_ASSERT(baseUri != NULL); // TODO |
|
489 |
|
490 if (*newUri == '/') |
|
491 return util_canonicalize_uri(pool, newUri, strlen(newUri), NULL); |
|
492 |
|
493 int bLen = strlen(baseUri); |
|
494 if (bLen > 0 && baseUri[bLen - 1] != '/') { |
|
495 while (bLen > 0 && baseUri[bLen - 1] != '/') |
|
496 bLen--; |
|
497 } |
|
498 |
|
499 int pLen = strlen(newUri) + bLen + 1; // 1 for slash |
|
500 char *pUri = (char *)pool_malloc(pool, pLen + 1); |
|
501 if (!pUri) |
|
502 return PR_FALSE; |
|
503 |
|
504 memcpy(pUri, baseUri, bLen); |
|
505 pUri[bLen] = '/'; |
|
506 strcpy(pUri + bLen + 1, newUri); |
|
507 |
|
508 char *rval = util_canonicalize_uri(pool, pUri, pLen, NULL); |
|
509 pool_free(pool, pUri); |
|
510 |
|
511 return rval; |
|
512 } |
|
513 |
|
514 |
|
515 /* ------------------------ util_host_port_suffix ------------------------- */ |
|
516 |
|
517 NSAPI_PUBLIC char *util_host_port_suffix(char *h) |
|
518 { |
|
519 return (char *)util_host_port_suffix((const char *)h); |
|
520 } |
|
521 |
|
/*
 * Return a pointer to the colon preceding the port number in a hostname.
 *
 *     util_host_port_suffix("foo.com:80") = ":80"
 *     util_host_port_suffix("foo.com") = NULL
 *     util_host_port_suffix("[::]:80") = ":80"
 *     util_host_port_suffix("[::]") = NULL
 *
 * Scanning stops at the end of the string or at the first '/'. Text inside
 * '[' ... ']' (an IPv6 address) is skipped, so its colons are never taken
 * for a port separator. A NULL argument yields NULL.
 */
const char *util_host_port_suffix(const char *h)
{
    if (h == NULL)
        return NULL;

    for (const char *p = h; ; ++p) {
        switch (*p) {
        case '\0':
        case '/':
            return NULL;        /* end of host, no port found */

        case ':':
            return p;           /* found port */

        case '[':
            /* IPv6 address: run ahead to the closing ']' */
            while (*p != '\0' && *p != ']')
                ++p;
            if (*p == '\0')
                return NULL;    /* unterminated '[': no port found */
            break;              /* the loop increment steps past ']' */

        default:
            break;
        }
    }
}
|