src/server/util/uri.cpp

changeset 386
b91f8efadb63
parent 385
a1f4cb076d2f
parent 365
2ea1ed291e9f
child 387
f5caf41b4db6
equal deleted inserted replaced
385:a1f4cb076d2f 386:b91f8efadb63
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
5 *
6 * THE BSD LICENSE
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * Neither the name of the nor the names of its contributors may be
18 * used to endorse or promote products derived from this software without
19 * specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
28 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
31 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #ifdef XP_WIN32
35 #define _MBCS
36 #include <windows.h>
37 #include <mbctype.h>
38 #endif
39
40 #include "util.h"
41 #include "pool.h"
42 //include "frame/conf_api.h"
43 //include "support/stringvalue.h"
44
45 #ifdef XP_WIN32
46 static PRBool _getfullpathname = -1;
47 #endif /* XP_WIN32 */
48
49 /* --------------------------- util_uri_is_evil --------------------------- */
50
51 static inline int allow_dbcs_uri()
52 {
53 /*
54 static int flagDbcsUri = -1;
55 if (flagDbcsUri == -1) {
56 flagDbcsUri = StringValue::getBoolean(conf_findGlobal("DbcsUri"));
57 }
58 return flagDbcsUri;
59 */
60 return PR_TRUE;
61 }
62
63 #ifdef XP_WIN32
64 void set_fullpathname(PRBool b)
65 {
66 _getfullpathname = b;
67 }
68 #endif /*XP_WIN32*/
69
70 NSAPI_PUBLIC int util_uri_is_evil_internal(const char *t, int allow_tilde, int allow_dot_dir)
71 {
72 #ifdef XP_WIN32
73 int flagDbcsUri = allow_dbcs_uri();
74 #endif // XP_WIN32
75 PRBool flagEmptySegment = PR_FALSE;
76 register int x;
77
78 for (x = 0; t[x]; ++x) {
79 if (t[x] == '/') {
80 if (flagEmptySegment)
81 return 1; // "/;a/b"
82 #ifdef XP_WIN32
83 if (t[x+1] == '/' && x != 0)
84 #else
85 if (t[x+1] == '/')
86 #endif
87 return 1;
88 if (t[x+1] == ';')
89 flagEmptySegment = PR_TRUE; // "/;a/b" is evil, "/a/;b" is not
90 if (t[x+1] == '.') {
91 /* "." at end of line is always prohibited */
92 if (t[x+2] == '\0')
93 return 1;
94
95 /* "." as a path segment is prohibited conditionally */
96 if (!allow_dot_dir && (t[x+2] == '/' || t[x+2] == ';'))
97 return 1;
98
99 /* ".." as a path segment is always prohibited */
100 if (t[x+2] == '.' && (t[x+3] == '/' || t[x+3] == ';' || t[x+3] == '\0'))
101 return 1;
102 }
103 }
104 #ifdef XP_WIN32
105 // Don't allow '~' in the filename. On some filesystems a long name
106 // (e.g. longfilename.htm) can be accessed using '~' bypassing any ACL
107 // checks (e.g. longfi~1.htm).
108 if (!allow_tilde && (t[x] == '~')) {
109 return 1;
110 }
111
112 // Do not allow ':' apart from drive letter. Windows filestream
113 // will treat filename::$DATA as a plain file & display content.
114 // So block it to prevent source viewing vulnerability.
115 if ((t[x] == ':') && x > 1) {
116 return 1;
117 }
118
119 // On NT, the directory "abc...." is the same as "abc"
120 // The only cheap way to catch this globally is to disallow
121 // names with the trailing "."s. Hopefully this is not over
122 // restrictive.
123 // Also trailing spaces in names can wreak havoc on ACL checks
124 // and name resolution. Therefore, ban them on the end of a
125 // name.
126 if (((t[x] == '.') || (t[x] == ' ')) &&
127 ((t[x+1] == ';') || (t[x+1] == '/') || (t[x+1] == '\0')))
128 {
129 return 1;
130 }
131
132 // Skip past the second byte of two byte DBCS characters. Bug 353999
133 if (flagDbcsUri && t[x+1] && IsDBCSLeadByte(t[x])) x++;
134 #endif // XP_WIN32
135 }
136 return 0;
137 }
138
139 NSAPI_PUBLIC int util_uri_is_evil(const char *t)
140 {
141 return util_uri_is_evil_internal(t, 0, 0);
142 }
143
144
145 /* -------------------- util_uri_unescape_and_normalize -------------------- */
146
147 #ifdef XP_WIN32
148 /* The server calls this function to unescape the URI and also normalize
149 * the uri. Normalizing the uri converts all "\" characters in the URI
150 * and pathinfo portion to "/". Does not touch "\" in query strings.
151 */
152 NSAPI_PUBLIC
153 int util_uri_unescape_and_normalize(pool_handle_t *pool, char *s, char *unnormalized)
154 {
155 if(!(util_uri_unescape_strict(s)))
156 return 0;
157
158 if (unnormalized) strcpy(unnormalized, s);
159
160 if (_getfullpathname == -1)
161 _getfullpathname = (_getmbcp() != 0);
162
163 /* Get canonical filename Bugid: 4672869 */
164 if(_getfullpathname && strcmp(s, "*") && (*s == '/' ) ) {
165 char *pzAbsPath = NULL;
166 int pathlen = 0;
167 int len = 0;
168 int ret = 0;
169 if(!(pzAbsPath = util_canonicalize_uri(pool, s, strlen(s), NULL))) {
170 //Error canonicalizing; possibly pointing out of docroot
171 return 0;
172 }
173 char *pzPath = (char *)MALLOC(MAX_PATH + 1); /* reserved byte for trailing slash */
174 char *pzFilename = NULL;
175
176 /* If required length of the buffer(pzPath) is more than the allocated one i.e. MAX_PATH(neglecting the reserved byte for trailing slash), return BAD REQUEST. This will happen if length of uri is more than the specified uri length(257) for MBCS windows */
177 if(!(ret = GetFullPathName(pzAbsPath, MAX_PATH, pzPath, &pzFilename)) || ( ret > MAX_PATH)){
178 FREE(pzAbsPath);
179 FREE(pzPath);
180 return 0;
181 }
182 len = strlen(pzAbsPath);
183 pathlen = strlen( pzPath );
184
185 /* GetFullPathName behaves differently in case of WINNT and WIN2K */
186 /* o/p string doesn't contain the trailing slash in case of WINNT */
187 /* if i/p is /foo/, we get o/p as c:\foo instead of c:\foo\ */
188 /* Checking if i/p has trailing slash and o/p doesn't have, then */
189 /* adding slash */
190 if ( pzAbsPath[len-1] == '/' && pzPath[pathlen-1] != '\\')
191 strcat( pzPath, "\\");
192 FREE(pzAbsPath);
193 pzFilename = strchr(pzPath, '\\');
194 if(!pzFilename) {
195 FREE(pzPath);
196 return 0;
197 }
198 strcpy(s, pzFilename);
199 FREE(pzPath);
200 }
201
202 util_uri_normalize_slashes(s);
203
204 return 1;
205 }
206 #endif /* XP_WIN32 */
207
208
209 /* ---------------------- util_uri_normalize_slashes ---------------------- */
210
/*
 * On XP_WIN32, rewrites every '\' in s to '/' in place, stepping over the
 * trailing byte of two byte DBCS characters so it is never mistaken for a
 * backslash. On other platforms the string is left untouched.
 */
void util_uri_normalize_slashes(char *s)
{
#ifdef XP_WIN32
    const int dbcsEnabled = allow_dbcs_uri();

    for (; *s != '\0'; s++) {
        if (*s == '\\') {
            // Normalize '\\' to '/'
            *s = '/';
            continue;
        }

        // Skip past two byte DBCS characters. Bug 353999
        if (dbcsEnabled && s[1] && IsDBCSLeadByte(s[0]))
            s++;
    }
#endif
}
228
229
230 /* --------------------------- util_uri_escape ---------------------------- */
231 NSAPI_PUBLIC char *util_uri_escape(char *od, const char *s)
232 {
233 int flagDbcsUri = allow_dbcs_uri();
234 char *d;
235
236 if (!od)
237 od = (char *) MALLOC((strlen(s)*3) + 1);
238 d = od;
239
240 while (*s) {
241 if (strchr("% ?#:+&*\"'<>\r\n", *s)) {
242 util_sprintf(d, "%%%02x", (unsigned char)*s);
243 ++s; d += 3;
244 }
245 #ifdef XP_WIN32
246 else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0]))
247 #else
248 // Treat any character with the high bit set as a DBCS lead byte
249 else if (flagDbcsUri && s[1] && (s[0] & 0x80))
250 #endif
251 {
252 // Escape the second byte of DBCS characters. The first byte will
253 // have been escaped already. IE translates all unescaped '\\'s
254 // into '/'.
255 // Bug 353999
256 util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]);
257 s += 2; d += 6;
258 }
259 else if (0x80 & *s) {
260 util_sprintf(d, "%%%02x", (unsigned char)*s);
261 ++s; d += 3;
262 } else {
263 *d++ = *s++;
264 }
265 }
266 *d = '\0';
267 return od;
268 }
269
270
271 /* --------------------------- util_url_escape ---------------------------- */
272
273 NSAPI_PUBLIC char *util_url_escape(char *od, const char *s)
274 {
275 int flagDbcsUri = allow_dbcs_uri();
276 char *d;
277
278 if (!od)
279 od = (char *) MALLOC((strlen(s)*3) + 1);
280 d = od;
281
282 while (*s) {
283 if (strchr("% +*\"'<>\r\n", *s)) {
284 util_sprintf(d, "%%%02x", (unsigned char)*s);
285 ++s; d += 3;
286 }
287 #ifdef XP_WIN32
288 else if (flagDbcsUri && s[1] && IsDBCSLeadByte(s[0]))
289 #else
290 // Treat any character with the high bit set as a DBCS lead byte
291 else if (flagDbcsUri && s[1] && (s[0] & 0x80))
292 #endif
293 {
294 // Escape the second byte of DBCS characters. The first byte will
295 // have been escaped already. IE translates all unescaped '\\'s
296 // into '/'.
297 // Bug 353999
298 util_sprintf(d, "%%%02x%%%02x", (unsigned char)s[0], (unsigned char)s[1]);
299 s += 2; d += 6;
300 }
301 else if (0x80 & *s) {
302 util_sprintf(d, "%%%02x", (unsigned char)*s);
303 ++s; d += 3;
304 } else {
305 *d++ = *s++;
306 }
307 }
308 *d = '\0';
309 return od;
310 }
311
312
313 /* ------------------------- util_uri_strip_params ------------------------- */
314
315 NSAPI_PUBLIC char* util_uri_strip_params(char *uri)
316 {
317 // As per RFC2396, URI path segments can contain parameters beginning with
318 // ';'. These parameters must be removed from the ppath. Bug 418271
319 char* out;
320 if((out = strchr(uri, ';'))) {
321 char* in = out;
322 while (*in) {
323 if (*in == ';') {
324 // Skip past parameter
325 do in++; while (*in && *in != '/');
326 } else {
327 // Copy non-parameter path data
328 *out++ = *in++;
329 }
330 }
331 *out = 0;
332 }
333 return uri;
334 }
335
336
337 /* ------------------------ util_canonicalize_uri ------------------------- */
338
339 /*
340 * rewrite rules:
341 * // -> '/'
342 * /./ -> '/'
343 * /.\0 -> '/'
344 * /foo/../ -> '/'
345 * /foo/..\0 -> '/'
346 *
347 * Allocate a new string, as otherwise replacing in-line would impact the
348 * RequestURI, i.e. original URI in the request.
349 * Some guidelines in: http://www.ietf.org/rfc/rfc2396.txt
350 * Uniform Resource Identifiers (URI): Generic Syntax
351 */
352 NSAPI_PUBLIC char* util_canonicalize_uri(pool_handle_t *pool, const char *uri, int len, int *pcanonlen)
353 {
354 PRBool success = PR_TRUE;
355 const char *in_ptr = uri;
356 int in = 0;
357 int in_len = len;
358
359 //PR_ASSERT(uri != NULL); // TODO
360
361 char* canonPath = (char *)pool_malloc(pool, in_len+1);
362 char* out_ptr = canonPath;
363
364 if (!canonPath) {
365 success = PR_FALSE;
366 goto done;
367 }
368
369
370 /* in goes from 0 .. sURIPath.len-1; out_ptr points to
371 * space where next char from input would be copied to
372 */
373 while (in < in_len) {
374
375 /* If the character isn't '/' then copy it out and move on*/
376 if (in_ptr[0] != '/') {
377 *out_ptr++ = *in_ptr++;
378 in++;
379 continue;
380 }
381
382 /* found '/' and reached end of sURIPath, done */
383 if (in+1 >= in_len) {
384 *out_ptr++ = *in_ptr++;
385 in++;
386 break;
387 }
388
389 /* we have '/' and there are more chars in the string */
390 switch(in_ptr[1]) {
391 case '/':
392 /* '//' => '/' */
393 in_ptr++;
394 in++;
395 break;
396
397 case '.':
398 /* we have "/." so far */
399 if (in+2 >= in_len) {
400 /* the string ends after this; basically ignore '.'
401 * make sure the ending / is transferred to output.
402 */
403 *out_ptr++ = *in_ptr++;
404 goto done;
405 }
406
407 /* more chars after "/."; see if it is a '/' */
408 if (in_ptr[2] == '/') {
409 /* in deed, compact "/./" => "/"; */
410 in_ptr += 2;
411 in += 2;
412 break;
413 }
414
415 if (in_ptr[2] != '.') {
416 /* "/.x" where x is not '.'; copy as is */
417 *out_ptr++ = *in_ptr++;
418 in++;
419 break;
420 }
421
422 /* we have "/.." so far. see if we have either string
423 * ending after this or '/' following.
424 */
425 if (in+3 < in_len && in_ptr[3] != '/' && in_ptr[3] != ';') {
426 /* we have "/..x" here; so copy as is */
427 *out_ptr++ = *in_ptr++;
428 in++;
429 }
430 else {
431 /* we have "foo/../" or "foo/.." at the end; */
432 if (out_ptr == canonPath) {
433 /* oops, we found "/../" pointing out of docroot */
434 success = PR_FALSE;
435 goto done;
436 }
437
438 /* remove the previous segment in the output */
439 for (out_ptr--;
440 out_ptr != canonPath && out_ptr[0] != '/';
441 out_ptr--); /* Empty Loop */
442
443 /* point to '/' if the last segment ended with .. then
444 * leave the '/' before the previous segment.
445 */
446 if(in+3 == in_len)
447 out_ptr++;
448
449 /* skip the input as well */
450 in_ptr += 3;
451 in += 3;
452 }
453 break;
454
455 default:
456 /* If we already have '/' at out_ptr we donot need to copy */
457 if (out_ptr == canonPath || *(out_ptr-1) != '/')
458 *out_ptr++ = *in_ptr;
459 in_ptr++; in++;
460 break;
461 }
462 }
463
464 done:
465 int canonLen = 0;
466
467 if (success) {
468 /* the path looks fine; return the canonicalized form */
469 canonLen = out_ptr - canonPath;
470 canonPath[canonLen] = '\0';
471 } else {
472 /* error canonicalizing */
473 pool_free(pool, canonPath);
474 canonPath = NULL;
475 }
476
477 if (pcanonlen)
478 *pcanonlen = canonLen;
479
480 return canonPath;
481 }
482
483
484 /* ---------------------- util_canonicalize_redirect ---------------------- */
485
486 NSAPI_PUBLIC char* util_canonicalize_redirect(pool_handle_t *pool, const char *baseUri, const char *newUri)
487 {
488 //PR_ASSERT(baseUri != NULL); // TODO
489
490 if (*newUri == '/')
491 return util_canonicalize_uri(pool, newUri, strlen(newUri), NULL);
492
493 int bLen = strlen(baseUri);
494 if (bLen > 0 && baseUri[bLen - 1] != '/') {
495 while (bLen > 0 && baseUri[bLen - 1] != '/')
496 bLen--;
497 }
498
499 int pLen = strlen(newUri) + bLen + 1; // 1 for slash
500 char *pUri = (char *)pool_malloc(pool, pLen + 1);
501 if (!pUri)
502 return PR_FALSE;
503
504 memcpy(pUri, baseUri, bLen);
505 pUri[bLen] = '/';
506 strcpy(pUri + bLen + 1, newUri);
507
508 char *rval = util_canonicalize_uri(pool, pUri, pLen, NULL);
509 pool_free(pool, pUri);
510
511 return rval;
512 }
513
514
515 /* ------------------------ util_host_port_suffix ------------------------- */
516
517 NSAPI_PUBLIC char *util_host_port_suffix(char *h)
518 {
519 return (char *)util_host_port_suffix((const char *)h);
520 }
521
/*
 * Return a pointer to the colon preceding the port number in a hostname,
 * or NULL if there is none (or h is NULL).
 *
 *   util_host_port_suffix("foo.com:80") = ":80"
 *   util_host_port_suffix("foo.com")    = NULL
 *   util_host_port_suffix("[::]:80")    = ":80"
 *   util_host_port_suffix("[::]")       = NULL
 *
 * The host ends at the first '/' or at the end of the string. Everything
 * between '[' and ']' (an IPv6 address) is skipped.
 */
const char *util_host_port_suffix(const char *h)
{
    int in_brackets = 0;

    if (h == NULL)
        return h;

    for (;; h++) {
        const char c = *h;

        if (c == '\0')
            return NULL;        /* end of host, no port found */

        if (in_brackets) {
            /* inside an IPv6 address: only ']' is of interest */
            if (c == ']')
                in_brackets = 0;
            continue;
        }

        if (c == '/')
            return NULL;        /* end of host, no port found */

        if (c == ':')
            return h;           /* found port */

        if (c == '[')
            in_brackets = 1;    /* start of IPv6 address */
    }
}

mercurial