2 * uri.c: set of generic URI related routines
6 * See Copyright for the status of this software.
16 #include <libxml/xmlmemory.h>
17 #include <libxml/uri.h>
18 #include <libxml/globals.h>
19 #include <libxml/xmlerror.h>
21 /************************************************************************
23 * Macros to differentiate various character type *
24 * directly extracted from RFC 2396 *
26 ************************************************************************/
29 * alpha = lowalpha | upalpha
/*
 * Single-character class tests. Each macro below takes a character value x
 * and expands to a boolean expression built from comparisons on x. Because
 * x can be expanded several times, pass only side-effect-free arguments.
 */
31 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
35 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
36 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
37 * "u" | "v" | "w" | "x" | "y" | "z"
/* Range test: relies on 'a'..'z' being contiguous character codes. */
40 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
43 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
44 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
45 * "U" | "V" | "W" | "X" | "Y" | "Z"
/* Range test: relies on 'A'..'Z' being contiguous character codes. */
47 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
50 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
53 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
56 * alphanum = alpha | digit
59 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
62 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
63 * "a" | "b" | "c" | "d" | "e" | "f"
/* Accepts hex digits in either letter case. */
66 #define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
67 (((x) >= 'A') && ((x) <= 'F')))
70 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
73 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
74 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
75 ((x) == '(') || ((x) == ')'))
79 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
82 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
83 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
84 ((x) == '+') || ((x) == '$') || ((x) == ','))
87 * unreserved = alphanum | mark
90 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
93 * escaped = "%" hex hex
96 #define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) && \
100 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
101 * "&" | "=" | "+" | "$" | ","
/*
 * Takes a pointer p to the current character rather than a character
 * value, so that IS_ESCAPED(p) can look at the bytes following a '%'.
 * p is read several times and never advanced; it must be free of side
 * effects.
 */
103 #define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
104 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\
105 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\
106 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
109 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
111 #define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
112 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
113 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
117 * rel_segment = 1*( unreserved | escaped |
118 * ";" | "@" | "&" | "=" | "+" | "$" | "," )
121 #define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
122 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \
123 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
127 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
/*
 * Tests a single character value x against the set allowed inside a scheme
 * name. It does not enforce the "must start with alpha" part of the
 * production; that has to be checked separately on the first character.
 */
130 #define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) || \
131 ((x) == '+') || ((x) == '-') || ((x) == '.'))
134 * reg_name = 1*( unreserved | escaped | "$" | "," |
135 * ";" | ":" | "@" | "&" | "=" | "+" )
/*
 * Takes a pointer p to the current character (not a character value) so
 * that IS_ESCAPED(p) can inspect the bytes after a '%'. p is only read,
 * possibly several times, and is never advanced by this macro.
 */
138 #define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
139 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || \
140 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || \
141 ((*(p) == '=')) || ((*(p) == '+')))
144 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
147 #define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
148 ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \
149 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
153 * uric = reserved | unreserved | escaped
156 #define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
160 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
/*
 * Takes a pointer p and tests only the character *p against the eight
 * "unwise" characters listed in the production. p is dereferenced once per
 * comparison and is never advanced, so it must be free of side effects.
 */
163 #define IS_UNWISE(p) \
164 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
165 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
166 ((*(p) == ']')) || ((*(p) == '`')))
169 * Skip to next pointer char, handle escaped sequences
/*
 * NEXT(p): advance the pointer lvalue p past one URI character.
 *
 * If *p is '%', p is moved forward by three bytes (the '%' plus two more,
 * i.e. a "%XX" escape); otherwise it is moved forward by one byte.
 *
 * The macro does not itself verify that two bytes follow a '%'; it is meant
 * to be used after a character-class test on p has already matched.
 *
 * The argument is parenthesized everywhere so that lvalue expressions such
 * as *pp expand correctly (an unparenthesized "*pp++" would advance pp, not
 * *pp). p is still expanded more than once, so it must be a modifiable
 * lvalue without side effects.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
175 * Productions from the spec.
177 * authority = server | reg_name
178 * reg_name = 1*( unreserved | escaped | "$" | "," |
179 * ";" | ":" | "@" | "&" | "=" | "+" )
181 * path = [ abs_path | opaque_part ]
184 /************************************************************************
186 * Generic URI structure functions *
188 ************************************************************************/
193 * Simply creates an empty xmlURI
195 * Returns the new structure or NULL in case of error
201 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
203 xmlGenericError(xmlGenericErrorContext,
204 "xmlCreateURI: out of memory\n");
207 memset(ret, 0, sizeof(xmlURI));
213 * @uri: pointer to an xmlURI
215 * Save the URI as an escaped string
217 * Returns a new string (to be deallocated by caller)
220 xmlSaveUri(xmlURIPtr uri) {
226 if (uri == NULL) return(NULL);
230 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
232 xmlGenericError(xmlGenericErrorContext,
233 "xmlSaveUri: out of memory\n");
238 if (uri->scheme != NULL) {
243 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
245 xmlGenericError(xmlGenericErrorContext,
246 "xmlSaveUri: out of memory\n");
254 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
256 xmlGenericError(xmlGenericErrorContext,
257 "xmlSaveUri: out of memory\n");
263 if (uri->opaque != NULL) {
266 if (len + 3 >= max) {
268 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
270 xmlGenericError(xmlGenericErrorContext,
271 "xmlSaveUri: out of memory\n");
275 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
278 int val = *(unsigned char *)p++;
279 int hi = val / 0x10, lo = val % 0x10;
281 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
282 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
286 if (uri->server != NULL) {
287 if (len + 3 >= max) {
289 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
291 xmlGenericError(xmlGenericErrorContext,
292 "xmlSaveUri: out of memory\n");
298 if (uri->user != NULL) {
301 if (len + 3 >= max) {
303 ret = (xmlChar *) xmlRealloc(ret,
304 (max + 1) * sizeof(xmlChar));
306 xmlGenericError(xmlGenericErrorContext,
307 "xmlSaveUri: out of memory\n");
311 if ((IS_UNRESERVED(*(p))) ||
312 ((*(p) == ';')) || ((*(p) == ':')) ||
313 ((*(p) == '&')) || ((*(p) == '=')) ||
314 ((*(p) == '+')) || ((*(p) == '$')) ||
318 int val = *(unsigned char *)p++;
319 int hi = val / 0x10, lo = val % 0x10;
321 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
322 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
325 if (len + 3 >= max) {
327 ret = (xmlChar *) xmlRealloc(ret,
328 (max + 1) * sizeof(xmlChar));
330 xmlGenericError(xmlGenericErrorContext,
331 "xmlSaveUri: out of memory\n");
341 ret = (xmlChar *) xmlRealloc(ret,
342 (max + 1) * sizeof(xmlChar));
344 xmlGenericError(xmlGenericErrorContext,
345 "xmlSaveUri: out of memory\n");
352 if (len + 10 >= max) {
354 ret = (xmlChar *) xmlRealloc(ret,
355 (max + 1) * sizeof(xmlChar));
357 xmlGenericError(xmlGenericErrorContext,
358 "xmlSaveUri: out of memory\n");
362 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
364 } else if (uri->authority != NULL) {
365 if (len + 3 >= max) {
367 ret = (xmlChar *) xmlRealloc(ret,
368 (max + 1) * sizeof(xmlChar));
370 xmlGenericError(xmlGenericErrorContext,
371 "xmlSaveUri: out of memory\n");
379 if (len + 3 >= max) {
381 ret = (xmlChar *) xmlRealloc(ret,
382 (max + 1) * sizeof(xmlChar));
384 xmlGenericError(xmlGenericErrorContext,
385 "xmlSaveUri: out of memory\n");
389 if ((IS_UNRESERVED(*(p))) ||
390 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
391 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
392 ((*(p) == '=')) || ((*(p) == '+')))
395 int val = *(unsigned char *)p++;
396 int hi = val / 0x10, lo = val % 0x10;
398 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
399 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
402 } else if (uri->scheme != NULL) {
403 if (len + 3 >= max) {
405 ret = (xmlChar *) xmlRealloc(ret,
406 (max + 1) * sizeof(xmlChar));
408 xmlGenericError(xmlGenericErrorContext,
409 "xmlSaveUri: out of memory\n");
416 if (uri->path != NULL) {
419 if (len + 3 >= max) {
421 ret = (xmlChar *) xmlRealloc(ret,
422 (max + 1) * sizeof(xmlChar));
424 xmlGenericError(xmlGenericErrorContext,
425 "xmlSaveUri: out of memory\n");
429 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
430 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
431 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
435 int val = *(unsigned char *)p++;
436 int hi = val / 0x10, lo = val % 0x10;
438 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
439 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
443 if (uri->query != NULL) {
444 if (len + 3 >= max) {
446 ret = (xmlChar *) xmlRealloc(ret,
447 (max + 1) * sizeof(xmlChar));
449 xmlGenericError(xmlGenericErrorContext,
450 "xmlSaveUri: out of memory\n");
457 if (len + 3 >= max) {
459 ret = (xmlChar *) xmlRealloc(ret,
460 (max + 1) * sizeof(xmlChar));
462 xmlGenericError(xmlGenericErrorContext,
463 "xmlSaveUri: out of memory\n");
467 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
470 int val = *(unsigned char *)p++;
471 int hi = val / 0x10, lo = val % 0x10;
473 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
474 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
479 if (uri->fragment != NULL) {
480 if (len + 3 >= max) {
482 ret = (xmlChar *) xmlRealloc(ret,
483 (max + 1) * sizeof(xmlChar));
485 xmlGenericError(xmlGenericErrorContext,
486 "xmlSaveUri: out of memory\n");
493 if (len + 3 >= max) {
495 ret = (xmlChar *) xmlRealloc(ret,
496 (max + 1) * sizeof(xmlChar));
498 xmlGenericError(xmlGenericErrorContext,
499 "xmlSaveUri: out of memory\n");
503 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
506 int val = *(unsigned char *)p++;
507 int hi = val / 0x10, lo = val % 0x10;
509 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
510 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
516 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
518 xmlGenericError(xmlGenericErrorContext,
519 "xmlSaveUri: out of memory\n");
529 * @stream: a FILE* for the output
530 * @uri: pointer to an xmlURI
532 * Prints the URI in the stream @stream.
535 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
538 out = xmlSaveUri(uri);
540 fprintf(stream, "%s", (char *) out);
547 * @uri: pointer to an xmlURI
549 * Make sure the xmlURI struct is free of content
552 xmlCleanURI(xmlURIPtr uri) {
553 if (uri == NULL) return;
555 if (uri->scheme != NULL) xmlFree(uri->scheme);
557 if (uri->server != NULL) xmlFree(uri->server);
559 if (uri->user != NULL) xmlFree(uri->user);
561 if (uri->path != NULL) xmlFree(uri->path);
563 if (uri->fragment != NULL) xmlFree(uri->fragment);
564 uri->fragment = NULL;
565 if (uri->opaque != NULL) xmlFree(uri->opaque);
567 if (uri->authority != NULL) xmlFree(uri->authority);
568 uri->authority = NULL;
569 if (uri->query != NULL) xmlFree(uri->query);
575 * @uri: pointer to an xmlURI
577 * Free up the xmlURI struct
580 xmlFreeURI(xmlURIPtr uri) {
581 if (uri == NULL) return;
583 if (uri->scheme != NULL) xmlFree(uri->scheme);
584 if (uri->server != NULL) xmlFree(uri->server);
585 if (uri->user != NULL) xmlFree(uri->user);
586 if (uri->path != NULL) xmlFree(uri->path);
587 if (uri->fragment != NULL) xmlFree(uri->fragment);
588 if (uri->opaque != NULL) xmlFree(uri->opaque);
589 if (uri->authority != NULL) xmlFree(uri->authority);
590 if (uri->query != NULL) xmlFree(uri->query);
594 /************************************************************************
598 ************************************************************************/
601 * xmlNormalizeURIPath:
602 * @path: pointer to the path string
604 * Applies the 5 normalization steps to a path string--that is, RFC 2396
605 * Section 5.2, steps 6.c through 6.g.
607 * Normalization occurs directly on the string, no new allocation is done
609 * Returns 0 or an error code
612 xmlNormalizeURIPath(char *path) {
618 /* Skip all initial "/" chars. We want to get to the beginning of the
619 * first non-empty segment.
622 while (cur[0] == '/')
627 /* Keep everything we've seen so far. */
631 * Analyze each segment in sequence for cases (c) and (d).
633 while (cur[0] != '\0') {
635 * c) All occurrences of "./", where "." is a complete path segment,
636 * are removed from the buffer string.
638 if ((cur[0] == '.') && (cur[1] == '/')) {
640 /* '//' normalization should be done at this point too */
641 while (cur[0] == '/')
647 * d) If the buffer string ends with "." as a complete path segment,
648 * that "." is removed.
650 if ((cur[0] == '.') && (cur[1] == '\0'))
653 /* Otherwise keep the segment. */
654 while (cur[0] != '/') {
657 (out++)[0] = (cur++)[0];
660 while ((cur[0] == '/') && (cur[1] == '/'))
663 (out++)[0] = (cur++)[0];
668 /* Reset to the beginning of the first segment for the next sequence. */
670 while (cur[0] == '/')
676 * Analyze each segment in sequence for cases (e) and (f).
678 * e) All occurrences of "<segment>/../", where <segment> is a
679 * complete path segment not equal to "..", are removed from the
680 * buffer string. Removal of these path segments is performed
681 * iteratively, removing the leftmost matching pattern on each
682 * iteration, until no matching pattern remains.
684 * f) If the buffer string ends with "<segment>/..", where <segment>
685 * is a complete path segment not equal to "..", that
686 * "<segment>/.." is removed.
688 * To satisfy the "iterative" clause in (e), we need to collapse the
689 * string every time we find something that needs to be removed. Thus,
690 * we don't need to keep two pointers into the string: we only need a
691 * "current position" pointer.
696 /* At the beginning of each iteration of this loop, "cur" points to
697 * the first character of the segment we want to examine.
700 /* Find the end of the current segment. */
702 while ((segp[0] != '/') && (segp[0] != '\0'))
705 /* If this is the last segment, we're done (we need at least two
706 * segments to meet the criteria for the (e) and (f) cases).
711 /* If the first segment is "..", or if the next segment _isn't_ "..",
712 * keep this segment and try the next one.
715 if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
716 || ((segp[0] != '.') || (segp[1] != '.')
717 || ((segp[2] != '/') && (segp[2] != '\0')))) {
722 /* If we get here, remove this segment and the next one and back up
723 * to the previous segment (if there is one), to implement the
724 * "iteratively" clause. It's pretty much impossible to back up
725 * while maintaining two pointers into the buffer, so just compact
726 * the whole buffer now.
729 /* If this is the end of the buffer, we're done. */
730 if (segp[2] == '\0') {
734 /* Valgrind complained, strcpy(cur, segp + 3); */
735 /* string will overlap, do not use strcpy */
738 while ((*tmp++ = *segp++) != 0);
740 /* If there are no previous segments, then keep going from here. */
742 while ((segp > path) && ((--segp)[0] == '/'))
747 /* "segp" is pointing to the end of a previous segment; find its
748 * start. We need to back up to the previous segment and start
749 * over with that to handle things like "foo/bar/../..". If we
750 * don't do this, then on the first pass we'll remove the "bar/..",
751 * but be pointing at the second ".." so we won't realize we can also
752 * remove the "foo/..".
755 while ((cur > path) && (cur[-1] != '/'))
761 * g) If the resulting buffer string still begins with one or more
762 * complete path segments of "..", then the reference is
763 * considered to be in error. Implementations may handle this
764 * error by retaining these components in the resolved path (i.e.,
765 * treating them as part of the final URI), by removing them from
766 * the resolved path (i.e., discarding relative levels above the
767 * root), or by avoiding traversal of the reference.
769 * We discard them from the final path.
771 if (path[0] == '/') {
773 while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
774 && ((cur[3] == '/') || (cur[3] == '\0')))
779 while (cur[0] != '\0')
780 (out++)[0] = (cur++)[0];
789 * xmlURIUnescapeString:
790 * @str: the string to unescape
791 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
792 * @target: optional destination buffer
794 * Unescaping routine, does not do validity checks !
795 * Output is direct unsigned char translation of %XX values (no encoding)
797 * Returns a copy of the string, but unescaped
800 xmlURIUnescapeString(const char *str, int len, char *target) {
806 if (len <= 0) len = strlen(str);
807 if (len < 0) return(NULL);
809 if (target == NULL) {
810 ret = (char *) xmlMallocAtomic(len + 1);
812 xmlGenericError(xmlGenericErrorContext,
813 "xmlURIUnescapeString: out of memory\n");
823 if ((*in >= '0') && (*in <= '9'))
825 else if ((*in >= 'a') && (*in <= 'f'))
826 *out = (*in - 'a') + 10;
827 else if ((*in >= 'A') && (*in <= 'F'))
828 *out = (*in - 'A') + 10;
830 if ((*in >= '0') && (*in <= '9'))
831 *out = *out * 16 + (*in - '0');
832 else if ((*in >= 'a') && (*in <= 'f'))
833 *out = *out * 16 + (*in - 'a') + 10;
834 else if ((*in >= 'A') && (*in <= 'F'))
835 *out = *out * 16 + (*in - 'A') + 10;
850 * @str: string to escape
851 * @list: exception list string of chars not to escape
853 * This routine escapes a string to hex, ignoring unreserved characters, '@'
854 * and the characters in the exception list.
856 * Returns a new escaped string or NULL in case of error.
859 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
863 unsigned int len, out;
867 len = xmlStrlen(str);
868 if (!(len > 0)) return(NULL);
871 ret = (xmlChar *) xmlMallocAtomic(len);
873 xmlGenericError(xmlGenericErrorContext,
874 "xmlURIEscapeStr: out of memory\n");
877 in = (const xmlChar *) str;
880 if (len - out <= 3) {
882 ret = (xmlChar *) xmlRealloc(ret, len);
884 xmlGenericError(xmlGenericErrorContext,
885 "xmlURIEscapeStr: out of memory\n");
892 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
897 ret[out++] = '0' + val;
899 ret[out++] = 'A' + val - 0xA;
902 ret[out++] = '0' + val;
904 ret[out++] = 'A' + val - 0xA;
917 * @str: the string of the URI to escape
919 * Escaping routine, does not do validity checks !
920 * It will try to escape the chars needing this, but this is heuristic-based:
921 * it's impossible to be sure.
923 * Returns a copy of the string, but escaped
926 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
927 * according to RFC2396.
931 xmlURIEscape(const xmlChar * str)
933 xmlChar *ret, *segment = NULL;
937 #define NULLCHK(p) if(!p) { \
938 xmlGenericError(xmlGenericErrorContext, \
939 "xmlURIEscape: out of memory\n"); \
945 uri = xmlCreateURI();
948 * Allow escaping errors in the unescaped form
951 ret2 = xmlParseURIReference(uri, (const char *)str);
964 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
966 ret = xmlStrcat(ret, segment);
967 ret = xmlStrcat(ret, BAD_CAST ":");
971 if (uri->authority) {
973 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
975 ret = xmlStrcat(ret, BAD_CAST "//");
976 ret = xmlStrcat(ret, segment);
981 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
983 ret = xmlStrcat(ret, segment);
984 ret = xmlStrcat(ret, BAD_CAST "@");
989 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
991 ret = xmlStrcat(ret, BAD_CAST "//");
992 ret = xmlStrcat(ret, segment);
999 snprintf((char *) port, 10, "%d", uri->port);
1000 ret = xmlStrcat(ret, BAD_CAST ":");
1001 ret = xmlStrcat(ret, port);
1006 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1008 ret = xmlStrcat(ret, segment);
1014 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1016 ret = xmlStrcat(ret, BAD_CAST "?");
1017 ret = xmlStrcat(ret, segment);
1022 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1024 ret = xmlStrcat(ret, segment);
1028 if (uri->fragment) {
1029 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1031 ret = xmlStrcat(ret, BAD_CAST "#");
1032 ret = xmlStrcat(ret, segment);
1042 /************************************************************************
1044 * Escaped URI parsing *
1046 ************************************************************************/
1049 * xmlParseURIFragment:
1050 * @uri: pointer to an URI structure
1051 * @str: pointer to the string to analyze
1053 * Parse an URI fragment string and fills in the appropriate fields
1054 * of the @uri structure.
1058 * Returns 0 or the error code
1061 xmlParseURIFragment(xmlURIPtr uri, const char **str)
1063 const char *cur = *str;
1068 while (IS_URIC(cur) || IS_UNWISE(cur))
1071 if (uri->fragment != NULL)
1072 xmlFree(uri->fragment);
1073 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1081 * @uri: pointer to an URI structure
1082 * @str: pointer to the string to analyze
1084 * Parse the query part of an URI
1088 * Returns 0 or the error code
1091 xmlParseURIQuery(xmlURIPtr uri, const char **str)
1093 const char *cur = *str;
1098 while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
1101 if (uri->query != NULL)
1102 xmlFree(uri->query);
1103 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1110 * xmlParseURIScheme:
1111 * @uri: pointer to an URI structure
1112 * @str: pointer to the string to analyze
1114 * Parse an URI scheme
1116 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1118 * Returns 0 or the error code
1121 xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1128 if (!IS_ALPHA(*cur))
1131 while (IS_SCHEME(*cur)) cur++;
1133 if (uri->scheme != NULL) xmlFree(uri->scheme);
1135 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1142 * xmlParseURIOpaquePart:
1143 * @uri: pointer to an URI structure
1144 * @str: pointer to the string to analyze
1146 * Parse an URI opaque part
1148 * opaque_part = uric_no_slash *uric
1150 * Returns 0 or the error code
1153 xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1161 if (!(IS_URIC_NO_SLASH(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
1165 while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
1168 if (uri->opaque != NULL)
1169 xmlFree(uri->opaque);
1170 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1177 * xmlParseURIServer:
1178 * @uri: pointer to an URI structure
1179 * @str: pointer to the string to analyze
1181 * Parse a server subpart of an URI, it's a finer grain analysis
1182 * of the authority part.
1184 * server = [ [ userinfo "@" ] hostport ]
1185 * userinfo = *( unreserved | escaped |
1186 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1187 * hostport = host [ ":" port ]
1188 * host = hostname | IPv4address
1189 * hostname = *( domainlabel "." ) toplabel [ "." ]
1190 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1191 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1192 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1195 * Returns 0 or the error code
1198 xmlParseURIServer(xmlURIPtr uri, const char **str) {
1200 const char *host, *tmp;
1201 const int IPmax = 4;
1210 * is there an userinfo ?
1212 while (IS_USERINFO(cur)) NEXT(cur);
1215 if (uri->user != NULL) xmlFree(uri->user);
1216 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1221 if (uri->user != NULL) xmlFree(uri->user);
1227 * This can be empty in the case where there is no server
1232 if (uri->authority != NULL) xmlFree(uri->authority);
1233 uri->authority = NULL;
1234 if (uri->server != NULL) xmlFree(uri->server);
1241 * host part of hostport can derive either an IPV4 address
1242 * or an unresolved name. Check the IP first, it is easier to detect
1243 * errors if it is the wrong one
1245 for (oct = 0; oct < IPmax; ++oct) {
1247 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1248 while(IS_DIGIT(*cur)) cur++;
1249 if (oct == (IPmax-1))
1255 if (oct < IPmax || (*cur == '.' && cur++) || IS_ALPHA(*cur)) {
1256 /* maybe host_name */
1257 if (!IS_ALPHANUM(*cur))
1258 return(4); /* e.g. http://xml.$oft */
1260 do ++cur; while (IS_ALPHANUM(*cur));
1264 return(5); /* e.g. http://xml.-soft */
1271 return(6); /* e.g. http://xml-.soft */
1273 return(7); /* e.g. http://xml..soft */
1281 --tmp; /* e.g. http://xml.$Oft/ */
1282 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1283 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1284 return(8); /* e.g. http://xmlsOft.0rg/ */
1287 if (uri->authority != NULL) xmlFree(uri->authority);
1288 uri->authority = NULL;
1289 if (uri->server != NULL) xmlFree(uri->server);
1290 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1293 * finish by checking for a port presence.
1297 if (IS_DIGIT(*cur)) {
1300 while (IS_DIGIT(*cur)) {
1302 uri->port = uri->port * 10 + (*cur - '0');
1312 * xmlParseURIRelSegment:
1313 * @uri: pointer to an URI structure
1314 * @str: pointer to the string to analyze
1316 * Parse an URI relative segment
1318 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1321 * Returns 0 or the error code
1324 xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1332 if (!(IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
1336 while (IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
1339 if (uri->path != NULL)
1341 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1348 * xmlParseURIPathSegments:
1349 * @uri: pointer to an URI structure
1350 * @str: pointer to the string to analyze
1351 * @slash: should we add a leading slash
1353 * Parse an URI set of path segments
1355 * path_segments = segment *( "/" segment )
1356 * segment = *pchar *( ";" param )
1359 * Returns 0 or the error code
1362 xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1372 while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
1374 while (*cur == ';') {
1376 while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
1388 * Concat the set of path segments to the current path
1394 if (uri->path != NULL) {
1395 len2 = strlen(uri->path);
1398 path = (char *) xmlMallocAtomic(len + 1);
1400 xmlGenericError(xmlGenericErrorContext,
1401 "xmlParseURIPathSegments: out of memory\n");
1405 if (uri->path != NULL)
1406 memcpy(path, uri->path, len2);
1413 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1414 if (uri->path != NULL)
1423 * xmlParseURIAuthority:
1424 * @uri: pointer to an URI structure
1425 * @str: pointer to the string to analyze
1427 * Parse the authority part of an URI.
1429 * authority = server | reg_name
1430 * server = [ [ userinfo "@" ] hostport ]
1431 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1432 * "@" | "&" | "=" | "+" )
1434 * Note : this is completely ambiguous since reg_name is allowed to
1435 * use the full set of chars in use by server:
1437 * 3.2.1. Registry-based Naming Authority
1439 * The structure of a registry-based naming authority is specific
1440 * to the URI scheme, but constrained to the allowed characters
1441 * for an authority component.
1443 * Returns 0 or the error code
1446 xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1456 * try first to parse it as a server string.
1458 ret = xmlParseURIServer(uri, str);
1459 if ((ret == 0) && (*str != NULL) &&
1460 ((**str == 0) || (**str == '/') || (**str == '?')))
1465 * failed, fallback to reg_name
1467 if (!IS_REG_NAME(cur)) {
1471 while (IS_REG_NAME(cur)) NEXT(cur);
1473 if (uri->server != NULL) xmlFree(uri->server);
1475 if (uri->user != NULL) xmlFree(uri->user);
1477 if (uri->authority != NULL) xmlFree(uri->authority);
1478 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1485 * xmlParseURIHierPart:
1486 * @uri: pointer to an URI structure
1487 * @str: pointer to the string to analyze
1489 * Parse an URI hierarchical part
1491 * hier_part = ( net_path | abs_path ) [ "?" query ]
1492 * abs_path = "/" path_segments
1493 * net_path = "//" authority [ abs_path ]
1495 * Returns 0 or the error code
1498 xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1507 if ((cur[0] == '/') && (cur[1] == '/')) {
1509 ret = xmlParseURIAuthority(uri, &cur);
1512 if (cur[0] == '/') {
1514 ret = xmlParseURIPathSegments(uri, &cur, 1);
1516 } else if (cur[0] == '/') {
1518 ret = xmlParseURIPathSegments(uri, &cur, 1);
1526 ret = xmlParseURIQuery(uri, &cur);
1535 * xmlParseAbsoluteURI:
1536 * @uri: pointer to an URI structure
1537 * @str: pointer to the string to analyze
1539 * Parse an URI reference string and fills in the appropriate fields
1540 * of the @uri structure
1542 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1544 * Returns 0 or the error code
1547 xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1556 ret = xmlParseURIScheme(uri, str);
1557 if (ret != 0) return(ret);
1564 return(xmlParseURIHierPart(uri, str));
1565 return(xmlParseURIOpaquePart(uri, str));
1569 * xmlParseRelativeURI:
1570 * @uri: pointer to an URI structure
1571 * @str: pointer to the string to analyze
1573 * Parse a relative URI string and fill in the appropriate fields
1574 * of the @uri structure
1576 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1577 * abs_path = "/" path_segments
1578 * net_path = "//" authority [ abs_path ]
1579 * rel_path = rel_segment [ abs_path ]
1581 * Returns 0 or the error code
1584 xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1592 if ((cur[0] == '/') && (cur[1] == '/')) {
1594 ret = xmlParseURIAuthority(uri, &cur);
1597 if (cur[0] == '/') {
1599 ret = xmlParseURIPathSegments(uri, &cur, 1);
1601 } else if (cur[0] == '/') {
1603 ret = xmlParseURIPathSegments(uri, &cur, 1);
1604 } else if (cur[0] != '#' && cur[0] != '?') {
1605 ret = xmlParseURIRelSegment(uri, &cur);
1608 if (cur[0] == '/') {
1610 ret = xmlParseURIPathSegments(uri, &cur, 1);
1617 ret = xmlParseURIQuery(uri, &cur);
1626 * xmlParseURIReference:
1627 * @uri: pointer to an URI structure
1628 * @str: the string to analyze
1630 * Parse an URI reference string and fills in the appropriate fields
1631 * of the @uri structure
1633 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1635 * Returns 0 or the error code
1638 xmlParseURIReference(xmlURIPtr uri, const char *str) {
1640 const char *tmp = str;
1647 * Try first to parse absolute refs, then fallback to relative if
1650 ret = xmlParseAbsoluteURI(uri, &str);
1654 ret = xmlParseRelativeURI(uri, &str);
1663 ret = xmlParseURIFragment(uri, &str);
1664 if (ret != 0) return(ret);
1675 * @str: the URI string to analyze
1679 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1681 * Returns a newly built xmlURIPtr or NULL in case of error
1684 xmlParseURI(const char *str) {
1690 uri = xmlCreateURI();
1692 ret = xmlParseURIReference(uri, str);
1701 /************************************************************************
1703 * Public functions *
1705 ************************************************************************/
1709 * @URI: the URI instance found in the document
1710 * @base: the base value
1712 * Computes the final URI of the reference done by checking that
1713 * the given URI is valid, and building the final URI using the
1714 * base URI. This is processed according to section 5.2 of the
1717 * 5.2. Resolving Relative References to Absolute Form
1719 * Returns a new URI string (to be freed by the caller) or NULL in case
/*
 * Resolves the reference URI against base and stores the resulting string
 * in val.  Working objects: ref is the parsed reference, bas the parsed
 * base, res the URI being assembled from both.
 * NOTE(review): several branch, jump and cleanup lines are not visible in
 * this view, so the exact conditions joining the numbered steps below
 * should be confirmed against the complete function.
 */
1723 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1724 xmlChar *val = NULL;
/* ret: parser status; len: size of the merged path buffer; indx, cur, out: cursors used while merging the paths in step 6. */
1725 int ret, len, indx, cur, out;
1726 xmlURIPtr ref = NULL;
1727 xmlURIPtr bas = NULL;
1728 xmlURIPtr res = NULL;
1731 * 1) The URI reference is parsed into the potential four components and
1732 * fragment identifier, as described in Section 4.3.
1734 * NOTE that a completely empty URI is treated by modern browsers
1735 * as a reference to "." rather than as a synonym for the current
1736 * URI. Should we do that here?
/* Parse the reference string into ref. */
1742 ref = xmlCreateURI();
1745 ret = xmlParseURIReference(ref, (const char *) URI);
/* A reference that already carries a scheme is absolute: it is duplicated as is. */
1752 if ((ref != NULL) && (ref->scheme != NULL)) {
1754 * The URI is absolute don't modify.
1756 val = xmlStrdup(URI);
1762 bas = xmlCreateURI();
1765 ret = xmlParseURIReference(bas, (const char *) base);
1769 val = xmlSaveUri(ref);
1774 * the base fragment must be ignored
1776 if (bas->fragment != NULL) {
1777 xmlFree(bas->fragment);
1778 bas->fragment = NULL;
1780 val = xmlSaveUri(bas);
1785 * 2) If the path component is empty and the scheme, authority, and
1786 * query components are undefined, then it is a reference to the
1787 * current document and we are done. Otherwise, the reference URI's
1788 * query and fragment components are defined as found (or not found)
1789 * within the URI reference and not inherited from the base URI.
1791 * NOTE that in modern browsers, the parsing differs from the above
1792 * in the following aspect: the query component is allowed to be
1793 * defined while still treating this as a reference to the current
/* res collects the components of the resolved URI. */
1796 res = xmlCreateURI();
/* Step 2 case: the reference has no scheme, no path and neither authority nor server, so these parts are copied from the base. */
1799 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1800 ((ref->authority == NULL) && (ref->server == NULL))) {
1801 if (bas->scheme != NULL)
1802 res->scheme = xmlMemStrdup(bas->scheme);
1803 if (bas->authority != NULL)
1804 res->authority = xmlMemStrdup(bas->authority);
1805 else if (bas->server != NULL) {
1806 res->server = xmlMemStrdup(bas->server);
1807 if (bas->user != NULL)
1808 res->user = xmlMemStrdup(bas->user);
/* port is assigned by value, while the string fields are duplicated. */
1809 res->port = bas->port;
1811 if (bas->path != NULL)
1812 res->path = xmlMemStrdup(bas->path);
/* A query present on the reference takes precedence over the one on the base. */
1813 if (ref->query != NULL)
1814 res->query = xmlMemStrdup(ref->query);
1815 else if (bas->query != NULL)
1816 res->query = xmlMemStrdup(bas->query);
/* The fragment is only ever taken from the reference. */
1817 if (ref->fragment != NULL)
1818 res->fragment = xmlMemStrdup(ref->fragment);
1823 * 3) If the scheme component is defined, indicating that the reference
1824 * starts with a scheme name, then the reference is interpreted as an
1825 * absolute URI and we are done. Otherwise, the reference URI's
1826 * scheme is inherited from the base URI's scheme component.
1828 if (ref->scheme != NULL) {
1829 val = xmlSaveUri(ref);
/* No scheme on the reference: take the scheme of the base when it has one. */
1832 if (bas->scheme != NULL)
1833 res->scheme = xmlMemStrdup(bas->scheme);
1835 if (ref->query != NULL)
1836 res->query = xmlMemStrdup(ref->query);
1837 if (ref->fragment != NULL)
1838 res->fragment = xmlMemStrdup(ref->fragment);
1841 * 4) If the authority component is defined, then the reference is a
1842 * network-path and we skip to step 7. Otherwise, the reference
1843 * URI's authority is inherited from the base URI's authority
1844 * component, which will also be undefined if the URI scheme does not
1845 * use an authority component.
/* The reference names its own authority or server, so the network location and path come from ref. */
1847 if ((ref->authority != NULL) || (ref->server != NULL)) {
1848 if (ref->authority != NULL)
1849 res->authority = xmlMemStrdup(ref->authority);
1851 res->server = xmlMemStrdup(ref->server);
1852 if (ref->user != NULL)
1853 res->user = xmlMemStrdup(ref->user);
1854 res->port = ref->port;
1856 if (ref->path != NULL)
1857 res->path = xmlMemStrdup(ref->path);
/* Below, the authority or the server, user and port are copied from the base instead. */
1860 if (bas->authority != NULL)
1861 res->authority = xmlMemStrdup(bas->authority);
1862 else if (bas->server != NULL) {
1863 res->server = xmlMemStrdup(bas->server);
1864 if (bas->user != NULL)
1865 res->user = xmlMemStrdup(bas->user);
1866 res->port = bas->port;
1870 * 5) If the path component begins with a slash character ("/"), then
1871 * the reference is an absolute-path and we skip to step 7.
1873 if ((ref->path != NULL) && (ref->path[0] == '/')) {
1874 res->path = xmlMemStrdup(ref->path);
1880 * 6) If this step is reached, then we are resolving a relative-path
1881 * reference. The relative path needs to be merged with the base
1882 * URI's path. Although there are many ways to do this, we will
1883 * describe a simple method using a separate string buffer.
1885 * Allocate a buffer large enough for the result string.
1887 len = 2; /* extra / and 0 */
1888 if (ref->path != NULL)
1889 len += strlen(ref->path);
1890 if (bas->path != NULL)
1891 len += strlen(bas->path);
1892 res->path = (char *) xmlMallocAtomic(len);
1893 if (res->path == NULL) {
1894 xmlGenericError(xmlGenericErrorContext,
1895 "xmlBuildURI: out of memory\n");
1901 * a) All but the last segment of the base URI's path component is
1902 * copied to the buffer. In other words, any characters after the
1903 * last (right-most) slash character, if any, are excluded.
/* cur walks bas->path looking for slash characters; res->path[out] receives the byte found at the same index of bas->path. */
1907 if (bas->path != NULL) {
1908 while (bas->path[cur] != 0) {
1909 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1911 if (bas->path[cur] == 0)
1916 res->path[out] = bas->path[out];
1924 * b) The reference's path component is appended to the buffer
1927 if (ref->path != NULL && ref->path[0] != 0) {
1930 * Ensure the path includes a '/'
1932 if ((out == 0) && (bas->server != NULL))
1933 res->path[out++] = '/';
1934 while (ref->path[indx] != 0) {
1935 res->path[out++] = ref->path[indx++];
1941 * Steps c) to h) are really path normalization steps
1943 xmlNormalizeURIPath(res->path);
1948 * 7) The resulting URI components, including any inherited from the
1949 * base URI, are recombined to give the absolute form of the URI
1952 val = xmlSaveUri(res);
1966 * @path: the resource locator in a filesystem notation
1968 * Constructs a canonic path from the specified path.
1970 * Returns a new canonic path, or a duplicate of the path parameter if the
1971 * construction fails. The caller is responsible for freeing the memory occupied
1972 * by the returned string. If there is insufficient memory available, or the
1973 * argument is NULL, the function returns NULL.
/*
 * Tests whether p begins with an ASCII letter, followed by a colon, followed
 * by a forward slash or a backslash (a drive-letter style path).  It reads
 * p[0], p[1] and p[2], and the argument p is evaluated several times.
 * NOTE(review): one continuation line of this macro is not visible here.
 */
1975 #define IS_WINDOWS_PATH(p) \
1977 (((p[0] >= 'a') && (p[0] <= 'z')) || \
1978 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
1979 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
/*
 * Builds a canonic form of a filesystem path by loading it into a URI
 * object and saving that object back to a string.
 * NOTE(review): declarations, NULL checks, cleanup and the final return are
 * not visible in this view and should be confirmed in the full function.
 */
1981 xmlCanonicPath(const xmlChar *path)
1983 #if defined(_WIN32) && !defined(__CYGWIN__)
/* When path already parses as a URI, a plain duplicate of it is returned. NOTE(review): the release of the parsed uri is not visible between these two lines - confirm. */
1993 if ((uri = xmlParseURI((const char *) path)) != NULL) {
1995 return xmlStrdup(path);
/* Otherwise a URI object is filled in by hand from the filesystem path. */
1998 uri = xmlCreateURI();
2003 #if defined(_WIN32) && !defined(__CYGWIN__)
2004 len = xmlStrlen(path);
/* Native Windows builds only: a drive-letter path gets the file scheme. */
2005 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2006 uri->scheme = xmlStrdup(BAD_CAST "file");
/* len + 2 bytes are requested, two more than the length of path. NOTE(review): no check of this allocation result is visible here - confirm. */
2007 uri->path = xmlMallocAtomic(len + 2);
/* Copies at most len + 1 bytes, which covers path and its terminating zero; p presumably points one byte into uri->path - TODO confirm. */
2010 strncpy(p, path, len + 1);
2012 uri->path = xmlStrdup(path);
/* Walks the string at p until its terminating zero; the work done per character is not visible here. */
2015 while (*p != '\0') {
2021 uri->path = (char *) xmlStrdup((const xmlChar *) path);
/* ret receives whatever xmlSaveUri produces for the assembled uri. */
2024 ret = xmlSaveUri(uri);