{URL_SCHEME_WILDCARD,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
+ },
+ /* Forbidden characters are encoded for opaque known scheme types. */
+ { "mailto:\"acco<|>unt@example.com\"", 0, S_OK, FALSE,
+ Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
+ Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
+ TRUE,
+ {
+ {"mailto:%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {"mailto:%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {".com%22",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE},
+ {"%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
+ {"%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"mailto:\"acco<|>unt@example.com\"",S_OK,FALSE},
+ {"mailto",S_OK,FALSE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE}
+ },
+ {
+ {Uri_HOST_UNKNOWN,S_OK,FALSE},
+ {0,S_FALSE,FALSE},
+ {URL_SCHEME_MAILTO,S_OK,FALSE},
+ {URLZONE_INVALID,E_NOTIMPL,FALSE}
+ }
+ },
+ { "news:test.tes<|>t.com", 0, S_OK, FALSE,
+ Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
+ Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
+ TRUE,
+ {
+ {"news:test.tes%3C%7C%3Et.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {"news:test.tes%3C%7C%3Et.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {".com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE},
+ {"test.tes%3C%7C%3Et.com",S_OK,TRUE},
+ {"test.tes%3C%7C%3Et.com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"news:test.tes<|>t.com",S_OK,FALSE},
+ {"news",S_OK,FALSE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE}
+ },
+ {
+ {Uri_HOST_UNKNOWN,S_OK,FALSE},
+ {0,S_FALSE,FALSE},
+ {URL_SCHEME_NEWS,S_OK,FALSE},
+ {URLZONE_INVALID,E_NOTIMPL,FALSE}
+ }
+ },
+ /* Don't encode forbidden characters. */
+ { "news:test.tes<|>t.com", Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS, S_OK, FALSE,
+ Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
+ Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
+ TRUE,
+ {
+ {"news:test.tes<|>t.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {"news:test.tes<|>t.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {".com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE},
+ {"test.tes<|>t.com",S_OK,TRUE},
+ {"test.tes<|>t.com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"news:test.tes<|>t.com",S_OK,FALSE},
+ {"news",S_OK,FALSE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE}
+ },
+ {
+ {Uri_HOST_UNKNOWN,S_OK,FALSE},
+ {0,S_FALSE,FALSE},
+ {URL_SCHEME_NEWS,S_OK,FALSE},
+ {URLZONE_INVALID,E_NOTIMPL,FALSE}
+ }
+ },
+ /* Forbidden characters aren't encoded for unknown, opaque URIs. */
+ { "urn:test.tes<|>t.com", 0, S_OK, FALSE,
+ Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
+ Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
+ TRUE,
+ {
+ {"urn:test.tes<|>t.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {"urn:test.tes<|>t.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {".com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE},
+ {"test.tes<|>t.com",S_OK,TRUE},
+ {"test.tes<|>t.com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"urn:test.tes<|>t.com",S_OK,FALSE},
+ {"urn",S_OK,FALSE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE}
+ },
+ {
+ {Uri_HOST_UNKNOWN,S_OK,FALSE},
+ {0,S_FALSE,FALSE},
+ {URL_SCHEME_UNKNOWN,S_OK,FALSE},
+ {URLZONE_INVALID,E_NOTIMPL,FALSE}
+ }
+ },
+ /* Percent encoded unreserved characters are decoded for known opaque URIs. */
+ { "news:test.%74%65%73%74.com", 0, S_OK, FALSE,
+ Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
+ Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
+ TRUE,
+ {
+ {"news:test.test.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {"news:test.test.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {".com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE},
+ {"test.test.com",S_OK,TRUE},
+ {"test.test.com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"news:test.%74%65%73%74.com",S_OK,FALSE},
+ {"news",S_OK,FALSE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE}
+ },
+ {
+ {Uri_HOST_UNKNOWN,S_OK,FALSE},
+ {0,S_FALSE,FALSE},
+ {URL_SCHEME_NEWS,S_OK,FALSE},
+ {URLZONE_INVALID,E_NOTIMPL,FALSE}
+ }
+ },
+ /* Percent encoded characters are still decoded for known scheme types. */
+ { "news:test.%74%65%73%74.com", Uri_CREATE_NO_CANONICALIZE, S_OK, FALSE,
+ Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
+ Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
+ TRUE,
+ {
+ {"news:test.test.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {"news:test.test.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {".com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE},
+ {"test.test.com",S_OK,TRUE},
+ {"test.test.com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"news:test.%74%65%73%74.com",S_OK,FALSE},
+ {"news",S_OK,FALSE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE}
+ },
+ {
+ {Uri_HOST_UNKNOWN,S_OK,FALSE},
+ {0,S_FALSE,FALSE},
+ {URL_SCHEME_NEWS,S_OK,FALSE},
+ {URLZONE_INVALID,E_NOTIMPL,FALSE}
+ }
+ },
+ /* Percent encoded characters aren't decoded for unknown scheme types. */
+ { "urn:test.%74%65%73%74.com", 0, S_OK, FALSE,
+ Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
+ Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
+ TRUE,
+ {
+ {"urn:test.%74%65%73%74.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {"urn:test.%74%65%73%74.com",S_OK,TRUE},
+ {"",S_FALSE,FALSE},
+ {".com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE},
+ {"test.%74%65%73%74.com",S_OK,TRUE},
+ {"test.%74%65%73%74.com",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"urn:test.%74%65%73%74.com",S_OK,FALSE},
+ {"urn",S_OK,FALSE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE}
+ },
+ {
+ {Uri_HOST_UNKNOWN,S_OK,FALSE},
+ {0,S_FALSE,FALSE},
+ {URL_SCHEME_UNKNOWN,S_OK,FALSE},
+ {URLZONE_INVALID,E_NOTIMPL,FALSE}
+ }
}
};
* (per MSDN documentation).
*/
static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
+ const WCHAR *start = *ptr;
+
/* Checks if the authority information needs to be parsed.
*
* Relative URI's aren't hierarchical URI's, but, they could trick
return FALSE;
return parse_path_hierarchical(ptr, data, flags);
- }
+ } else
+ /* Reset ptr to its starting position so opaque path parsing
+ * begins at the correct location.
+ */
+ *ptr = start;
}
/* If it reaches here, then the URI will be treated as an opaque
return TRUE;
}
+/* Attempts to canonicalize the path for an opaque URI.
+ *
+ * The canonical path is appended to uri->canon_uri starting at
+ * uri->canon_len, and uri->path_start / uri->path_len are set to
+ * describe it. When computeOnly is set nothing is written to
+ * uri->canon_uri; only the lengths are advanced.
+ *
+ * If data->path is NULL, uri->path_start is set to -1 and
+ * uri->path_len to 0.
+ *
+ * Regardless of scheme type, if data->scheme is set and the path
+ * begins with "//", a "/." is emitted in front of the path.
+ *
+ * For known scheme types (data->scheme_type != URL_SCHEME_UNKNOWN):
+ * 1) forbidden characters (neither reserved nor unreserved) are
+ * percent encoded if Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS
+ * isn't set in flags.
+ *
+ * 2) Percent encoded, unreserved characters are decoded
+ * to their actual values, for known scheme types.
+ *
+ * 3) '\\' are changed to '/' for known scheme types
+ * except for mailto schemes.
+ * NOTE(review): no branch in the loop below special-cases '\\';
+ * confirm whether this step is handled elsewhere or is still
+ * to be implemented.
+ *
+ * For unknown scheme types each path character is copied unchanged.
+ *
+ * Every return path in this function returns TRUE.
+ */
+static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
+ const WCHAR *ptr;
+ const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
+
+ if(!data->path) { /* No path was parsed: record an absent path. */
+ uri->path_start = -1;
+ uri->path_len = 0;
+ return TRUE;
+ }
+
+ uri->path_start = uri->canon_len; /* The path begins at the current end of the canonical string. */
+
+ /* Windows doesn't allow a "//" to appear after the scheme
+ * of a URI, if it's an opaque URI.
+ */
+ if(data->scheme && *(data->path) == '/' && *(data->path+1) == '/') {
+ /* So it inserts a "/." before the "//" if it exists. */
+ if(!computeOnly) {
+ uri->canon_uri[uri->canon_len] = '/';
+ uri->canon_uri[uri->canon_len+1] = '.';
+ }
+
+ uri->canon_len += 2; /* Counted even when computeOnly suppresses the writes. */
+ }
+
+ for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) {
+ if(*ptr == '%' && known_scheme) {
+ WCHAR val = decode_pct_val(ptr); /* presumably decodes the two hex digits after '%' - confirm against the helper */
+
+ if(is_unreserved(val)) { /* Encoded unreserved character: emit the decoded value instead. */
+ if(!computeOnly)
+ uri->canon_uri[uri->canon_len] = val;
+ ++uri->canon_len;
+
+ ptr += 2; /* Together with the loop's ++ptr this steps past the whole three character sequence. */
+ continue;
+ } else { /* Only the '%' is copied here; the characters after it are visited by later iterations. */
+ if(!computeOnly)
+ uri->canon_uri[uri->canon_len] = *ptr;
+ ++uri->canon_len;
+ }
+ } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
+ !(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
+ if(!computeOnly)
+ pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
+ uri->canon_len += 3; /* presumably the "%XX" written by pct_encode_val - confirm against the helper */
+ } else { /* Unknown scheme, allowed character, or encoding disabled: copy as is. */
+ if(!computeOnly)
+ uri->canon_uri[uri->canon_len] = *ptr;
+ ++uri->canon_len;
+ }
+ }
+
+ uri->path_len = uri->canon_len - uri->path_start; /* Includes the "/." prefix if one was inserted above. */
+
+ TRACE("(%p %p %x %d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly,
+ debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len);
+ return TRUE;
+}
+
/* Determines how the URI represented by the parse_data should be canonicalized.
*
* Essentially, if the parse_data represents an hierarchical URI then it calls
uri->authority_start = -1;
uri->authority_len = 0;
uri->domain_offset = -1;
+
+ if(!canonicalize_path_opaque(data, uri, flags, computeOnly))
+ return FALSE;
}
return TRUE;