urlmon: Implemented a parser for URI query strings.
author Thomas Mullaly <thomas.mullaly@gmail.com>
Tue, 27 Jul 2010 00:30:21 +0000 (20:30 -0400)
committer Alexandre Julliard <julliard@winehq.org>
Wed, 4 Aug 2010 10:25:42 +0000 (12:25 +0200)
dlls/urlmon/tests/uri.c
dlls/urlmon/uri.c

index be437515361c4b53cae9b6f995af4a0c27d85780..9a0e6ff953067bc5734dc3eb298644c8b2a45d14 100644 (file)
@@ -2861,6 +2861,66 @@ static const uri_properties uri_tests[] = {
             {URL_SCHEME_UNKNOWN,S_OK,FALSE},
             {URLZONE_INVALID,E_NOTIMPL,FALSE}
         }
+    },
+    /* Unknown scheme types can have invalid % encoded data in query string. */
+    {   "zip://www.winehq.org/tests/..?query=%xx&return=y", 0, S_OK, FALSE,
+        Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|Uri_HAS_HOST|
+        Uri_HAS_DOMAIN|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_QUERY|Uri_HAS_RAW_URI|
+        Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
+        TRUE,
+        {
+            {"zip://www.winehq.org/?query=%xx&return=y",S_OK,TRUE},
+            {"www.winehq.org",S_OK,FALSE},
+            {"zip://www.winehq.org/?query=%xx&return=y",S_OK,TRUE},
+            {"winehq.org",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"",S_FALSE,TRUE},
+            {"www.winehq.org",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"/",S_OK,FALSE},
+            {"/?query=%xx&return=y",S_OK,TRUE},
+            {"?query=%xx&return=y",S_OK,TRUE},
+            {"zip://www.winehq.org/tests/..?query=%xx&return=y",S_OK,FALSE},
+            {"zip",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"",S_FALSE,FALSE}
+        },
+        {
+            {Uri_HOST_DNS,S_OK,FALSE},
+            {0,S_FALSE,FALSE},
+            {URL_SCHEME_UNKNOWN,S_OK,FALSE},
+            {URLZONE_INVALID,E_NOTIMPL,FALSE},
+        }
+    },
+    /* Known scheme types can have invalid % encoded data with the right flags. */
+    {   "http://www.winehq.org/tests/..?query=%xx&return=y", Uri_CREATE_NO_DECODE_EXTRA_INFO, S_OK, FALSE,
+        Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|Uri_HAS_HOST|
+        Uri_HAS_DOMAIN|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_PORT|Uri_HAS_QUERY|Uri_HAS_RAW_URI|
+        Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
+        TRUE,
+        {
+            {"http://www.winehq.org/?query=%xx&return=y",S_OK,TRUE},
+            {"www.winehq.org",S_OK,FALSE},
+            {"http://www.winehq.org/?query=%xx&return=y",S_OK,TRUE},
+            {"winehq.org",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"",S_FALSE,TRUE},
+            {"www.winehq.org",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"/",S_OK,FALSE},
+            {"/?query=%xx&return=y",S_OK,TRUE},
+            {"?query=%xx&return=y",S_OK,TRUE},
+            {"http://www.winehq.org/tests/..?query=%xx&return=y",S_OK,FALSE},
+            {"http",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"",S_FALSE,FALSE}
+        },
+        {
+            {Uri_HOST_DNS,S_OK,FALSE},
+            {80,S_OK,FALSE},
+            {URL_SCHEME_HTTP,S_OK,FALSE},
+            {URLZONE_INVALID,E_NOTIMPL,FALSE},
+        }
     }
 };
 
@@ -2913,7 +2973,9 @@ static const invalid_uri invalid_uri_tests[] = {
     {"gopher://www.google.com\\test",Uri_CREATE_NO_CANONICALIZE,FALSE},
     /* Not allowed to have invalid % encoded data in opaque URI path. */
     {"news:test%XX",0,FALSE},
-    {"mailto:wine@winehq%G8.com",0,FALSE}
+    {"mailto:wine@winehq%G8.com",0,FALSE},
+    /* Known scheme types can't have invalid % encoded data in query string. */
+    {"http://google.com/?query=te%xx",0,FALSE}
 };
 
 typedef struct _uri_equality {
index 49503317424206ffca54dc5037ab141ce18d87ec..c63cffa900b74ef74379995bda8428bb6502e603 100644 (file)
@@ -120,6 +120,9 @@ typedef struct {
 
     const WCHAR     *path;
     DWORD           path_len;
+
+    const WCHAR     *query;
+    DWORD           query_len;
 } parse_data;
 
 static const CHAR hexDigits[] = "0123456789ABCDEF";
@@ -1738,6 +1741,45 @@ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
     return TRUE;
 }
 
+/* Attempts to parse the query string from the URI.
+ *
+ * A query starts at a '?' and runs up to (but not including) the first '#'
+ * or the end of the string. The leading '?' is stored as part of the query,
+ * so data->query points at the '?' and data->query_len counts it.
+ *
+ * PARAMS:
+ *  ptr   - In/out cursor into the URI string. On success it is left on the
+ *          '#' or the terminating NUL that ended the query; it isn't moved if
+ *          the URI has no query string. On failure it is reset to the '?'.
+ *  data  - Receives query/query_len. data->scheme_type is read to decide
+ *          whether the scheme is known. On failure data->query is set to NULL.
+ *  flags - Creation flags; only Uri_CREATE_NO_DECODE_EXTRA_INFO is checked.
+ *
+ * RETURNS:
+ *  TRUE if there's no query string or if it was parsed successfully, FALSE
+ *  if check_pct_encoded rejects a '%' sequence.
+ *
+ * NOTES:
+ *  If the NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded
+ *  data is allowed to appear in the query string. For unknown scheme types
+ *  invalid percent encoded data is allowed to appear regardless.
+ *
+ *  After a successful check_pct_encoded call the loop continues without
+ *  advancing the cursor itself; presumably check_pct_encoded moves the cursor
+ *  past the sequence it validated (it's defined outside this hunk - confirm).
+ */
+static BOOL parse_query(const WCHAR **ptr, parse_data *data, DWORD flags) {
+    const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
+
+    if(**ptr != '?') {
+        TRACE("(%p %p %x): URI didn't contain a query string.\n", ptr, data, flags);
+        return TRUE;
+    }
+
+    data->query = *ptr;
+
+    ++(*ptr);
+    while(**ptr && **ptr != '#') {
+        if(**ptr == '%' && known_scheme &&
+           !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
+            if(!check_pct_encoded(ptr)) {
+                *ptr = data->query;
+                data->query = NULL;
+                return FALSE;
+            } else
+                continue;
+        }
+
+        ++(*ptr);
+    }
+
+    data->query_len = *ptr - data->query;
+
+    TRACE("(%p %p %x): Parsed query string %s len=%d\n", ptr, data, flags,
+        debugstr_wn(data->query, data->query_len), data->query_len);
+    return TRUE;
+}
+
 /* Parses and validates the components of the specified by data->uri
  * and stores the information it parses into 'data'.
  *
@@ -1758,7 +1800,10 @@ static BOOL parse_uri(parse_data *data, DWORD flags) {
     if(!parse_hierpart(pptr, data, flags))
         return FALSE;
 
-    /* TODO: Parse query and fragment (if the URI has one). */
+    if(!parse_query(pptr, data, flags))
+        return FALSE;
+
+    /* TODO: Parse fragment (if the URI has one). */
 
     TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags);
     return TRUE;