1 files changed, 163 insertions, 0 deletions
diff --git a/libs/litehtml/src/url.cpp b/libs/litehtml/src/url.cpp
new file mode 100644
index 0000000000..13076e338b
--- /dev/null
+++ b/libs/litehtml/src/url.cpp
@@ -0,0 +1,163 @@
+// Copyright (C) 2020-2021 Primate Labs Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the names of the copyright holders nor the names of their
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "url.h"
+
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+
+#include "codepoint.h"
+#include "url_path.h"
+
+namespace litehtml {
+
+// Parses |str| into scheme, authority, path, query and fragment components.
+// The text is stored unchanged in str_; nothing is decoded or normalized.
+url::url(const string& str)
+: str_(str)
+{
+    // TODO: Rewrite using tstring_view to avoid unnecessary allocations.
+    string tmp = str_;
+
+    // Does the URL include a scheme?  A scheme is never empty, so a leading
+    // colon stays in the remaining text instead of being silently dropped.
+    size_t offset = tmp.find(':');
+    if (offset != string::npos && offset > 0) {
+        bool valid_scheme = true;
+        for (size_t i = 0; valid_scheme && i < offset; i++) {
+            valid_scheme = is_url_scheme_codepoint(tmp[i]);
+        }
+        if (valid_scheme) {
+            scheme_ = tmp.substr(0, offset);
+            tmp = tmp.substr(offset + 1);
+        }
+    }
+
+    // Does the URL include an authority?  An authority component is preceded
+    // by a double slash ("//") and is terminated by the next slash ("/"),
+    // question mark ("?"), number sign ("#"), or the end of the URL.
+
+    if (tmp.size() >= 2 && tmp[0] == '/' && tmp[1] == '/') {
+        tmp = tmp.substr(2);
+        offset = tmp.size();
+        offset = std::min(offset, tmp.find('/'));
+        offset = std::min(offset, tmp.find('?'));
+        offset = std::min(offset, tmp.find('#'));
+        authority_ = tmp.substr(0, offset);
+        tmp = tmp.substr(offset);
+
+        // TODO: Parse the network location into host and port?
+    }
+
+    // Does the URL include a fragment?
+    offset = tmp.find('#');
+    if (offset != string::npos) {
+        fragment_ = tmp.substr(offset + 1);
+        tmp = tmp.substr(0, offset);
+    }
+
+    // Does the URL include a query?
+    offset = tmp.find('?');
+    if (offset != string::npos) {
+        query_ = tmp.substr(offset + 1);
+        tmp = tmp.substr(0, offset);
+    }
+
+    // Whatever remains of the URL after removing the scheme, the network
+    // location, the query, and the fragment is the path.
+    path_ = tmp;
+}
+
+// Assembles a URL from separate components and recomposes its string form.
+// Empty components are skipped together with their delimiters.
+url::url(const string& scheme,
+    const string& authority,
+    const string& path,
+    const string& query,
+    const string& fragment)
+: scheme_(scheme)
+, authority_(authority)
+, path_(path)
+, query_(query)
+, fragment_(fragment)
+{
+    string composed;
+    if (!scheme_.empty()) {
+        composed.append(scheme_).append(":");
+    }
+    if (!authority_.empty()) {
+        composed.append("//").append(authority_);
+    }
+    // The path carries no delimiter; appending an empty path is a no-op.
+    composed.append(path_);
+    if (!query_.empty()) {
+        composed.append("?").append(query_);
+    }
+    if (!fragment_.empty()) {
+        composed.append("#").append(fragment_);
+    }
+    str_ = composed;
+}
+
+// Resolves the reference URL |r| against the base URL |b|.  This roughly
+// follows Section 5.2.2 of RFC 3986, except that the path components of the
+// result are not normalized.
+url resolve(const url& b, const url& r)
+{
+    // A reference with its own scheme is already absolute and is returned
+    // untouched.
+    if (r.has_scheme()) {
+        return r;
+    }
+
+    // A reference with an authority takes only the scheme from the base.
+    if (r.has_authority()) {
+        return url(b.scheme(), r.authority(), r.path(), r.query(), r.fragment());
+    }
+
+    // Without a path the base path is reused, and the base query too unless
+    // the reference has one.  The fragment always comes from the reference.
+    if (!r.has_path()) {
+        if (r.has_query()) {
+            return url(b.scheme(), b.authority(), b.path(), r.query(), r.fragment());
+        }
+        return url(b.scheme(), b.authority(), b.path(), b.query(), r.fragment());
+    }
+
+    // An absolute reference path is used as is; a relative one is resolved
+    // against the base path first.
+    string path = r.path();
+    if (!is_url_path_absolute(r.path())) {
+        path = url_path_resolve(b.path(), r.path());
+    }
+    return url(b.scheme(), b.authority(), path, r.query(), r.fragment());
+}
+
+} // namespace litehtml