summaryrefslogtreecommitdiff
path: root/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp')
-rw-r--r--protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp189
1 files changed, 189 insertions, 0 deletions
diff --git a/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp b/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp
new file mode 100644
index 0000000000..55b66f7b3a
--- /dev/null
+++ b/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp
@@ -0,0 +1,189 @@
+//
+// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+#include "td/utils/HttpUrl.h"
+
+#include "td/utils/format.h"
+#include "td/utils/logging.h"
+#include "td/utils/misc.h"
+#include "td/utils/Parser.h"
+
+namespace td {
+
+string HttpUrl::get_url() const {
+ string result;
+ switch (protocol_) {
+ case Protocol::HTTP:
+ result += "http://";
+ break;
+ case Protocol::HTTPS:
+ result += "https://";
+ break;
+ default:
+ UNREACHABLE();
+ }
+ if (!userinfo_.empty()) {
+ result += userinfo_;
+ result += '@';
+ }
+ if (is_ipv6) {
+ result += '[';
+ }
+ result += host_;
+ if (is_ipv6) {
+ result += ']';
+ }
+ if (specified_port_ > 0) {
+ result += ':';
+ result += to_string(specified_port_);
+ }
+ CHECK(!query_.empty() && query_[0] == '/');
+ result += query_;
+ return result;
+}
+
+Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol) {
+ // url == [https?://][userinfo@]host[:port]
+ Parser parser(url);
+ string protocol_str = to_lower(parser.read_till_nofail(':'));
+
+ HttpUrl::Protocol protocol;
+ if (parser.start_with("://")) {
+ parser.advance(3);
+ if (protocol_str == "http") {
+ protocol = HttpUrl::Protocol::HTTP;
+ } else if (protocol_str == "https") {
+ protocol = HttpUrl::Protocol::HTTPS;
+ } else {
+ return Status::Error("Unsupported URL protocol");
+ }
+ } else {
+ parser = Parser(url);
+ protocol = default_protocol;
+ }
+ Slice userinfo_host_port = parser.read_till_nofail("/?#");
+
+ int port = 0;
+ const char *colon = userinfo_host_port.end() - 1;
+ while (colon > userinfo_host_port.begin() && *colon != ':' && *colon != ']' && *colon != '@') {
+ colon--;
+ }
+ Slice userinfo_host;
+ if (colon > userinfo_host_port.begin() && *colon == ':') {
+ port = to_integer<int>(Slice(colon + 1, userinfo_host_port.end()));
+ userinfo_host = Slice(userinfo_host_port.begin(), colon);
+ } else {
+ userinfo_host = userinfo_host_port;
+ }
+ if (port < 0 || port > 65535) {
+ return Status::Error("Wrong port number specified in the URL");
+ }
+
+ auto at_pos = userinfo_host.rfind('@');
+ Slice userinfo = at_pos == static_cast<size_t>(-1) ? "" : userinfo_host.substr(0, at_pos);
+ Slice host = userinfo_host.substr(at_pos + 1);
+
+ bool is_ipv6 = false;
+ if (!host.empty() && host[0] == '[' && host.back() == ']') {
+ host.remove_prefix(1);
+ host.remove_suffix(1);
+ is_ipv6 = true;
+ }
+ if (host.empty()) {
+ return Status::Error("URL host is empty");
+ }
+
+ int specified_port = port;
+ if (port == 0) {
+ if (protocol == HttpUrl::Protocol::HTTP) {
+ port = 80;
+ } else {
+ CHECK(protocol == HttpUrl::Protocol::HTTPS);
+ port = 443;
+ }
+ }
+
+ Slice query = parser.read_all();
+ while (!query.empty() && is_space(query.back())) {
+ query.remove_suffix(1);
+ }
+ if (query.empty()) {
+ query = "/";
+ }
+ string query_str;
+ if (query[0] != '/') {
+ query_str = '/';
+ }
+ for (auto c : query) {
+ if (static_cast<unsigned char>(c) <= 0x20) {
+ query_str += '%';
+ query_str += "0123456789ABCDEF"[c / 16];
+ query_str += "0123456789ABCDEF"[c % 16];
+ } else {
+ query_str += c;
+ }
+ }
+
+ string host_str = to_lower(host);
+ for (size_t i = 0; i < host_str.size(); i++) {
+ char c = host_str[i];
+ if (('a' <= c && c <= 'z') || c == '.' || ('0' <= c && c <= '9') || c == '-' || c == '_' || c == '!' || c == '$' ||
+ c == ',' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')' || c == ';' || c == '&' || c == '+' ||
+ c == '=') {
+ // symbols allowed by RFC 7230 and RFC 3986
+ continue;
+ }
+ if (c == '%') {
+ c = host_str[++i];
+ if (('a' <= c && c <= 'f') || ('0' <= c && c <= '9')) {
+ c = host_str[++i];
+ if (('a' <= c && c <= 'f') || ('0' <= c && c <= '9')) {
+ // percent encoded symbol as allowed by RFC 7230 and RFC 3986
+ continue;
+ }
+ }
+ }
+ // all other symbols aren't allowed
+ unsigned char uc = static_cast<unsigned char>(c);
+ if (uc >= 128) {
+ // but we allow plain UTF-8 symbols
+ continue;
+ }
+ return Status::Error("Wrong URL host");
+ }
+
+ return HttpUrl{protocol, userinfo.str(), std::move(host_str), is_ipv6, specified_port, port, std::move(query_str)};
+}
+
+StringBuilder &operator<<(StringBuilder &sb, const HttpUrl &url) {
+ sb << tag("protocol", url.protocol_ == HttpUrl::Protocol::HTTP ? "HTTP" : "HTTPS") << tag("userinfo", url.userinfo_)
+ << tag("host", url.host_) << tag("port", url.port_) << tag("query", url.query_);
+ return sb;
+}
+
+string get_url_query_file_name(const string &query) {
+ Slice query_slice = query;
+ query_slice.truncate(query.find_first_of("?#"));
+
+ auto slash_pos = query_slice.rfind('/');
+ if (slash_pos < query_slice.size()) {
+ return query_slice.substr(slash_pos + 1).str();
+ }
+ return query_slice.str();
+}
+
+string get_url_file_name(const string &url) {
+ // TODO remove copy
+ string url_copy = url;
+ auto r_http_url = parse_url(url_copy);
+ if (r_http_url.is_error()) {
+ LOG(WARNING) << "Receive wrong URL \"" << url << '"';
+ return string();
+ }
+ return get_url_query_file_name(r_http_url.ok().query_);
+}
+
+} // namespace td