diff options
Diffstat (limited to 'protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp')
-rw-r--r-- | protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp | 134 |
1 files changed, 104 insertions, 30 deletions
diff --git a/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp b/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp index 55b66f7b3a..e793f940a8 100644 --- a/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp +++ b/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp @@ -1,5 +1,5 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,16 +10,19 @@ #include "td/utils/logging.h" #include "td/utils/misc.h" #include "td/utils/Parser.h" +#include "td/utils/port/IPAddress.h" + +#include <algorithm> namespace td { string HttpUrl::get_url() const { string result; switch (protocol_) { - case Protocol::HTTP: + case Protocol::Http: result += "http://"; break; - case Protocol::HTTPS: + case Protocol::Https: result += "https://"; break; default: @@ -29,39 +32,32 @@ string HttpUrl::get_url() const { result += userinfo_; result += '@'; } - if (is_ipv6) { - result += '['; - } result += host_; - if (is_ipv6) { - result += ']'; - } if (specified_port_ > 0) { result += ':'; result += to_string(specified_port_); } - CHECK(!query_.empty() && query_[0] == '/'); + LOG_CHECK(!query_.empty() && query_[0] == '/') << query_; result += query_; return result; } -Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol) { +Result<HttpUrl> parse_url(Slice url, HttpUrl::Protocol default_protocol) { // url == [https?://][userinfo@]host[:port] - Parser parser(url); - string protocol_str = to_lower(parser.read_till_nofail(':')); + ConstParser parser(url); + string protocol_str = to_lower(parser.read_till_nofail(":/?#@[]")); HttpUrl::Protocol protocol; - if (parser.start_with("://")) { - parser.advance(3); + if (parser.try_skip("://")) { if (protocol_str == "http") { - protocol = HttpUrl::Protocol::HTTP; + protocol = HttpUrl::Protocol::Http; } else if (protocol_str == "https") { - protocol = HttpUrl::Protocol::HTTPS; + protocol = HttpUrl::Protocol::Https; } else { return Status::Error("Unsupported URL protocol"); } } else { - parser = Parser(url); + parser = ConstParser(url); protocol = default_protocol; } Slice userinfo_host_port = parser.read_till_nofail("/?#"); @@ -73,7 +69,16 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol) } Slice userinfo_host; if (colon > userinfo_host_port.begin() && *colon == ':') { - port = to_integer<int>(Slice(colon + 1, userinfo_host_port.end())); + Slice port_slice(colon + 1, userinfo_host_port.end()); + while (port_slice.size() > 1 && port_slice[0] == '0') { + port_slice.remove_prefix(1); + } + auto r_port = to_integer_safe<int>(port_slice); + if (r_port.is_error() || r_port.ok() == 0) { + port = -1; + } else { + port = r_port.ok(); + } userinfo_host = Slice(userinfo_host_port.begin(), colon); } else { userinfo_host = userinfo_host_port; @@ -88,20 +93,26 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol) bool is_ipv6 = false; if (!host.empty() && host[0] == '[' && host.back() == ']') { - host.remove_prefix(1); - host.remove_suffix(1); + IPAddress ip_address; + if (ip_address.init_ipv6_port(host.str(), 1).is_error()) { + return Status::Error("Wrong IPv6 address specified in the URL"); + } + CHECK(ip_address.is_ipv6()); is_ipv6 = true; } if (host.empty()) { return Status::Error("URL host is empty"); } + if (host == ".") { + return Status::Error("Host is invalid"); + } int specified_port = port; if (port == 0) { - if (protocol == HttpUrl::Protocol::HTTP) { + if (protocol == HttpUrl::Protocol::Http) { port = 80; } else { - CHECK(protocol == HttpUrl::Protocol::HTTPS); + CHECK(protocol == HttpUrl::Protocol::Https); port = 443; } } @@ -111,7 +122,7 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol) query.remove_suffix(1); } if (query.empty()) { - query = "/"; + query = Slice("/"); } string query_str; if (query[0] != '/') { @@ -130,6 +141,14 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol) string host_str = to_lower(host); for (size_t i = 0; i < host_str.size(); i++) { char c = host_str[i]; + if (is_ipv6) { + if (i == 0 || i + 1 == host_str.size() || c == ':' || ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || + c == '.') { + continue; + } + return Status::Error("Wrong IPv6 URL host"); + } + if (('a' <= c && c <= 'z') || c == '.' || ('0' <= c && c <= '9') || c == '-' || c == '_' || c == '!' || c == '$' || c == ',' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')' || c == ';' || c == '&' || c == '+' || c == '=') { @@ -145,9 +164,11 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol) continue; } } + return Status::Error("Wrong percent-encoded symbol in URL host"); } + // all other symbols aren't allowed - unsigned char uc = static_cast<unsigned char>(c); + auto uc = static_cast<unsigned char>(c); if (uc >= 128) { // but we allow plain UTF-8 symbols continue; @@ -159,11 +180,66 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol) } StringBuilder &operator<<(StringBuilder &sb, const HttpUrl &url) { - sb << tag("protocol", url.protocol_ == HttpUrl::Protocol::HTTP ? "HTTP" : "HTTPS") << tag("userinfo", url.userinfo_) + sb << tag("protocol", url.protocol_ == HttpUrl::Protocol::Http ? "HTTP" : "HTTPS") << tag("userinfo", url.userinfo_) << tag("host", url.host_) << tag("port", url.port_) << tag("query", url.query_); return sb; } +HttpUrlQuery parse_url_query(Slice query) { + if (!query.empty() && query[0] == '/') { + query.remove_prefix(1); + } + + size_t path_size = 0; + while (path_size < query.size() && query[path_size] != '?' && query[path_size] != '#') { + path_size++; + } + + HttpUrlQuery result; + result.path_ = full_split(url_decode(query.substr(0, path_size), false), '/'); + while (!result.path_.empty() && result.path_.back().empty()) { + result.path_.pop_back(); + } + + if (path_size < query.size() && query[path_size] == '?') { + query = query.substr(path_size + 1); + query.truncate(query.find('#')); + + ConstParser parser(query); + while (!parser.data().empty()) { + auto key_value = split(parser.read_till_nofail('&'), '='); + parser.skip_nofail('&'); + auto key = url_decode(key_value.first, true); + if (!key.empty()) { + result.args_.emplace_back(std::move(key), url_decode(key_value.second, true)); + } + } + CHECK(parser.status().is_ok()); + } + + return result; +} + +bool HttpUrlQuery::has_arg(Slice key) const { + auto it = + std::find_if(args_.begin(), args_.end(), [&key](const std::pair<string, string> &s) { return s.first == key; }); + return it != args_.end(); +} + +Slice HttpUrlQuery::get_arg(Slice key) const { + auto it = + std::find_if(args_.begin(), args_.end(), [&key](const std::pair<string, string> &s) { return s.first == key; }); + return it == args_.end() ? Slice() : it->second; +} + +string get_url_host(Slice url) { + auto r_http_url = parse_url(url); + if (r_http_url.is_error()) { + return string(); + } + return r_http_url.ok().host_; +} + string get_url_query_file_name(const string &query) { Slice query_slice = query; query_slice.truncate(query.find_first_of("?#")); @@ -175,10 +251,8 @@ string get_url_query_file_name(const string &query) { return query_slice.str(); } -string get_url_file_name(const string &url) { - // TODO remove copy - string url_copy = url; - auto r_http_url = parse_url(url_copy); +string get_url_file_name(Slice url) { + auto r_http_url = parse_url(url); if (r_http_url.is_error()) { LOG(WARNING) << "Receive wrong URL \"" << url << '"'; return string(); |