summaryrefslogtreecommitdiff
path: root/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp')
-rw-r--r--protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp134
1 files changed, 104 insertions, 30 deletions
diff --git a/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp b/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp
index 55b66f7b3a..e793f940a8 100644
--- a/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp
+++ b/protocols/Telegram/tdlib/td/tdutils/td/utils/HttpUrl.cpp
@@ -1,5 +1,5 @@
//
-// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018
+// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,16 +10,19 @@
#include "td/utils/logging.h"
#include "td/utils/misc.h"
#include "td/utils/Parser.h"
+#include "td/utils/port/IPAddress.h"
+
+#include <algorithm>
namespace td {
string HttpUrl::get_url() const {
string result;
switch (protocol_) {
- case Protocol::HTTP:
+ case Protocol::Http:
result += "http://";
break;
- case Protocol::HTTPS:
+ case Protocol::Https:
result += "https://";
break;
default:
@@ -29,39 +32,32 @@ string HttpUrl::get_url() const {
result += userinfo_;
result += '@';
}
- if (is_ipv6) {
- result += '[';
- }
result += host_;
- if (is_ipv6) {
- result += ']';
- }
if (specified_port_ > 0) {
result += ':';
result += to_string(specified_port_);
}
- CHECK(!query_.empty() && query_[0] == '/');
+ LOG_CHECK(!query_.empty() && query_[0] == '/') << query_;
result += query_;
return result;
}
-Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol) {
+Result<HttpUrl> parse_url(Slice url, HttpUrl::Protocol default_protocol) {
// url == [https?://][userinfo@]host[:port]
- Parser parser(url);
- string protocol_str = to_lower(parser.read_till_nofail(':'));
+ ConstParser parser(url);
+ string protocol_str = to_lower(parser.read_till_nofail(":/?#@[]"));
HttpUrl::Protocol protocol;
- if (parser.start_with("://")) {
- parser.advance(3);
+ if (parser.try_skip("://")) {
if (protocol_str == "http") {
- protocol = HttpUrl::Protocol::HTTP;
+ protocol = HttpUrl::Protocol::Http;
} else if (protocol_str == "https") {
- protocol = HttpUrl::Protocol::HTTPS;
+ protocol = HttpUrl::Protocol::Https;
} else {
return Status::Error("Unsupported URL protocol");
}
} else {
- parser = Parser(url);
+ parser = ConstParser(url);
protocol = default_protocol;
}
Slice userinfo_host_port = parser.read_till_nofail("/?#");
@@ -73,7 +69,16 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol)
}
Slice userinfo_host;
if (colon > userinfo_host_port.begin() && *colon == ':') {
- port = to_integer<int>(Slice(colon + 1, userinfo_host_port.end()));
+ Slice port_slice(colon + 1, userinfo_host_port.end());
+ while (port_slice.size() > 1 && port_slice[0] == '0') {
+ port_slice.remove_prefix(1);
+ }
+ auto r_port = to_integer_safe<int>(port_slice);
+ if (r_port.is_error() || r_port.ok() == 0) {
+ port = -1;
+ } else {
+ port = r_port.ok();
+ }
userinfo_host = Slice(userinfo_host_port.begin(), colon);
} else {
userinfo_host = userinfo_host_port;
@@ -88,20 +93,26 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol)
bool is_ipv6 = false;
if (!host.empty() && host[0] == '[' && host.back() == ']') {
- host.remove_prefix(1);
- host.remove_suffix(1);
+ IPAddress ip_address;
+ if (ip_address.init_ipv6_port(host.str(), 1).is_error()) {
+ return Status::Error("Wrong IPv6 address specified in the URL");
+ }
+ CHECK(ip_address.is_ipv6());
is_ipv6 = true;
}
if (host.empty()) {
return Status::Error("URL host is empty");
}
+ if (host == ".") {
+ return Status::Error("Host is invalid");
+ }
int specified_port = port;
if (port == 0) {
- if (protocol == HttpUrl::Protocol::HTTP) {
+ if (protocol == HttpUrl::Protocol::Http) {
port = 80;
} else {
- CHECK(protocol == HttpUrl::Protocol::HTTPS);
+ CHECK(protocol == HttpUrl::Protocol::Https);
port = 443;
}
}
@@ -111,7 +122,7 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol)
query.remove_suffix(1);
}
if (query.empty()) {
- query = "/";
+ query = Slice("/");
}
string query_str;
if (query[0] != '/') {
@@ -130,6 +141,14 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol)
string host_str = to_lower(host);
for (size_t i = 0; i < host_str.size(); i++) {
char c = host_str[i];
+ if (is_ipv6) {
+ if (i == 0 || i + 1 == host_str.size() || c == ':' || ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
+ c == '.') {
+ continue;
+ }
+ return Status::Error("Wrong IPv6 URL host");
+ }
+
if (('a' <= c && c <= 'z') || c == '.' || ('0' <= c && c <= '9') || c == '-' || c == '_' || c == '!' || c == '$' ||
c == ',' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')' || c == ';' || c == '&' || c == '+' ||
c == '=') {
@@ -145,9 +164,11 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol)
continue;
}
}
+ return Status::Error("Wrong percent-encoded symbol in URL host");
}
+
// all other symbols aren't allowed
- unsigned char uc = static_cast<unsigned char>(c);
+ auto uc = static_cast<unsigned char>(c);
if (uc >= 128) {
// but we allow plain UTF-8 symbols
continue;
@@ -159,11 +180,66 @@ Result<HttpUrl> parse_url(MutableSlice url, HttpUrl::Protocol default_protocol)
}
StringBuilder &operator<<(StringBuilder &sb, const HttpUrl &url) {
- sb << tag("protocol", url.protocol_ == HttpUrl::Protocol::HTTP ? "HTTP" : "HTTPS") << tag("userinfo", url.userinfo_)
+ sb << tag("protocol", url.protocol_ == HttpUrl::Protocol::Http ? "HTTP" : "HTTPS") << tag("userinfo", url.userinfo_)
<< tag("host", url.host_) << tag("port", url.port_) << tag("query", url.query_);
return sb;
}
+HttpUrlQuery parse_url_query(Slice query) {
+ if (!query.empty() && query[0] == '/') {
+ query.remove_prefix(1);
+ }
+
+ size_t path_size = 0;
+ while (path_size < query.size() && query[path_size] != '?' && query[path_size] != '#') {
+ path_size++;
+ }
+
+ HttpUrlQuery result;
+ result.path_ = full_split(url_decode(query.substr(0, path_size), false), '/');
+ while (!result.path_.empty() && result.path_.back().empty()) {
+ result.path_.pop_back();
+ }
+
+ if (path_size < query.size() && query[path_size] == '?') {
+ query = query.substr(path_size + 1);
+ query.truncate(query.find('#'));
+
+ ConstParser parser(query);
+ while (!parser.data().empty()) {
+ auto key_value = split(parser.read_till_nofail('&'), '=');
+ parser.skip_nofail('&');
+ auto key = url_decode(key_value.first, true);
+ if (!key.empty()) {
+ result.args_.emplace_back(std::move(key), url_decode(key_value.second, true));
+ }
+ }
+ CHECK(parser.status().is_ok());
+ }
+
+ return result;
+}
+
+bool HttpUrlQuery::has_arg(Slice key) const {
+ auto it =
+ std::find_if(args_.begin(), args_.end(), [&key](const std::pair<string, string> &s) { return s.first == key; });
+ return it != args_.end();
+}
+
+Slice HttpUrlQuery::get_arg(Slice key) const {
+ auto it =
+ std::find_if(args_.begin(), args_.end(), [&key](const std::pair<string, string> &s) { return s.first == key; });
+ return it == args_.end() ? Slice() : it->second;
+}
+
+string get_url_host(Slice url) {
+ auto r_http_url = parse_url(url);
+ if (r_http_url.is_error()) {
+ return string();
+ }
+ return r_http_url.ok().host_;
+}
+
string get_url_query_file_name(const string &query) {
Slice query_slice = query;
query_slice.truncate(query.find_first_of("?#"));
@@ -175,10 +251,8 @@ string get_url_query_file_name(const string &query) {
return query_slice.str();
}
-string get_url_file_name(const string &url) {
- // TODO remove copy
- string url_copy = url;
- auto r_http_url = parse_url(url_copy);
+string get_url_file_name(Slice url) {
+ auto r_http_url = parse_url(url);
if (r_http_url.is_error()) {
LOG(WARNING) << "Receive wrong URL \"" << url << '"';
return string();