From 67a42fc97c64c83e02f6f0d68e5a4a22c71138d3 Mon Sep 17 00:00:00 2001 From: dartraiden Date: Thu, 25 Jul 2024 00:50:30 +0300 Subject: libcurl: update to 8.9.0 --- libs/libcurl/src/urlapi.c | 94 ++++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 42 deletions(-) (limited to 'libs/libcurl/src/urlapi.c') diff --git a/libs/libcurl/src/urlapi.c b/libs/libcurl/src/urlapi.c index 7a938bbe9c..2179e106de 100644 --- a/libs/libcurl/src/urlapi.c +++ b/libs/libcurl/src/urlapi.c @@ -82,6 +82,7 @@ struct Curl_URL { unsigned short portnum; /* the numerical version (if 'port' is set) */ BIT(query_present); /* to support blank */ BIT(fragment_present); /* to support blank */ + BIT(guessed_scheme); /* when a URL without scheme is parsed */ }; #define DEFAULT_SCHEME "https" @@ -101,7 +102,7 @@ static void free_urlhandle(struct Curl_URL *u) } /* - * Find the separator at the end of the host name, or the '?' in cases like + * Find the separator at the end of the hostname, or the '?' in cases like * http://www.example.com?id=2380 */ static const char *find_host_sep(const char *url) @@ -140,7 +141,7 @@ static const char hexdigits[] = "0123456789abcdef"; /* urlencode_str() writes data into an output dynbuf and URL-encodes the * spaces in the source URL accordingly. * - * URL encoding should be skipped for host names, otherwise IDN resolution + * URL encoding should be skipped for hostnames, otherwise IDN resolution * will fail. */ static CURLUcode urlencode_str(struct dynbuf *o, const char *url, @@ -205,7 +206,7 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url, size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen, bool guess_scheme) { - int i = 0; + size_t i = 0; DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN)); (void)buflen; /* only used in debug-builds */ if(buf) @@ -229,7 +230,7 @@ size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen, if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) { /* If this does not guess scheme, the scheme always ends with the colon so that this also detects data: URLs etc. In guessing mode, data: could - be the host name "data" with a specified port number. */ + be the hostname "data" with a specified port number. */ /* the length of the scheme is the name part only */ size_t len = i; @@ -267,7 +268,7 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl) bool skip_slash = FALSE; *newurl = NULL; - /* protsep points to the start of the host name */ + /* protsep points to the start of the hostname */ protsep = strstr(base, "//"); if(!protsep) protsep = base; @@ -277,13 +278,13 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl) if('/' != relurl[0]) { int level = 0; - /* First we need to find out if there's a ?-letter in the URL, + /* First we need to find out if there is a ?-letter in the URL, and cut it and the right-side of that off */ pathsep = strchr(protsep, '?'); if(pathsep) *pathsep = 0; - /* we have a relative path to append to the last slash if there's one + /* we have a relative path to append to the last slash if there is one available, or the new URL is just a query string (starts with a '?') or a fragment (starts with '#') we append the new one at the end of the current URL */ @@ -292,7 +293,7 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl) if(pathsep) *pathsep = 0; - /* Check if there's any slash after the host name, and if so, remember + /* Check if there is any slash after the hostname, and if so, remember that position instead */ pathsep = strchr(protsep, '/'); if(pathsep) @@ -347,7 +348,7 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl) if(pathsep) { /* When people use badly formatted URLs, such as "http://www.example.com?dir=/home/daniel" we must not use the first - slash, if there's a ?-letter before it! */ + slash, if there is a ?-letter before it! */ char *sep = strchr(protsep, '?'); if(sep && (sep < pathsep)) pathsep = sep; @@ -355,8 +356,8 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl) } else { /* There was no slash. Now, since we might be operating on a badly - formatted URL, such as "http://www.example.com?id=2380" which - doesn't use a slash separator as it is supposed to, we need to check + formatted URL, such as "http://www.example.com?id=2380" which does + not use a slash separator as it is supposed to, we need to check for a ?-letter as well! */ pathsep = strchr(protsep, '?'); if(pathsep) @@ -367,7 +368,7 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl) Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH); - /* copy over the root url part */ + /* copy over the root URL part */ result = Curl_dyn_add(&newest, base); if(result) return result; @@ -420,15 +421,15 @@ static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags) /* * parse_hostname_login() * - * Parse the login details (user name, password and options) from the URL and - * strip them out of the host name + * Parse the login details (username, password and options) from the URL and + * strip them out of the hostname * */ static CURLUcode parse_hostname_login(struct Curl_URL *u, const char *login, size_t len, unsigned int flags, - size_t *offset) /* to the host name */ + size_t *offset) /* to the hostname */ { CURLUcode result = CURLUE_OK; CURLcode ccode; @@ -475,7 +476,7 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, if(userp) { if(flags & CURLU_DISALLOW_USER) { - /* Option DISALLOW_USER is set and url contains username. */ + /* Option DISALLOW_USER is set and URL contains username. */ result = CURLUE_USER_NOT_ALLOWED; goto out; } @@ -493,7 +494,7 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, u->options = optionsp; } - /* the host name starts at this offset */ + /* the hostname starts at this offset */ *offset = ptr - login; return CURLUE_OK; @@ -538,11 +539,11 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host, unsigned long port; size_t keep = portptr - hostname; - /* Browser behavior adaptation. If there's a colon with no digits after, + /* Browser behavior adaptation. If there is a colon with no digits after, just cut off the name there which makes us ignore the colon and just use the default port. Firefox, Chrome and Safari all do that. - Don't do it if the URL has no scheme, to make something that looks like + Do not do it if the URL has no scheme, to make something that looks like a scheme not work! */ Curl_dyn_setlen(host, keep); @@ -591,7 +592,7 @@ static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname, char zoneid[16]; int i = 0; char *h = &hostname[len + 1]; - /* pass '25' if present and is a url encoded percent sign */ + /* pass '25' if present and is a URL encoded percent sign */ if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']')) h += 2; while(*h && (*h != ']') && (i < 15)) @@ -664,7 +665,6 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname, */ #define HOST_ERROR -1 /* out of memory */ -#define HOST_BAD -2 /* bad IPv4 address */ #define HOST_NAME 1 #define HOST_IPV4 2 @@ -686,7 +686,7 @@ static int ipv4_normalize(struct dynbuf *host) char *endp = NULL; unsigned long l; if(!ISDIGIT(*c)) - /* most importantly this doesn't allow a leading plus or minus */ + /* most importantly this does not allow a leading plus or minus */ return HOST_NAME; l = strtoul(c, &endp, 0); if(errno) @@ -802,7 +802,7 @@ static CURLUcode parse_authority(struct Curl_URL *u, CURLcode result; /* - * Parse the login details and strip them out of the host name. + * Parse the login details and strip them out of the hostname. */ uc = parse_hostname_login(u, auth, authlen, flags, &offset); if(uc) @@ -835,7 +835,6 @@ static CURLUcode parse_authority(struct Curl_URL *u, case HOST_ERROR: uc = CURLUE_OUT_OF_MEMORY; break; - case HOST_BAD: default: uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */ break; @@ -907,7 +906,7 @@ UNITTEST int dedotdotify(const char *input, size_t clen, char **outp) do { bool dotdot = TRUE; if(*input == '.') { - /* A. If the input buffer begins with a prefix of "../" or "./", then + /* A. If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise, */ if(!strncmp("./", input, 2)) { @@ -918,7 +917,7 @@ UNITTEST int dedotdotify(const char *input, size_t clen, char **outp) input += 3; clen -= 3; } - /* D. if the input buffer consists only of "." or "..", then remove + /* D. if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise, */ else if(!strcmp(".", input) || !strcmp("..", input) || @@ -930,7 +929,7 @@ UNITTEST int dedotdotify(const char *input, size_t clen, char **outp) dotdot = FALSE; } else if(*input == '/') { - /* B. if the input buffer begins with a prefix of "/./" or "/.", where + /* B. if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise, */ if(!strncmp("/./", input, 3)) { @@ -943,7 +942,7 @@ UNITTEST int dedotdotify(const char *input, size_t clen, char **outp) break; } - /* C. if the input buffer begins with a prefix of "/../" or "/..", + /* C. if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise, */ @@ -977,7 +976,7 @@ UNITTEST int dedotdotify(const char *input, size_t clen, char **outp) dotdot = FALSE; if(!dotdot) { - /* E. move the first path segment in the input buffer to the end of + /* E. move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer. */ @@ -1070,7 +1069,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) * Appendix E, but believe me, it was meant to be there. --MK) */ if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) { - /* the URL includes a host name, it must match "localhost" or + /* the URL includes a hostname, it must match "localhost" or "127.0.0.1" to be valid */ if(checkprefix("localhost/", ptr) || checkprefix("127.0.0.1/", ptr)) { @@ -1080,9 +1079,9 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) #if defined(_WIN32) size_t len; - /* the host name, NetBIOS computer name, can not contain disallowed + /* the hostname, NetBIOS computer name, can not contain disallowed chars, and the delimiting slash character must be appended to the - host name */ + hostname */ path = strpbrk(ptr, "/\\:*?\"<>|"); if(!path || *path != '/') { result = CURLUE_BAD_FILE_URL; @@ -1118,7 +1117,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) Curl_dyn_reset(&host); #if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__) - /* Don't allow Windows drive letters when not in Windows. + /* Do not allow Windows drive letters when not in Windows. * This catches both "file:/c:" and "file:c:" */ if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) || STARTS_WITH_URL_DRIVE_PREFIX(path)) { @@ -1162,7 +1161,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) result = CURLUE_BAD_SLASHES; goto fail; } - hostp = p; /* host name starts here */ + hostp = p; /* hostname starts here */ } else { /* no scheme! */ @@ -1188,7 +1187,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) } } - /* find the end of the host name + port number */ + /* find the end of the hostname + port number */ hostlen = strcspn(hostp, "/?#"); path = &hostp[hostlen]; @@ -1202,7 +1201,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) if((flags & CURLU_GUESS_SCHEME) && !schemep) { const char *hostname = Curl_dyn_ptr(&host); - /* legacy curl-style guess based on host name */ + /* legacy curl-style guess based on hostname */ if(checkprefix("ftp.", hostname)) schemep = "ftp"; else if(checkprefix("dict.", hostname)) @@ -1223,6 +1222,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) result = CURLUE_OUT_OF_MEMORY; goto fail; } + u->guessed_scheme = TRUE; } } else if(flags & CURLU_NO_AUTHORITY) { @@ -1437,6 +1437,8 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, ptr = u->scheme; ifmissing = CURLUE_NO_SCHEME; urldecode = FALSE; /* never for schemes */ + if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme) + return CURLUE_NO_SCHEME; break; case CURLUPART_USER: ptr = u->user; @@ -1465,7 +1467,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, ifmissing = CURLUE_NO_PORT; urldecode = FALSE; /* never for port */ if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) { - /* there's no stored port number, but asked to deliver + /* there is no stored port number, but asked to deliver a default one for the scheme */ const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme); if(h) { @@ -1525,6 +1527,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, return CURLUE_NO_HOST; else { const struct Curl_handler *h = NULL; + char schemebuf[MAX_SCHEME_LEN + 5]; if(u->scheme) scheme = u->scheme; else if(flags & CURLU_DEFAULT_SCHEME) @@ -1534,7 +1537,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, h = Curl_get_scheme_handler(scheme); if(!port && (flags & CURLU_DEFAULT_PORT)) { - /* there's no stored port number, but asked to deliver + /* there is no stored port number, but asked to deliver a default one for the scheme */ if(h) { msnprintf(portbuf, sizeof(portbuf), "%u", h->defport); @@ -1595,8 +1598,13 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, } } - url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - scheme, + if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme) + msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme); + else + schemebuf[0] = 0; + + url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + schemebuf, u->user ? u->user : "", u->password ? ":": "", u->password ? u->password : "", @@ -1718,6 +1726,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, break; case CURLUPART_SCHEME: storep = &u->scheme; + u->guessed_scheme = FALSE; break; case CURLUPART_USER: storep = &u->user; @@ -1790,6 +1799,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, } else return CURLUE_BAD_SCHEME; + u->guessed_scheme = FALSE; break; } case CURLUPART_USER: @@ -1862,7 +1872,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, return CURLUE_MALFORMED_INPUT; /* if the new thing is absolute or the old one is not - * (we could not get an absolute url in 'oldurl'), + * (we could not get an absolute URL in 'oldurl'), * then replace the existing with the new. */ if(Curl_is_absolute_url(part, NULL, 0, flags & (CURLU_GUESS_SCHEME| @@ -1978,7 +1988,7 @@ nomem: else if(what == CURLUPART_HOST) { size_t n = Curl_dyn_len(&enc); if(!n && (flags & CURLU_NO_AUTHORITY)) { - /* Skip hostname check, it's allowed to be empty. */ + /* Skip hostname check, it is allowed to be empty. */ } else { if(!n || hostname_check(u, (char *)newp, n)) { -- cgit v1.2.3