diff options
Diffstat (limited to 'libs/libcurl/src/urlapi.c')
-rw-r--r-- | libs/libcurl/src/urlapi.c | 3772 |
1 files changed, 1886 insertions, 1886 deletions
diff --git a/libs/libcurl/src/urlapi.c b/libs/libcurl/src/urlapi.c index 7dac81c85c..203fbc4149 100644 --- a/libs/libcurl/src/urlapi.c +++ b/libs/libcurl/src/urlapi.c @@ -1,1886 +1,1886 @@ -/*************************************************************************** - * _ _ ____ _ - * Project ___| | | | _ \| | - * / __| | | | |_) | | - * | (__| |_| | _ <| |___ - * \___|\___/|_| \_\_____| - * - * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al. - * - * This software is licensed as described in the file COPYING, which - * you should have received as part of this distribution. The terms - * are also available at https://curl.se/docs/copyright.html. - * - * You may opt to use, copy, modify, merge, publish, distribute and/or sell - * copies of the Software, and permit persons to whom the Software is - * furnished to do so, under the terms of the COPYING file. - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied. - * - * SPDX-License-Identifier: curl - * - ***************************************************************************/ - -#include "curl_setup.h" - -#include "urldata.h" -#include "urlapi-int.h" -#include "strcase.h" -#include "url.h" -#include "escape.h" -#include "curl_ctype.h" -#include "inet_pton.h" -#include "inet_ntop.h" -#include "strdup.h" - -/* The last 3 #include files should be in this order */ -#include "curl_printf.h" -#include "curl_memory.h" -#include "memdebug.h" - - /* MSDOS/Windows style drive prefix, eg c: in c:foo */ -#define STARTS_WITH_DRIVE_PREFIX(str) \ - ((('a' <= str[0] && str[0] <= 'z') || \ - ('A' <= str[0] && str[0] <= 'Z')) && \ - (str[1] == ':')) - - /* MSDOS/Windows style drive prefix, optionally with - * a '|' instead of ':', followed by a slash or NUL */ -#define STARTS_WITH_URL_DRIVE_PREFIX(str) \ - ((('a' <= (str)[0] && (str)[0] <= 'z') || \ - ('A' <= (str)[0] && (str)[0] <= 'Z')) && \ - ((str)[1] == ':' || (str)[1] == '|') && \ - ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0)) - -/* scheme is not URL encoded, the longest libcurl supported ones are... */ -#define MAX_SCHEME_LEN 40 - -/* Internal representation of CURLU. Point to URL-encoded strings. */ -struct Curl_URL { - char *scheme; - char *user; - char *password; - char *options; /* IMAP only? */ - char *host; - char *zoneid; /* for numerical IPv6 addresses */ - char *port; - char *path; - char *query; - char *fragment; - long portnum; /* the numerical version */ -}; - -#define DEFAULT_SCHEME "https" - -static void free_urlhandle(struct Curl_URL *u) -{ - free(u->scheme); - free(u->user); - free(u->password); - free(u->options); - free(u->host); - free(u->zoneid); - free(u->port); - free(u->path); - free(u->query); - free(u->fragment); -} - -/* - * Find the separator at the end of the host name, or the '?' in cases like - * http://www.url.com?id=2380 - */ -static const char *find_host_sep(const char *url) -{ - const char *sep; - const char *query; - - /* Find the start of the hostname */ - sep = strstr(url, "//"); - if(!sep) - sep = url; - else - sep += 2; - - query = strchr(sep, '?'); - sep = strchr(sep, '/'); - - if(!sep) - sep = url + strlen(url); - - if(!query) - query = url + strlen(url); - - return sep < query ? sep : query; -} - -/* - * Decide in an encoding-independent manner whether a character in a URL must - * be escaped. This is used in urlencode_str(). - */ -static bool urlchar_needs_escaping(int c) -{ - return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)); -} - -/* urlencode_str() writes data into an output dynbuf and URL-encodes the - * spaces in the source URL accordingly. - * - * URL encoding should be skipped for host names, otherwise IDN resolution - * will fail. - */ -static CURLUcode urlencode_str(struct dynbuf *o, const char *url, - size_t len, bool relative, - bool query) -{ - /* we must add this with whitespace-replacing */ - bool left = !query; - const unsigned char *iptr; - const unsigned char *host_sep = (const unsigned char *) url; - - if(!relative) - host_sep = (const unsigned char *) find_host_sep(url); - - for(iptr = (unsigned char *)url; /* read from here */ - len; iptr++, len--) { - - if(iptr < host_sep) { - if(Curl_dyn_addn(o, iptr, 1)) - return CURLUE_OUT_OF_MEMORY; - continue; - } - - if(*iptr == ' ') { - if(left) { - if(Curl_dyn_addn(o, "%20", 3)) - return CURLUE_OUT_OF_MEMORY; - } - else { - if(Curl_dyn_addn(o, "+", 1)) - return CURLUE_OUT_OF_MEMORY; - } - continue; - } - - if(*iptr == '?') - left = FALSE; - - if(urlchar_needs_escaping(*iptr)) { - if(Curl_dyn_addf(o, "%%%02x", *iptr)) - return CURLUE_OUT_OF_MEMORY; - } - else { - if(Curl_dyn_addn(o, iptr, 1)) - return CURLUE_OUT_OF_MEMORY; - } - } - - return CURLUE_OK; -} - -/* - * Returns the length of the scheme if the given URL is absolute (as opposed - * to relative). Stores the scheme in the buffer if TRUE and 'buf' is - * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set. - * - * If 'guess_scheme' is TRUE, it means the URL might be provided without - * scheme. - */ -size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen, - bool guess_scheme) -{ - int i; - DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN)); - (void)buflen; /* only used in debug-builds */ - if(buf) - buf[0] = 0; /* always leave a defined value in buf */ -#ifdef WIN32 - if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url)) - return 0; -#endif - for(i = 0; i < MAX_SCHEME_LEN; ++i) { - char s = url[i]; - if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) { - /* RFC 3986 3.1 explains: - scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - */ - } - else { - break; - } - } - if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) { - /* If this does not guess scheme, the scheme always ends with the colon so - that this also detects data: URLs etc. In guessing mode, data: could - be the host name "data" with a specified port number. */ - - /* the length of the scheme is the name part only */ - size_t len = i; - if(buf) { - buf[i] = 0; - while(i--) { - buf[i] = Curl_raw_tolower(url[i]); - } - } - return len; - } - return 0; -} - -/* - * Concatenate a relative URL to a base URL making it absolute. - * URL-encodes any spaces. - * The returned pointer must be freed by the caller unless NULL - * (returns NULL on out of memory). - * - * Note that this function destroys the 'base' string. - */ -static char *concat_url(char *base, const char *relurl) -{ - /*** - TRY to append this new path to the old URL - to the right of the host part. Oh crap, this is doomed to cause - problems in the future... - */ - struct dynbuf newest; - char *protsep; - char *pathsep; - bool host_changed = FALSE; - const char *useurl = relurl; - - /* protsep points to the start of the host name */ - protsep = strstr(base, "//"); - if(!protsep) - protsep = base; - else - protsep += 2; /* pass the slashes */ - - if('/' != relurl[0]) { - int level = 0; - - /* First we need to find out if there's a ?-letter in the URL, - and cut it and the right-side of that off */ - pathsep = strchr(protsep, '?'); - if(pathsep) - *pathsep = 0; - - /* we have a relative path to append to the last slash if there's one - available, or if the new URL is just a query string (starts with a - '?') we append the new one at the end of the entire currently worked - out URL */ - if(useurl[0] != '?') { - pathsep = strrchr(protsep, '/'); - if(pathsep) - *pathsep = 0; - } - - /* Check if there's any slash after the host name, and if so, remember - that position instead */ - pathsep = strchr(protsep, '/'); - if(pathsep) - protsep = pathsep + 1; - else - protsep = NULL; - - /* now deal with one "./" or any amount of "../" in the newurl - and act accordingly */ - - if((useurl[0] == '.') && (useurl[1] == '/')) - useurl += 2; /* just skip the "./" */ - - while((useurl[0] == '.') && - (useurl[1] == '.') && - (useurl[2] == '/')) { - level++; - useurl += 3; /* pass the "../" */ - } - - if(protsep) { - while(level--) { - /* cut off one more level from the right of the original URL */ - pathsep = strrchr(protsep, '/'); - if(pathsep) - *pathsep = 0; - else { - *protsep = 0; - break; - } - } - } - } - else { - /* We got a new absolute path for this server */ - - if(relurl[1] == '/') { - /* the new URL starts with //, just keep the protocol part from the - original one */ - *protsep = 0; - useurl = &relurl[2]; /* we keep the slashes from the original, so we - skip the new ones */ - host_changed = TRUE; - } - else { - /* cut off the original URL from the first slash, or deal with URLs - without slash */ - pathsep = strchr(protsep, '/'); - if(pathsep) { - /* When people use badly formatted URLs, such as - "http://www.url.com?dir=/home/daniel" we must not use the first - slash, if there's a ?-letter before it! */ - char *sep = strchr(protsep, '?'); - if(sep && (sep < pathsep)) - pathsep = sep; - *pathsep = 0; - } - else { - /* There was no slash. Now, since we might be operating on a badly - formatted URL, such as "http://www.url.com?id=2380" which doesn't - use a slash separator as it is supposed to, we need to check for a - ?-letter as well! */ - pathsep = strchr(protsep, '?'); - if(pathsep) - *pathsep = 0; - } - } - } - - Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH); - - /* copy over the root url part */ - if(Curl_dyn_add(&newest, base)) - return NULL; - - /* check if we need to append a slash */ - if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0])) - ; - else { - if(Curl_dyn_addn(&newest, "/", 1)) - return NULL; - } - - /* then append the new piece on the right side */ - urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE); - - return Curl_dyn_ptr(&newest); -} - -/* scan for byte values < 31 or 127 */ -static bool junkscan(const char *part, unsigned int flags) -{ - if(part) { - static const char badbytes[]={ - /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x7f, 0x00 /* null-terminate */ - }; - size_t n = strlen(part); - size_t nfine = strcspn(part, badbytes); - if(nfine != n) - /* since we don't know which part is scanned, return a generic error - code */ - return TRUE; - if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' ')) - return TRUE; - } - return FALSE; -} - -/* - * parse_hostname_login() - * - * Parse the login details (user name, password and options) from the URL and - * strip them out of the host name - * - */ -static CURLUcode parse_hostname_login(struct Curl_URL *u, - struct dynbuf *host, - unsigned int flags) -{ - CURLUcode result = CURLUE_OK; - CURLcode ccode; - char *userp = NULL; - char *passwdp = NULL; - char *optionsp = NULL; - const struct Curl_handler *h = NULL; - - /* At this point, we assume all the other special cases have been taken - * care of, so the host is at most - * - * [user[:password][;options]]@]hostname - * - * We need somewhere to put the embedded details, so do that first. - */ - - char *login = Curl_dyn_ptr(host); - char *ptr; - - DEBUGASSERT(login); - - ptr = strchr(login, '@'); - if(!ptr) - goto out; - - /* We will now try to extract the - * possible login information in a string like: - * ftp://user:password@ftp.my.site:8021/README */ - ptr++; - - /* if this is a known scheme, get some details */ - if(u->scheme) - h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED); - - /* We could use the login information in the URL so extract it. Only parse - options if the handler says we should. Note that 'h' might be NULL! */ - ccode = Curl_parse_login_details(login, ptr - login - 1, - &userp, &passwdp, - (h && (h->flags & PROTOPT_URLOPTIONS)) ? - &optionsp:NULL); - if(ccode) { - result = CURLUE_BAD_LOGIN; - goto out; - } - - if(userp) { - if(flags & CURLU_DISALLOW_USER) { - /* Option DISALLOW_USER is set and url contains username. */ - result = CURLUE_USER_NOT_ALLOWED; - goto out; - } - if(junkscan(userp, flags)) { - result = CURLUE_BAD_USER; - goto out; - } - u->user = userp; - } - - if(passwdp) { - if(junkscan(passwdp, flags)) { - result = CURLUE_BAD_PASSWORD; - goto out; - } - u->password = passwdp; - } - - if(optionsp) { - if(junkscan(optionsp, flags)) { - result = CURLUE_BAD_LOGIN; - goto out; - } - u->options = optionsp; - } - - /* move the name to the start of the host buffer */ - if(Curl_dyn_tail(host, strlen(ptr))) - return CURLUE_OUT_OF_MEMORY; - - return CURLUE_OK; - out: - - free(userp); - free(passwdp); - free(optionsp); - u->user = NULL; - u->password = NULL; - u->options = NULL; - - return result; -} - -UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host, - bool has_scheme) -{ - char *portptr = NULL; - char endbracket; - int len; - char *hostname = Curl_dyn_ptr(host); - /* - * Find the end of an IPv6 address, either on the ']' ending bracket or - * a percent-encoded zone index. - */ - if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n", - &endbracket, &len)) { - if(']' == endbracket) - portptr = &hostname[len]; - else if('%' == endbracket) { - int zonelen = len; - if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) { - if(']' != endbracket) - return CURLUE_BAD_IPV6; - portptr = &hostname[--zonelen + len + 1]; - } - else - return CURLUE_BAD_IPV6; - } - else - return CURLUE_BAD_IPV6; - - /* this is a RFC2732-style specified IP-address */ - if(portptr && *portptr) { - if(*portptr != ':') - return CURLUE_BAD_IPV6; - } - else - portptr = NULL; - } - else - portptr = strchr(hostname, ':'); - - if(portptr) { - char *rest; - long port; - char portbuf[7]; - size_t keep = portptr - hostname; - - /* Browser behavior adaptation. If there's a colon with no digits after, - just cut off the name there which makes us ignore the colon and just - use the default port. Firefox, Chrome and Safari all do that. - - Don't do it if the URL has no scheme, to make something that looks like - a scheme not work! - */ - Curl_dyn_setlen(host, keep); - portptr++; - if(!*portptr) - return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER; - - if(!ISDIGIT(*portptr)) - return CURLUE_BAD_PORT_NUMBER; - - port = strtol(portptr, &rest, 10); /* Port number must be decimal */ - - if(port > 0xffff) - return CURLUE_BAD_PORT_NUMBER; - - if(rest[0]) - return CURLUE_BAD_PORT_NUMBER; - - *rest = 0; - /* generate a new port number string to get rid of leading zeroes etc */ - msnprintf(portbuf, sizeof(portbuf), "%ld", port); - u->portnum = port; - u->port = strdup(portbuf); - if(!u->port) - return CURLUE_OUT_OF_MEMORY; - } - - return CURLUE_OK; -} - -static CURLUcode hostname_check(struct Curl_URL *u, char *hostname, - size_t hlen) /* length of hostname */ -{ - size_t len; - DEBUGASSERT(hostname); - - if(!hostname[0]) - return CURLUE_NO_HOST; - else if(hostname[0] == '[') { - const char *l = "0123456789abcdefABCDEF:."; - if(hlen < 4) /* '[::]' is the shortest possible valid string */ - return CURLUE_BAD_IPV6; - hostname++; - hlen -= 2; - - if(hostname[hlen] != ']') - return CURLUE_BAD_IPV6; - - /* only valid letters are ok */ - len = strspn(hostname, l); - if(hlen != len) { - hlen = len; - if(hostname[len] == '%') { - /* this could now be '%[zone id]' */ - char zoneid[16]; - int i = 0; - char *h = &hostname[len + 1]; - /* pass '25' if present and is a url encoded percent sign */ - if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']')) - h += 2; - while(*h && (*h != ']') && (i < 15)) - zoneid[i++] = *h++; - if(!i || (']' != *h)) - /* impossible to reach? */ - return CURLUE_MALFORMED_INPUT; - zoneid[i] = 0; - u->zoneid = strdup(zoneid); - if(!u->zoneid) - return CURLUE_OUT_OF_MEMORY; - hostname[len] = ']'; /* insert end bracket */ - hostname[len + 1] = 0; /* terminate the hostname */ - } - else - return CURLUE_BAD_IPV6; - /* hostname is fine */ - } -#ifdef ENABLE_IPV6 - { - char dest[16]; /* fits a binary IPv6 address */ - char norm[MAX_IPADR_LEN]; - hostname[hlen] = 0; /* end the address there */ - if(1 != Curl_inet_pton(AF_INET6, hostname, dest)) - return CURLUE_BAD_IPV6; - - /* check if it can be done shorter */ - if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) && - (strlen(norm) < hlen)) { - strcpy(hostname, norm); - hlen = strlen(norm); - hostname[hlen + 1] = 0; - } - hostname[hlen] = ']'; /* restore ending bracket */ - } -#endif - } - else { - /* letters from the second string are not ok */ - len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,"); - if(hlen != len) - /* hostname with bad content */ - return CURLUE_BAD_HOSTNAME; - } - return CURLUE_OK; -} - -#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#')) - -/* - * Handle partial IPv4 numerical addresses and different bases, like - * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc. - * - * If the given input string is syntactically wrong or any part for example is - * too big, this function returns FALSE and doesn't create any output. - * - * Output the "normalized" version of that input string in plain quad decimal - * integers and return TRUE. - */ -static bool ipv4_normalize(const char *hostname, char *outp, size_t olen) -{ - bool done = FALSE; - int n = 0; - const char *c = hostname; - unsigned long parts[4] = {0, 0, 0, 0}; - - while(!done) { - char *endp; - unsigned long l; - if((*c < '0') || (*c > '9')) - /* most importantly this doesn't allow a leading plus or minus */ - return FALSE; - l = strtoul(c, &endp, 0); - - /* overflow or nothing parsed at all */ - if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c)) - return FALSE; - -#if SIZEOF_LONG > 4 - /* a value larger than 32 bits */ - if(l > UINT_MAX) - return FALSE; -#endif - - parts[n] = l; - c = endp; - - switch (*c) { - case '.' : - if(n == 3) - return FALSE; - n++; - c++; - break; - - case '\0': - done = TRUE; - break; - - default: - return FALSE; - } - } - - /* this is deemed a valid IPv4 numerical address */ - - switch(n) { - case 0: /* a -- 32 bits */ - msnprintf(outp, olen, "%u.%u.%u.%u", - parts[0] >> 24, (parts[0] >> 16) & 0xff, - (parts[0] >> 8) & 0xff, parts[0] & 0xff); - break; - case 1: /* a.b -- 8.24 bits */ - if((parts[0] > 0xff) || (parts[1] > 0xffffff)) - return FALSE; - msnprintf(outp, olen, "%u.%u.%u.%u", - parts[0], (parts[1] >> 16) & 0xff, - (parts[1] >> 8) & 0xff, parts[1] & 0xff); - break; - case 2: /* a.b.c -- 8.8.16 bits */ - if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff)) - return FALSE; - msnprintf(outp, olen, "%u.%u.%u.%u", - parts[0], parts[1], (parts[2] >> 8) & 0xff, - parts[2] & 0xff); - break; - case 3: /* a.b.c.d -- 8.8.8.8 bits */ - if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) || - (parts[3] > 0xff)) - return FALSE; - msnprintf(outp, olen, "%u.%u.%u.%u", - parts[0], parts[1], parts[2], parts[3]); - break; - } - return TRUE; -} - -/* if necessary, replace the host content with a URL decoded version */ -static CURLUcode decode_host(struct dynbuf *host) -{ - char *per = NULL; - const char *hostname = Curl_dyn_ptr(host); - if(hostname[0] == '[') - /* only decode if not an ipv6 numerical */ - return CURLUE_OK; - per = strchr(hostname, '%'); - if(!per) - /* nothing to decode */ - return CURLUE_OK; - else { - /* encoded */ - size_t dlen; - char *decoded; - CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen, - REJECT_CTRL); - if(result) - return CURLUE_BAD_HOSTNAME; - Curl_dyn_reset(host); - result = Curl_dyn_addn(host, decoded, dlen); - free(decoded); - if(result) - return CURLUE_OUT_OF_MEMORY; - } - - return CURLUE_OK; -} - -/* - * "Remove Dot Segments" - * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4 - */ - -/* - * dedotdotify() - * @unittest: 1395 - * - * This function gets a null-terminated path with dot and dotdot sequences - * passed in and strips them off according to the rules in RFC 3986 section - * 5.2.4. - * - * The function handles a query part ('?' + stuff) appended but it expects - * that fragments ('#' + stuff) have already been cut off. - * - * RETURNS - * - * an allocated dedotdotified output string - */ -UNITTEST char *dedotdotify(const char *input, size_t clen); -UNITTEST char *dedotdotify(const char *input, size_t clen) -{ - char *out = malloc(clen + 1); - char *outptr; - const char *orginput = input; - char *queryp; - if(!out) - return NULL; /* out of memory */ - - *out = 0; /* null-terminates, for inputs like "./" */ - outptr = out; - - if(!*input) - /* zero length input string, return that */ - return out; - - /* - * To handle query-parts properly, we must find it and remove it during the - * dotdot-operation and then append it again at the end to the output - * string. - */ - queryp = strchr(input, '?'); - - do { - bool dotdot = TRUE; - if(*input == '.') { - /* A. If the input buffer begins with a prefix of "../" or "./", then - remove that prefix from the input buffer; otherwise, */ - - if(!strncmp("./", input, 2)) { - input += 2; - clen -= 2; - } - else if(!strncmp("../", input, 3)) { - input += 3; - clen -= 3; - } - /* D. if the input buffer consists only of "." or "..", then remove - that from the input buffer; otherwise, */ - - else if(!strcmp(".", input) || !strcmp("..", input) || - !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) { - *out = 0; - break; - } - else - dotdot = FALSE; - } - else if(*input == '/') { - /* B. if the input buffer begins with a prefix of "/./" or "/.", where - "." is a complete path segment, then replace that prefix with "/" in - the input buffer; otherwise, */ - if(!strncmp("/./", input, 3)) { - input += 2; - clen -= 2; - } - else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) { - *outptr++ = '/'; - *outptr = 0; - break; - } - - /* C. if the input buffer begins with a prefix of "/../" or "/..", - where ".." is a complete path segment, then replace that prefix with - "/" in the input buffer and remove the last segment and its - preceding "/" (if any) from the output buffer; otherwise, */ - - else if(!strncmp("/../", input, 4)) { - input += 3; - clen -= 3; - /* remove the last segment from the output buffer */ - while(outptr > out) { - outptr--; - if(*outptr == '/') - break; - } - *outptr = 0; /* null-terminate where it stops */ - } - else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) { - /* remove the last segment from the output buffer */ - while(outptr > out) { - outptr--; - if(*outptr == '/') - break; - } - *outptr++ = '/'; - *outptr = 0; /* null-terminate where it stops */ - break; - } - else - dotdot = FALSE; - } - else - dotdot = FALSE; - - if(!dotdot) { - /* E. move the first path segment in the input buffer to the end of - the output buffer, including the initial "/" character (if any) and - any subsequent characters up to, but not including, the next "/" - character or the end of the input buffer. */ - - do { - *outptr++ = *input++; - clen--; - } while(*input && (*input != '/') && (*input != '?')); - *outptr = 0; - } - - /* continue until end of input string OR, if there is a terminating - query part, stop there */ - } while(*input && (!queryp || (input < queryp))); - - if(queryp) { - size_t qlen; - /* There was a query part, append that to the output. */ - size_t oindex = queryp - orginput; - qlen = strlen(&orginput[oindex]); - memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */ - } - - return out; -} - -static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) -{ - const char *path; - size_t pathlen; - bool uncpath = FALSE; - char *query = NULL; - char *fragment = NULL; - char schemebuf[MAX_SCHEME_LEN + 1]; - const char *schemep = NULL; - size_t schemelen = 0; - size_t urllen; - CURLUcode result = CURLUE_OK; - size_t fraglen = 0; - struct dynbuf host; - - DEBUGASSERT(url); - - Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH); - - /************************************************************* - * Parse the URL. - ************************************************************/ - /* allocate scratch area */ - urllen = strlen(url); - if(urllen > CURL_MAX_INPUT_LENGTH) { - /* excessive input length */ - result = CURLUE_MALFORMED_INPUT; - goto fail; - } - - schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf), - flags & (CURLU_GUESS_SCHEME| - CURLU_DEFAULT_SCHEME)); - - /* handle the file: scheme */ - if(schemelen && !strcmp(schemebuf, "file")) { - if(urllen <= 6) { - /* file:/ is not enough to actually be a complete file: URL */ - result = CURLUE_BAD_FILE_URL; - goto fail; - } - - /* path has been allocated large enough to hold this */ - path = (char *)&url[5]; - - schemep = u->scheme = strdup("file"); - if(!u->scheme) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - - /* Extra handling URLs with an authority component (i.e. that start with - * "file://") - * - * We allow omitted hostname (e.g. file:/<path>) -- valid according to - * RFC 8089, but not the (current) WHAT-WG URL spec. - */ - if(path[0] == '/' && path[1] == '/') { - /* swallow the two slashes */ - const char *ptr = &path[2]; - - /* - * According to RFC 8089, a file: URL can be reliably dereferenced if: - * - * o it has no/blank hostname, or - * - * o the hostname matches "localhost" (case-insensitively), or - * - * o the hostname is a FQDN that resolves to this machine, or - * - * o it is an UNC String transformed to an URI (Windows only, RFC 8089 - * Appendix E.3). - * - * For brevity, we only consider URLs with empty, "localhost", or - * "127.0.0.1" hostnames as local, otherwise as an UNC String. - * - * Additionally, there is an exception for URLs with a Windows drive - * letter in the authority (which was accidentally omitted from RFC 8089 - * Appendix E, but believe me, it was meant to be there. --MK) - */ - if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) { - /* the URL includes a host name, it must match "localhost" or - "127.0.0.1" to be valid */ - if(checkprefix("localhost/", ptr) || - checkprefix("127.0.0.1/", ptr)) { - ptr += 9; /* now points to the slash after the host */ - } - else { -#if defined(WIN32) - size_t len; - - /* the host name, NetBIOS computer name, can not contain disallowed - chars, and the delimiting slash character must be appended to the - host name */ - path = strpbrk(ptr, "/\\:*?\"<>|"); - if(!path || *path != '/') { - result = CURLUE_BAD_FILE_URL; - goto fail; - } - - len = path - ptr; - if(len) { - if(Curl_dyn_addn(&host, ptr, len)) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - uncpath = TRUE; - } - - ptr -= 2; /* now points to the // before the host in UNC */ -#else - /* Invalid file://hostname/, expected localhost or 127.0.0.1 or - none */ - result = CURLUE_BAD_FILE_URL; - goto fail; -#endif - } - } - - path = ptr; - } - - if(!uncpath) - /* no host for file: URLs by default */ - Curl_dyn_reset(&host); - -#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__) - /* Don't allow Windows drive letters when not in Windows. - * This catches both "file:/c:" and "file:c:" */ - if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) || - STARTS_WITH_URL_DRIVE_PREFIX(path)) { - /* File drive letters are only accepted in MSDOS/Windows */ - result = CURLUE_BAD_FILE_URL; - goto fail; - } -#else - /* If the path starts with a slash and a drive letter, ditch the slash */ - if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) { - /* This cannot be done with strcpy, as the memory chunks overlap! */ - path++; - } -#endif - - } - else { - /* clear path */ - const char *p; - const char *hostp; - size_t len; - - if(schemelen) { - int i = 0; - p = &url[schemelen + 1]; - while(p && (*p == '/') && (i < 4)) { - p++; - i++; - } - - schemep = schemebuf; - if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) && - !(flags & CURLU_NON_SUPPORT_SCHEME)) { - result = CURLUE_UNSUPPORTED_SCHEME; - goto fail; - } - - if((i < 1) || (i>3)) { - /* less than one or more than three slashes */ - result = CURLUE_BAD_SLASHES; - goto fail; - } - if(junkscan(schemep, flags)) { - result = CURLUE_BAD_SCHEME; - goto fail; - } - } - else { - /* no scheme! */ - - if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) { - result = CURLUE_BAD_SCHEME; - goto fail; - } - if(flags & CURLU_DEFAULT_SCHEME) - schemep = DEFAULT_SCHEME; - - /* - * The URL was badly formatted, let's try without scheme specified. - */ - p = url; - } - hostp = p; /* host name starts here */ - - /* find the end of the host name + port number */ - while(*p && !HOSTNAME_END(*p)) - p++; - - len = p - hostp; - if(len) { - if(Curl_dyn_addn(&host, hostp, len)) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - } - else { - if(!(flags & CURLU_NO_AUTHORITY)) { - result = CURLUE_NO_HOST; - goto fail; - } - } - - path = (char *)p; - - if(schemep) { - u->scheme = strdup(schemep); - if(!u->scheme) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - } - } - - fragment = strchr(path, '#'); - if(fragment) { - fraglen = strlen(fragment); - if(fraglen > 1) { - /* skip the leading '#' in the copy but include the terminating null */ - u->fragment = Curl_memdup(fragment + 1, fraglen); - if(!u->fragment) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - - if(junkscan(u->fragment, flags)) { - result = CURLUE_BAD_FRAGMENT; - goto fail; - } - } - } - - query = strchr(path, '?'); - if(query && (!fragment || (query < fragment))) { - size_t qlen = strlen(query) - fraglen; /* includes '?' */ - pathlen = strlen(path) - qlen - fraglen; - if(qlen > 1) { - if(qlen && (flags & CURLU_URLENCODE)) { - struct dynbuf enc; - Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); - /* skip the leading question mark */ - if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - u->query = Curl_dyn_ptr(&enc); - } - else { - u->query = Curl_memdup(query + 1, qlen); - if(!u->query) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - u->query[qlen - 1] = 0; - } - - if(junkscan(u->query, flags)) { - result = CURLUE_BAD_QUERY; - goto fail; - } - } - else { - /* single byte query */ - u->query = strdup(""); - if(!u->query) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - } - } - else - pathlen = strlen(path) - fraglen; - - if(pathlen && (flags & CURLU_URLENCODE)) { - struct dynbuf enc; - Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); - if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - pathlen = Curl_dyn_len(&enc); - path = u->path = Curl_dyn_ptr(&enc); - } - - if(!pathlen) { - /* there is no path left, unset */ - path = NULL; - } - else { - if(!u->path) { - u->path = Curl_memdup(path, pathlen + 1); - if(!u->path) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - u->path[pathlen] = 0; - path = u->path; - } - else if(flags & CURLU_URLENCODE) - /* it might have encoded more than just the path so cut it */ - u->path[pathlen] = 0; - - if(junkscan(u->path, flags)) { - result = CURLUE_BAD_PATH; - goto fail; - } - - if(!(flags & CURLU_PATH_AS_IS)) { - /* remove ../ and ./ sequences according to RFC3986 */ - char *newp = dedotdotify((char *)path, pathlen); - if(!newp) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - free(u->path); - u->path = newp; - } - } - - if(Curl_dyn_len(&host)) { - char normalized_ipv4[sizeof("255.255.255.255") + 1]; - - /* - * Parse the login details and strip them out of the host name. - */ - result = parse_hostname_login(u, &host, flags); - if(!result) - result = Curl_parse_port(u, &host, schemelen); - if(result) - goto fail; - - if(junkscan(Curl_dyn_ptr(&host), flags)) { - result = CURLUE_BAD_HOSTNAME; - goto fail; - } - - if(ipv4_normalize(Curl_dyn_ptr(&host), - normalized_ipv4, sizeof(normalized_ipv4))) { - Curl_dyn_reset(&host); - if(Curl_dyn_add(&host, normalized_ipv4)) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - } - else { - result = decode_host(&host); - if(!result) - result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host)); - if(result) - goto fail; - } - - if((flags & CURLU_GUESS_SCHEME) && !schemep) { - const char *hostname = Curl_dyn_ptr(&host); - /* legacy curl-style guess based on host name */ - if(checkprefix("ftp.", hostname)) - schemep = "ftp"; - else if(checkprefix("dict.", hostname)) - schemep = "dict"; - else if(checkprefix("ldap.", hostname)) - schemep = "ldap"; - else if(checkprefix("imap.", hostname)) - schemep = "imap"; - else if(checkprefix("smtp.", hostname)) - schemep = "smtp"; - else if(checkprefix("pop3.", hostname)) - schemep = "pop3"; - else - schemep = "http"; - - u->scheme = strdup(schemep); - if(!u->scheme) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - } - } - else if(flags & CURLU_NO_AUTHORITY) { - /* allowed to be empty. */ - if(Curl_dyn_add(&host, "")) { - result = CURLUE_OUT_OF_MEMORY; - goto fail; - } - } - - u->host = Curl_dyn_ptr(&host); - - return result; - fail: - Curl_dyn_free(&host); - free_urlhandle(u); - return result; -} - -/* - * Parse the URL and, if successful, replace everything in the Curl_URL struct. - */ -static CURLUcode parseurl_and_replace(const char *url, CURLU *u, - unsigned int flags) -{ - CURLUcode result; - CURLU tmpurl; - memset(&tmpurl, 0, sizeof(tmpurl)); - result = parseurl(url, &tmpurl, flags); - if(!result) { - free_urlhandle(u); - *u = tmpurl; - } - return result; -} - -/* - */ -CURLU *curl_url(void) -{ - return calloc(sizeof(struct Curl_URL), 1); -} - -void curl_url_cleanup(CURLU *u) -{ - if(u) { - free_urlhandle(u); - free(u); - } -} - -#define DUP(dest, src, name) \ - do { \ - if(src->name) { \ - dest->name = strdup(src->name); \ - if(!dest->name) \ - goto fail; \ - } \ - } while(0) - -CURLU *curl_url_dup(CURLU *in) -{ - struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1); - if(u) { - DUP(u, in, scheme); - DUP(u, in, user); - DUP(u, in, password); - DUP(u, in, options); - DUP(u, in, host); - DUP(u, in, port); - DUP(u, in, path); - DUP(u, in, query); - DUP(u, in, fragment); - u->portnum = in->portnum; - } - return u; - fail: - curl_url_cleanup(u); - return NULL; -} - -CURLUcode curl_url_get(CURLU *u, CURLUPart what, - char **part, unsigned int flags) -{ - char *ptr; - CURLUcode ifmissing = CURLUE_UNKNOWN_PART; - char portbuf[7]; - bool urldecode = (flags & CURLU_URLDECODE)?1:0; - bool urlencode = (flags & CURLU_URLENCODE)?1:0; - bool plusdecode = FALSE; - (void)flags; - if(!u) - return CURLUE_BAD_HANDLE; - if(!part) - return CURLUE_BAD_PARTPOINTER; - *part = NULL; - - switch(what) { - case CURLUPART_SCHEME: - ptr = u->scheme; - ifmissing = CURLUE_NO_SCHEME; - urldecode = FALSE; /* never for schemes */ - break; - case CURLUPART_USER: - ptr = u->user; - ifmissing = CURLUE_NO_USER; - break; - case CURLUPART_PASSWORD: - ptr = u->password; - ifmissing = CURLUE_NO_PASSWORD; - break; - case CURLUPART_OPTIONS: - ptr = u->options; - ifmissing = CURLUE_NO_OPTIONS; - break; - case CURLUPART_HOST: - ptr = u->host; - ifmissing = CURLUE_NO_HOST; - break; - case CURLUPART_ZONEID: - ptr = u->zoneid; - ifmissing = CURLUE_NO_ZONEID; - break; - case CURLUPART_PORT: - ptr = u->port; - ifmissing = CURLUE_NO_PORT; - urldecode = FALSE; /* never for port */ - if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) { - /* there's no stored port number, but asked to deliver - a default one for the scheme */ - const struct Curl_handler *h = - Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED); - if(h) { - msnprintf(portbuf, sizeof(portbuf), "%u", h->defport); - ptr = portbuf; - } - } - else if(ptr && u->scheme) { - /* there is a stored port number, but ask to inhibit if - it matches the default one for the scheme */ - const struct Curl_handler *h = - Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED); - if(h && (h->defport == u->portnum) && - (flags & CURLU_NO_DEFAULT_PORT)) - ptr = NULL; - } - break; - case CURLUPART_PATH: - ptr = u->path; - if(!ptr) { - ptr = u->path = strdup("/"); - if(!u->path) - return CURLUE_OUT_OF_MEMORY; - } - break; - case CURLUPART_QUERY: - ptr = u->query; - ifmissing = CURLUE_NO_QUERY; - plusdecode = urldecode; - break; - case CURLUPART_FRAGMENT: - ptr = u->fragment; - ifmissing = CURLUE_NO_FRAGMENT; - break; - case CURLUPART_URL: { - char *url; - char *scheme; - char *options = u->options; - char *port = u->port; - char *allochost = NULL; - if(u->scheme && strcasecompare("file", u->scheme)) { - url = aprintf("file://%s%s%s", - u->path, - u->fragment? "#": "", - u->fragment? u->fragment : ""); - } - else if(!u->host) - return CURLUE_NO_HOST; - else { - const struct Curl_handler *h = NULL; - if(u->scheme) - scheme = u->scheme; - else if(flags & CURLU_DEFAULT_SCHEME) - scheme = (char *) DEFAULT_SCHEME; - else - return CURLUE_NO_SCHEME; - - h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED); - if(!port && (flags & CURLU_DEFAULT_PORT)) { - /* there's no stored port number, but asked to deliver - a default one for the scheme */ - if(h) { - msnprintf(portbuf, sizeof(portbuf), "%u", h->defport); - port = portbuf; - } - } - else if(port) { - /* there is a stored port number, but asked to inhibit if it matches - the default one for the scheme */ - if(h && (h->defport == u->portnum) && - (flags & CURLU_NO_DEFAULT_PORT)) - port = NULL; - } - - if(h && !(h->flags & PROTOPT_URLOPTIONS)) - options = NULL; - - if(u->host[0] == '[') { - if(u->zoneid) { - /* make it '[ host %25 zoneid ]' */ - struct dynbuf enc; - size_t hostlen = strlen(u->host); - Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); - if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host, - u->zoneid)) - return CURLUE_OUT_OF_MEMORY; - allochost = Curl_dyn_ptr(&enc); - } - } - else if(urlencode) { - allochost = curl_easy_escape(NULL, u->host, 0); - if(!allochost) - return CURLUE_OUT_OF_MEMORY; - } - else { - /* only encode '%' in output host name */ - char *host = u->host; - bool percent = FALSE; - /* first, count number of percents present in the name */ - while(*host) { - if(*host == '%') { - percent = TRUE; - break; - } - host++; - } - /* if there were percent(s), encode the host name */ - if(percent) { - struct dynbuf enc; - CURLcode result; - Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); - host = u->host; - while(*host) { - if(*host == '%') - result = Curl_dyn_addn(&enc, "%25", 3); - else - result = Curl_dyn_addn(&enc, host, 1); - if(result) - return CURLUE_OUT_OF_MEMORY; - host++; - } - free(u->host); - u->host = Curl_dyn_ptr(&enc); - } - } - - url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - scheme, - u->user ? u->user : "", - u->password ? ":": "", - u->password ? u->password : "", - options ? ";" : "", - options ? options : "", - (u->user || u->password || options) ? "@": "", - allochost ? allochost : u->host, - port ? ":": "", - port ? port : "", - (u->path && (u->path[0] != '/')) ? "/": "", - u->path ? u->path : "/", - (u->query && u->query[0]) ? "?": "", - (u->query && u->query[0]) ? u->query : "", - u->fragment? "#": "", - u->fragment? u->fragment : ""); - free(allochost); - } - if(!url) - return CURLUE_OUT_OF_MEMORY; - *part = url; - return CURLUE_OK; - } - default: - ptr = NULL; - break; - } - if(ptr) { - size_t partlen = strlen(ptr); - size_t i = 0; - *part = Curl_memdup(ptr, partlen + 1); - if(!*part) - return CURLUE_OUT_OF_MEMORY; - if(plusdecode) { - /* convert + to space */ - char *plus = *part; - for(i = 0; i < partlen; ++plus, i++) { - if(*plus == '+') - *plus = ' '; - } - } - if(urldecode) { - char *decoded; - size_t dlen; - /* this unconditional rejection of control bytes is documented - API behavior */ - CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL); - free(*part); - if(res) { - *part = NULL; - return CURLUE_URLDECODE; - } - *part = decoded; - partlen = dlen; - } - if(urlencode) { - struct dynbuf enc; - Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); - if(urlencode_str(&enc, *part, partlen, TRUE, - what == CURLUPART_QUERY)) - return CURLUE_OUT_OF_MEMORY; - free(*part); - *part = Curl_dyn_ptr(&enc); - } - - return CURLUE_OK; - } - else - return ifmissing; -} - -CURLUcode curl_url_set(CURLU *u, CURLUPart what, - const char *part, unsigned int flags) -{ - char **storep = NULL; - long port = 0; - bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0; - bool plusencode = FALSE; - bool urlskipslash = FALSE; - bool appendquery = FALSE; - bool equalsencode = FALSE; - - if(!u) - return CURLUE_BAD_HANDLE; - if(!part) { - /* setting a part to NULL clears it */ - switch(what) { - case CURLUPART_URL: - break; - case CURLUPART_SCHEME: - storep = &u->scheme; - break; - case CURLUPART_USER: - storep = &u->user; - break; - case CURLUPART_PASSWORD: - storep = &u->password; - break; - case CURLUPART_OPTIONS: - storep = &u->options; - break; - case CURLUPART_HOST: - storep = &u->host; - break; - case CURLUPART_ZONEID: - storep = &u->zoneid; - break; - case CURLUPART_PORT: - u->portnum = 0; - storep = &u->port; - break; - case CURLUPART_PATH: - storep = &u->path; - break; - case CURLUPART_QUERY: - storep = &u->query; - break; - case CURLUPART_FRAGMENT: - storep = &u->fragment; - break; - default: - return CURLUE_UNKNOWN_PART; - } - if(storep && *storep) { - Curl_safefree(*storep); - } - else if(!storep) { - free_urlhandle(u); - memset(u, 0, sizeof(struct Curl_URL)); - } - return CURLUE_OK; - } - - switch(what) { - case CURLUPART_SCHEME: - if(strlen(part) > MAX_SCHEME_LEN) - /* too long */ - return CURLUE_BAD_SCHEME; - if(!(flags & CURLU_NON_SUPPORT_SCHEME) && - /* verify that it is a fine scheme */ - !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED)) - return CURLUE_UNSUPPORTED_SCHEME; - storep = &u->scheme; - urlencode = FALSE; /* never */ - break; - case CURLUPART_USER: - storep = &u->user; - break; - case CURLUPART_PASSWORD: - storep = &u->password; - break; - case CURLUPART_OPTIONS: - storep = &u->options; - break; - case CURLUPART_HOST: { - size_t len = strcspn(part, " \r\n"); - if(strlen(part) != len) - /* hostname with bad content */ - return CURLUE_BAD_HOSTNAME; - storep = &u->host; - Curl_safefree(u->zoneid); - break; - } - case CURLUPART_ZONEID: - storep = &u->zoneid; - break; - case CURLUPART_PORT: - { - char *endp; - urlencode = FALSE; /* never */ - port = strtol(part, &endp, 10); /* Port number must be decimal */ - if((port <= 0) || (port > 0xffff)) - return CURLUE_BAD_PORT_NUMBER; - if(*endp) - /* weirdly provided number, not good! */ - return CURLUE_BAD_PORT_NUMBER; - storep = &u->port; - } - break; - case CURLUPART_PATH: - urlskipslash = TRUE; - storep = &u->path; - break; - case CURLUPART_QUERY: - plusencode = urlencode; - appendquery = (flags & CURLU_APPENDQUERY)?1:0; - equalsencode = appendquery; - storep = &u->query; - break; - case CURLUPART_FRAGMENT: - storep = &u->fragment; - break; - case CURLUPART_URL: { - /* - * Allow a new URL to replace the existing (if any) contents. - * - * If the existing contents is enough for a URL, allow a relative URL to - * replace it. - */ - CURLUcode result; - char *oldurl; - char *redired_url; - - /* if the new thing is absolute or the old one is not - * (we could not get an absolute url in 'oldurl'), - * then replace the existing with the new. */ - if(Curl_is_absolute_url(part, NULL, 0, - flags & (CURLU_GUESS_SCHEME| - CURLU_DEFAULT_SCHEME)) - || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) { - return parseurl_and_replace(part, u, flags); - } - - /* apply the relative part to create a new URL - * and replace the existing one with it. */ - redired_url = concat_url(oldurl, part); - free(oldurl); - if(!redired_url) - return CURLUE_OUT_OF_MEMORY; - - result = parseurl_and_replace(redired_url, u, flags); - free(redired_url); - return result; - } - default: - return CURLUE_UNKNOWN_PART; - } - DEBUGASSERT(storep); - { - const char *newp = part; - size_t nalloc = strlen(part); - - if(nalloc > CURL_MAX_INPUT_LENGTH) - /* excessive input length */ - return CURLUE_MALFORMED_INPUT; - - if(urlencode) { - const unsigned char *i; - struct dynbuf enc; - - Curl_dyn_init(&enc, nalloc * 3 + 1); - - for(i = (const unsigned char *)part; *i; i++) { - CURLcode result; - if((*i == ' ') && plusencode) { - result = Curl_dyn_addn(&enc, "+", 1); - if(result) - return CURLUE_OUT_OF_MEMORY; - } - else if(Curl_isunreserved(*i) || - ((*i == '/') && urlskipslash) || - ((*i == '=') && equalsencode)) { - if((*i == '=') && equalsencode) - /* only skip the first equals sign */ - equalsencode = FALSE; - result = Curl_dyn_addn(&enc, i, 1); - if(result) - return CURLUE_OUT_OF_MEMORY; - } - else { - result = Curl_dyn_addf(&enc, "%%%02x", *i); - if(result) - return CURLUE_OUT_OF_MEMORY; - } - } - newp = Curl_dyn_ptr(&enc); - } - else { - char *p; - newp = strdup(part); - if(!newp) - return CURLUE_OUT_OF_MEMORY; - p = (char *)newp; - while(*p) { - /* make sure percent encoded are lower case */ - if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) && - (ISUPPER(p[1]) || ISUPPER(p[2]))) { - p[1] = Curl_raw_tolower(p[1]); - p[2] = Curl_raw_tolower(p[2]); - p += 3; - } - else - p++; - } - } - - if(appendquery) { - /* Append the 'newp' string onto the old query. Add a '&' separator if - none is present at the end of the existing query already */ - - size_t querylen = u->query ? strlen(u->query) : 0; - bool addamperand = querylen && (u->query[querylen -1] != '&'); - if(querylen) { - struct dynbuf enc; - Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); - - if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */ - goto nomem; - - if(addamperand) { - if(Curl_dyn_addn(&enc, "&", 1)) - goto nomem; - } - if(Curl_dyn_add(&enc, newp)) - goto nomem; - free((char *)newp); - free(*storep); - *storep = Curl_dyn_ptr(&enc); - return CURLUE_OK; - nomem: - free((char *)newp); - return CURLUE_OUT_OF_MEMORY; - } - } - - if(what == CURLUPART_HOST) { - size_t n = strlen(newp); - if(!n && (flags & CURLU_NO_AUTHORITY)) { - /* Skip hostname check, it's allowed to be empty. */ - } - else { - if(hostname_check(u, (char *)newp, n)) { - free((char *)newp); - return CURLUE_BAD_HOSTNAME; - } - } - } - - free(*storep); - *storep = (char *)newp; - } - /* set after the string, to make it not assigned if the allocation above - fails */ - if(port) - u->portnum = port; - return CURLUE_OK; -} +/***************************************************************************
+ * _ _ ____ _
+ * Project ___| | | | _ \| |
+ * / __| | | | |_) | |
+ * | (__| |_| | _ <| |___
+ * \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at https://curl.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ * SPDX-License-Identifier: curl
+ *
+ ***************************************************************************/
+
+#include "curl_setup.h"
+
+#include "urldata.h"
+#include "urlapi-int.h"
+#include "strcase.h"
+#include "url.h"
+#include "escape.h"
+#include "curl_ctype.h"
+#include "inet_pton.h"
+#include "inet_ntop.h"
+#include "strdup.h"
+
+/* The last 3 #include files should be in this order */
+#include "curl_printf.h"
+#include "curl_memory.h"
+#include "memdebug.h"
+
+ /* MSDOS/Windows style drive prefix, eg c: in c:foo */
+#define STARTS_WITH_DRIVE_PREFIX(str) \
+ ((('a' <= str[0] && str[0] <= 'z') || \
+ ('A' <= str[0] && str[0] <= 'Z')) && \
+ (str[1] == ':'))
+
+ /* MSDOS/Windows style drive prefix, optionally with
+ * a '|' instead of ':', followed by a slash or NUL */
+#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
+ ((('a' <= (str)[0] && (str)[0] <= 'z') || \
+ ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
+ ((str)[1] == ':' || (str)[1] == '|') && \
+ ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
+
+/* scheme is not URL encoded, the longest libcurl supported ones are... */
+#define MAX_SCHEME_LEN 40
+
+/* Internal representation of CURLU. Point to URL-encoded strings. */
+struct Curl_URL {
+ char *scheme;
+ char *user;
+ char *password;
+ char *options; /* IMAP only? */
+ char *host;
+ char *zoneid; /* for numerical IPv6 addresses */
+ char *port;
+ char *path;
+ char *query;
+ char *fragment;
+ long portnum; /* the numerical version */
+};
+
+#define DEFAULT_SCHEME "https"
+
+static void free_urlhandle(struct Curl_URL *u)
+{
+ free(u->scheme);
+ free(u->user);
+ free(u->password);
+ free(u->options);
+ free(u->host);
+ free(u->zoneid);
+ free(u->port);
+ free(u->path);
+ free(u->query);
+ free(u->fragment);
+}
+
+/*
+ * Find the separator at the end of the host name, or the '?' in cases like
+ * http://www.url.com?id=2380
+ */
+static const char *find_host_sep(const char *url)
+{
+ const char *sep;
+ const char *query;
+
+ /* Find the start of the hostname */
+ sep = strstr(url, "//");
+ if(!sep)
+ sep = url;
+ else
+ sep += 2;
+
+ query = strchr(sep, '?');
+ sep = strchr(sep, '/');
+
+ if(!sep)
+ sep = url + strlen(url);
+
+ if(!query)
+ query = url + strlen(url);
+
+ return sep < query ? sep : query;
+}
+
+/*
+ * Decide in an encoding-independent manner whether a character in a URL must
+ * be escaped. This is used in urlencode_str().
+ */
+static bool urlchar_needs_escaping(int c)
+{
+ return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
+}
+
+/* urlencode_str() writes data into an output dynbuf and URL-encodes the
+ * spaces in the source URL accordingly.
+ *
+ * URL encoding should be skipped for host names, otherwise IDN resolution
+ * will fail.
+ */
+static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
+ size_t len, bool relative,
+ bool query)
+{
+ /* we must add this with whitespace-replacing */
+ bool left = !query;
+ const unsigned char *iptr;
+ const unsigned char *host_sep = (const unsigned char *) url;
+
+ if(!relative)
+ host_sep = (const unsigned char *) find_host_sep(url);
+
+ for(iptr = (unsigned char *)url; /* read from here */
+ len; iptr++, len--) {
+
+ if(iptr < host_sep) {
+ if(Curl_dyn_addn(o, iptr, 1))
+ return CURLUE_OUT_OF_MEMORY;
+ continue;
+ }
+
+ if(*iptr == ' ') {
+ if(left) {
+ if(Curl_dyn_addn(o, "%20", 3))
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ else {
+ if(Curl_dyn_addn(o, "+", 1))
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ continue;
+ }
+
+ if(*iptr == '?')
+ left = FALSE;
+
+ if(urlchar_needs_escaping(*iptr)) {
+ if(Curl_dyn_addf(o, "%%%02x", *iptr))
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ else {
+ if(Curl_dyn_addn(o, iptr, 1))
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ }
+
+ return CURLUE_OK;
+}
+
+/*
+ * Returns the length of the scheme if the given URL is absolute (as opposed
+ * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
+ * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
+ *
+ * If 'guess_scheme' is TRUE, it means the URL might be provided without
+ * scheme.
+ */
+size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
+ bool guess_scheme)
+{
+ int i;
+ DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
+ (void)buflen; /* only used in debug-builds */
+ if(buf)
+ buf[0] = 0; /* always leave a defined value in buf */
+#ifdef WIN32
+ if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
+ return 0;
+#endif
+ for(i = 0; i < MAX_SCHEME_LEN; ++i) {
+ char s = url[i];
+ if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
+ /* RFC 3986 3.1 explains:
+ scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ */
+ }
+ else {
+ break;
+ }
+ }
+ if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
+ /* If this does not guess scheme, the scheme always ends with the colon so
+ that this also detects data: URLs etc. In guessing mode, data: could
+ be the host name "data" with a specified port number. */
+
+ /* the length of the scheme is the name part only */
+ size_t len = i;
+ if(buf) {
+ buf[i] = 0;
+ while(i--) {
+ buf[i] = Curl_raw_tolower(url[i]);
+ }
+ }
+ return len;
+ }
+ return 0;
+}
+
+/*
+ * Concatenate a relative URL to a base URL making it absolute.
+ * URL-encodes any spaces.
+ * The returned pointer must be freed by the caller unless NULL
+ * (returns NULL on out of memory).
+ *
+ * Note that this function destroys the 'base' string.
+ */
+static char *concat_url(char *base, const char *relurl)
+{
+ /***
+ TRY to append this new path to the old URL
+ to the right of the host part. Oh crap, this is doomed to cause
+ problems in the future...
+ */
+ struct dynbuf newest;
+ char *protsep;
+ char *pathsep;
+ bool host_changed = FALSE;
+ const char *useurl = relurl;
+
+ /* protsep points to the start of the host name */
+ protsep = strstr(base, "//");
+ if(!protsep)
+ protsep = base;
+ else
+ protsep += 2; /* pass the slashes */
+
+ if('/' != relurl[0]) {
+ int level = 0;
+
+ /* First we need to find out if there's a ?-letter in the URL,
+ and cut it and the right-side of that off */
+ pathsep = strchr(protsep, '?');
+ if(pathsep)
+ *pathsep = 0;
+
+ /* we have a relative path to append to the last slash if there's one
+ available, or if the new URL is just a query string (starts with a
+ '?') we append the new one at the end of the entire currently worked
+ out URL */
+ if(useurl[0] != '?') {
+ pathsep = strrchr(protsep, '/');
+ if(pathsep)
+ *pathsep = 0;
+ }
+
+ /* Check if there's any slash after the host name, and if so, remember
+ that position instead */
+ pathsep = strchr(protsep, '/');
+ if(pathsep)
+ protsep = pathsep + 1;
+ else
+ protsep = NULL;
+
+ /* now deal with one "./" or any amount of "../" in the newurl
+ and act accordingly */
+
+ if((useurl[0] == '.') && (useurl[1] == '/'))
+ useurl += 2; /* just skip the "./" */
+
+ while((useurl[0] == '.') &&
+ (useurl[1] == '.') &&
+ (useurl[2] == '/')) {
+ level++;
+ useurl += 3; /* pass the "../" */
+ }
+
+ if(protsep) {
+ while(level--) {
+ /* cut off one more level from the right of the original URL */
+ pathsep = strrchr(protsep, '/');
+ if(pathsep)
+ *pathsep = 0;
+ else {
+ *protsep = 0;
+ break;
+ }
+ }
+ }
+ }
+ else {
+ /* We got a new absolute path for this server */
+
+ if(relurl[1] == '/') {
+ /* the new URL starts with //, just keep the protocol part from the
+ original one */
+ *protsep = 0;
+ useurl = &relurl[2]; /* we keep the slashes from the original, so we
+ skip the new ones */
+ host_changed = TRUE;
+ }
+ else {
+ /* cut off the original URL from the first slash, or deal with URLs
+ without slash */
+ pathsep = strchr(protsep, '/');
+ if(pathsep) {
+ /* When people use badly formatted URLs, such as
+ "http://www.url.com?dir=/home/daniel" we must not use the first
+ slash, if there's a ?-letter before it! */
+ char *sep = strchr(protsep, '?');
+ if(sep && (sep < pathsep))
+ pathsep = sep;
+ *pathsep = 0;
+ }
+ else {
+ /* There was no slash. Now, since we might be operating on a badly
+ formatted URL, such as "http://www.url.com?id=2380" which doesn't
+ use a slash separator as it is supposed to, we need to check for a
+ ?-letter as well! */
+ pathsep = strchr(protsep, '?');
+ if(pathsep)
+ *pathsep = 0;
+ }
+ }
+ }
+
+ Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
+
+ /* copy over the root url part */
+ if(Curl_dyn_add(&newest, base))
+ return NULL;
+
+ /* check if we need to append a slash */
+ if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
+ ;
+ else {
+ if(Curl_dyn_addn(&newest, "/", 1))
+ return NULL;
+ }
+
+ /* then append the new piece on the right side */
+ urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
+
+ return Curl_dyn_ptr(&newest);
+}
+
+/* scan for byte values < 31 or 127 */
+static bool junkscan(const char *part, unsigned int flags)
+{
+ if(part) {
+ static const char badbytes[]={
+ /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x7f, 0x00 /* null-terminate */
+ };
+ size_t n = strlen(part);
+ size_t nfine = strcspn(part, badbytes);
+ if(nfine != n)
+ /* since we don't know which part is scanned, return a generic error
+ code */
+ return TRUE;
+ if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
+ return TRUE;
+ }
+ return FALSE;
+}
+
+/*
+ * parse_hostname_login()
+ *
+ * Parse the login details (user name, password and options) from the URL and
+ * strip them out of the host name
+ *
+ */
+static CURLUcode parse_hostname_login(struct Curl_URL *u,
+ struct dynbuf *host,
+ unsigned int flags)
+{
+ CURLUcode result = CURLUE_OK;
+ CURLcode ccode;
+ char *userp = NULL;
+ char *passwdp = NULL;
+ char *optionsp = NULL;
+ const struct Curl_handler *h = NULL;
+
+ /* At this point, we assume all the other special cases have been taken
+ * care of, so the host is at most
+ *
+ * [user[:password][;options]]@]hostname
+ *
+ * We need somewhere to put the embedded details, so do that first.
+ */
+
+ char *login = Curl_dyn_ptr(host);
+ char *ptr;
+
+ DEBUGASSERT(login);
+
+ ptr = strchr(login, '@');
+ if(!ptr)
+ goto out;
+
+ /* We will now try to extract the
+ * possible login information in a string like:
+ * ftp://user:password@ftp.my.site:8021/README */
+ ptr++;
+
+ /* if this is a known scheme, get some details */
+ if(u->scheme)
+ h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
+
+ /* We could use the login information in the URL so extract it. Only parse
+ options if the handler says we should. Note that 'h' might be NULL! */
+ ccode = Curl_parse_login_details(login, ptr - login - 1,
+ &userp, &passwdp,
+ (h && (h->flags & PROTOPT_URLOPTIONS)) ?
+ &optionsp:NULL);
+ if(ccode) {
+ result = CURLUE_BAD_LOGIN;
+ goto out;
+ }
+
+ if(userp) {
+ if(flags & CURLU_DISALLOW_USER) {
+ /* Option DISALLOW_USER is set and url contains username. */
+ result = CURLUE_USER_NOT_ALLOWED;
+ goto out;
+ }
+ if(junkscan(userp, flags)) {
+ result = CURLUE_BAD_USER;
+ goto out;
+ }
+ u->user = userp;
+ }
+
+ if(passwdp) {
+ if(junkscan(passwdp, flags)) {
+ result = CURLUE_BAD_PASSWORD;
+ goto out;
+ }
+ u->password = passwdp;
+ }
+
+ if(optionsp) {
+ if(junkscan(optionsp, flags)) {
+ result = CURLUE_BAD_LOGIN;
+ goto out;
+ }
+ u->options = optionsp;
+ }
+
+ /* move the name to the start of the host buffer */
+ if(Curl_dyn_tail(host, strlen(ptr)))
+ return CURLUE_OUT_OF_MEMORY;
+
+ return CURLUE_OK;
+ out:
+
+ free(userp);
+ free(passwdp);
+ free(optionsp);
+ u->user = NULL;
+ u->password = NULL;
+ u->options = NULL;
+
+ return result;
+}
+
+UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
+ bool has_scheme)
+{
+ char *portptr = NULL;
+ char endbracket;
+ int len;
+ char *hostname = Curl_dyn_ptr(host);
+ /*
+ * Find the end of an IPv6 address, either on the ']' ending bracket or
+ * a percent-encoded zone index.
+ */
+ if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
+ &endbracket, &len)) {
+ if(']' == endbracket)
+ portptr = &hostname[len];
+ else if('%' == endbracket) {
+ int zonelen = len;
+ if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
+ if(']' != endbracket)
+ return CURLUE_BAD_IPV6;
+ portptr = &hostname[--zonelen + len + 1];
+ }
+ else
+ return CURLUE_BAD_IPV6;
+ }
+ else
+ return CURLUE_BAD_IPV6;
+
+ /* this is a RFC2732-style specified IP-address */
+ if(portptr && *portptr) {
+ if(*portptr != ':')
+ return CURLUE_BAD_IPV6;
+ }
+ else
+ portptr = NULL;
+ }
+ else
+ portptr = strchr(hostname, ':');
+
+ if(portptr) {
+ char *rest;
+ long port;
+ char portbuf[7];
+ size_t keep = portptr - hostname;
+
+ /* Browser behavior adaptation. If there's a colon with no digits after,
+ just cut off the name there which makes us ignore the colon and just
+ use the default port. Firefox, Chrome and Safari all do that.
+
+ Don't do it if the URL has no scheme, to make something that looks like
+ a scheme not work!
+ */
+ Curl_dyn_setlen(host, keep);
+ portptr++;
+ if(!*portptr)
+ return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
+
+ if(!ISDIGIT(*portptr))
+ return CURLUE_BAD_PORT_NUMBER;
+
+ port = strtol(portptr, &rest, 10); /* Port number must be decimal */
+
+ if(port > 0xffff)
+ return CURLUE_BAD_PORT_NUMBER;
+
+ if(rest[0])
+ return CURLUE_BAD_PORT_NUMBER;
+
+ *rest = 0;
+ /* generate a new port number string to get rid of leading zeroes etc */
+ msnprintf(portbuf, sizeof(portbuf), "%ld", port);
+ u->portnum = port;
+ u->port = strdup(portbuf);
+ if(!u->port)
+ return CURLUE_OUT_OF_MEMORY;
+ }
+
+ return CURLUE_OK;
+}
+
+static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
+ size_t hlen) /* length of hostname */
+{
+ size_t len;
+ DEBUGASSERT(hostname);
+
+ if(!hostname[0])
+ return CURLUE_NO_HOST;
+ else if(hostname[0] == '[') {
+ const char *l = "0123456789abcdefABCDEF:.";
+ if(hlen < 4) /* '[::]' is the shortest possible valid string */
+ return CURLUE_BAD_IPV6;
+ hostname++;
+ hlen -= 2;
+
+ if(hostname[hlen] != ']')
+ return CURLUE_BAD_IPV6;
+
+ /* only valid letters are ok */
+ len = strspn(hostname, l);
+ if(hlen != len) {
+ hlen = len;
+ if(hostname[len] == '%') {
+ /* this could now be '%[zone id]' */
+ char zoneid[16];
+ int i = 0;
+ char *h = &hostname[len + 1];
+ /* pass '25' if present and is a url encoded percent sign */
+ if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
+ h += 2;
+ while(*h && (*h != ']') && (i < 15))
+ zoneid[i++] = *h++;
+ if(!i || (']' != *h))
+ /* impossible to reach? */
+ return CURLUE_MALFORMED_INPUT;
+ zoneid[i] = 0;
+ u->zoneid = strdup(zoneid);
+ if(!u->zoneid)
+ return CURLUE_OUT_OF_MEMORY;
+ hostname[len] = ']'; /* insert end bracket */
+ hostname[len + 1] = 0; /* terminate the hostname */
+ }
+ else
+ return CURLUE_BAD_IPV6;
+ /* hostname is fine */
+ }
+#ifdef ENABLE_IPV6
+ {
+ char dest[16]; /* fits a binary IPv6 address */
+ char norm[MAX_IPADR_LEN];
+ hostname[hlen] = 0; /* end the address there */
+ if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
+ return CURLUE_BAD_IPV6;
+
+ /* check if it can be done shorter */
+ if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
+ (strlen(norm) < hlen)) {
+ strcpy(hostname, norm);
+ hlen = strlen(norm);
+ hostname[hlen + 1] = 0;
+ }
+ hostname[hlen] = ']'; /* restore ending bracket */
+ }
+#endif
+ }
+ else {
+ /* letters from the second string are not ok */
+ len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()");
+ if(hlen != len)
+ /* hostname with bad content */
+ return CURLUE_BAD_HOSTNAME;
+ }
+ return CURLUE_OK;
+}
+
+#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
+
+/*
+ * Handle partial IPv4 numerical addresses and different bases, like
+ * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
+ *
+ * If the given input string is syntactically wrong or any part for example is
+ * too big, this function returns FALSE and doesn't create any output.
+ *
+ * Output the "normalized" version of that input string in plain quad decimal
+ * integers and return TRUE.
+ */
+static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
+{
+ bool done = FALSE;
+ int n = 0;
+ const char *c = hostname;
+ unsigned long parts[4] = {0, 0, 0, 0};
+
+ while(!done) {
+ char *endp;
+ unsigned long l;
+ if((*c < '0') || (*c > '9'))
+ /* most importantly this doesn't allow a leading plus or minus */
+ return FALSE;
+ l = strtoul(c, &endp, 0);
+
+ /* overflow or nothing parsed at all */
+ if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
+ return FALSE;
+
+#if SIZEOF_LONG > 4
+ /* a value larger than 32 bits */
+ if(l > UINT_MAX)
+ return FALSE;
+#endif
+
+ parts[n] = l;
+ c = endp;
+
+ switch (*c) {
+ case '.' :
+ if(n == 3)
+ return FALSE;
+ n++;
+ c++;
+ break;
+
+ case '\0':
+ done = TRUE;
+ break;
+
+ default:
+ return FALSE;
+ }
+ }
+
+ /* this is deemed a valid IPv4 numerical address */
+
+ switch(n) {
+ case 0: /* a -- 32 bits */
+ msnprintf(outp, olen, "%u.%u.%u.%u",
+ parts[0] >> 24, (parts[0] >> 16) & 0xff,
+ (parts[0] >> 8) & 0xff, parts[0] & 0xff);
+ break;
+ case 1: /* a.b -- 8.24 bits */
+ if((parts[0] > 0xff) || (parts[1] > 0xffffff))
+ return FALSE;
+ msnprintf(outp, olen, "%u.%u.%u.%u",
+ parts[0], (parts[1] >> 16) & 0xff,
+ (parts[1] >> 8) & 0xff, parts[1] & 0xff);
+ break;
+ case 2: /* a.b.c -- 8.8.16 bits */
+ if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
+ return FALSE;
+ msnprintf(outp, olen, "%u.%u.%u.%u",
+ parts[0], parts[1], (parts[2] >> 8) & 0xff,
+ parts[2] & 0xff);
+ break;
+ case 3: /* a.b.c.d -- 8.8.8.8 bits */
+ if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
+ (parts[3] > 0xff))
+ return FALSE;
+ msnprintf(outp, olen, "%u.%u.%u.%u",
+ parts[0], parts[1], parts[2], parts[3]);
+ break;
+ }
+ return TRUE;
+}
+
+/* if necessary, replace the host content with a URL decoded version */
+static CURLUcode decode_host(struct dynbuf *host)
+{
+ char *per = NULL;
+ const char *hostname = Curl_dyn_ptr(host);
+ if(hostname[0] == '[')
+ /* only decode if not an ipv6 numerical */
+ return CURLUE_OK;
+ per = strchr(hostname, '%');
+ if(!per)
+ /* nothing to decode */
+ return CURLUE_OK;
+ else {
+ /* encoded */
+ size_t dlen;
+ char *decoded;
+ CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
+ REJECT_CTRL);
+ if(result)
+ return CURLUE_BAD_HOSTNAME;
+ Curl_dyn_reset(host);
+ result = Curl_dyn_addn(host, decoded, dlen);
+ free(decoded);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
+ }
+
+ return CURLUE_OK;
+}
+
+/*
+ * "Remove Dot Segments"
+ * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
+ */
+
+/*
+ * dedotdotify()
+ * @unittest: 1395
+ *
+ * This function gets a null-terminated path with dot and dotdot sequences
+ * passed in and strips them off according to the rules in RFC 3986 section
+ * 5.2.4.
+ *
+ * The function handles a query part ('?' + stuff) appended but it expects
+ * that fragments ('#' + stuff) have already been cut off.
+ *
+ * RETURNS
+ *
+ * an allocated dedotdotified output string
+ */
+UNITTEST char *dedotdotify(const char *input, size_t clen);
+UNITTEST char *dedotdotify(const char *input, size_t clen)
+{
+ char *out = malloc(clen + 1);
+ char *outptr;
+ const char *orginput = input;
+ char *queryp;
+ if(!out)
+ return NULL; /* out of memory */
+
+ *out = 0; /* null-terminates, for inputs like "./" */
+ outptr = out;
+
+ if(!*input)
+ /* zero length input string, return that */
+ return out;
+
+ /*
+ * To handle query-parts properly, we must find it and remove it during the
+ * dotdot-operation and then append it again at the end to the output
+ * string.
+ */
+ queryp = strchr(input, '?');
+
+ do {
+ bool dotdot = TRUE;
+ if(*input == '.') {
+ /* A. If the input buffer begins with a prefix of "../" or "./", then
+ remove that prefix from the input buffer; otherwise, */
+
+ if(!strncmp("./", input, 2)) {
+ input += 2;
+ clen -= 2;
+ }
+ else if(!strncmp("../", input, 3)) {
+ input += 3;
+ clen -= 3;
+ }
+ /* D. if the input buffer consists only of "." or "..", then remove
+ that from the input buffer; otherwise, */
+
+ else if(!strcmp(".", input) || !strcmp("..", input) ||
+ !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
+ *out = 0;
+ break;
+ }
+ else
+ dotdot = FALSE;
+ }
+ else if(*input == '/') {
+ /* B. if the input buffer begins with a prefix of "/./" or "/.", where
+ "." is a complete path segment, then replace that prefix with "/" in
+ the input buffer; otherwise, */
+ if(!strncmp("/./", input, 3)) {
+ input += 2;
+ clen -= 2;
+ }
+ else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
+ *outptr++ = '/';
+ *outptr = 0;
+ break;
+ }
+
+ /* C. if the input buffer begins with a prefix of "/../" or "/..",
+ where ".." is a complete path segment, then replace that prefix with
+ "/" in the input buffer and remove the last segment and its
+ preceding "/" (if any) from the output buffer; otherwise, */
+
+ else if(!strncmp("/../", input, 4)) {
+ input += 3;
+ clen -= 3;
+ /* remove the last segment from the output buffer */
+ while(outptr > out) {
+ outptr--;
+ if(*outptr == '/')
+ break;
+ }
+ *outptr = 0; /* null-terminate where it stops */
+ }
+ else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
+ /* remove the last segment from the output buffer */
+ while(outptr > out) {
+ outptr--;
+ if(*outptr == '/')
+ break;
+ }
+ *outptr++ = '/';
+ *outptr = 0; /* null-terminate where it stops */
+ break;
+ }
+ else
+ dotdot = FALSE;
+ }
+ else
+ dotdot = FALSE;
+
+ if(!dotdot) {
+ /* E. move the first path segment in the input buffer to the end of
+ the output buffer, including the initial "/" character (if any) and
+ any subsequent characters up to, but not including, the next "/"
+ character or the end of the input buffer. */
+
+ do {
+ *outptr++ = *input++;
+ clen--;
+ } while(*input && (*input != '/') && (*input != '?'));
+ *outptr = 0;
+ }
+
+ /* continue until end of input string OR, if there is a terminating
+ query part, stop there */
+ } while(*input && (!queryp || (input < queryp)));
+
+ if(queryp) {
+ size_t qlen;
+ /* There was a query part, append that to the output. */
+ size_t oindex = queryp - orginput;
+ qlen = strlen(&orginput[oindex]);
+ memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
+ }
+
+ return out;
+}
+
+static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
+{
+ const char *path;
+ size_t pathlen;
+ bool uncpath = FALSE;
+ char *query = NULL;
+ char *fragment = NULL;
+ char schemebuf[MAX_SCHEME_LEN + 1];
+ const char *schemep = NULL;
+ size_t schemelen = 0;
+ size_t urllen;
+ CURLUcode result = CURLUE_OK;
+ size_t fraglen = 0;
+ struct dynbuf host;
+
+ DEBUGASSERT(url);
+
+ Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
+
+ /*************************************************************
+ * Parse the URL.
+ ************************************************************/
+ /* allocate scratch area */
+ urllen = strlen(url);
+ if(urllen > CURL_MAX_INPUT_LENGTH) {
+ /* excessive input length */
+ result = CURLUE_MALFORMED_INPUT;
+ goto fail;
+ }
+
+ schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
+ flags & (CURLU_GUESS_SCHEME|
+ CURLU_DEFAULT_SCHEME));
+
+ /* handle the file: scheme */
+ if(schemelen && !strcmp(schemebuf, "file")) {
+ if(urllen <= 6) {
+ /* file:/ is not enough to actually be a complete file: URL */
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
+ }
+
+ /* path has been allocated large enough to hold this */
+ path = (char *)&url[5];
+
+ schemep = u->scheme = strdup("file");
+ if(!u->scheme) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+
+ /* Extra handling URLs with an authority component (i.e. that start with
+ * "file://")
+ *
+ * We allow omitted hostname (e.g. file:/<path>) -- valid according to
+ * RFC 8089, but not the (current) WHAT-WG URL spec.
+ */
+ if(path[0] == '/' && path[1] == '/') {
+ /* swallow the two slashes */
+ const char *ptr = &path[2];
+
+ /*
+ * According to RFC 8089, a file: URL can be reliably dereferenced if:
+ *
+ * o it has no/blank hostname, or
+ *
+ * o the hostname matches "localhost" (case-insensitively), or
+ *
+ * o the hostname is a FQDN that resolves to this machine, or
+ *
+ * o it is an UNC String transformed to an URI (Windows only, RFC 8089
+ * Appendix E.3).
+ *
+ * For brevity, we only consider URLs with empty, "localhost", or
+ * "127.0.0.1" hostnames as local, otherwise as an UNC String.
+ *
+ * Additionally, there is an exception for URLs with a Windows drive
+ * letter in the authority (which was accidentally omitted from RFC 8089
+ * Appendix E, but believe me, it was meant to be there. --MK)
+ */
+ if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
+ /* the URL includes a host name, it must match "localhost" or
+ "127.0.0.1" to be valid */
+ if(checkprefix("localhost/", ptr) ||
+ checkprefix("127.0.0.1/", ptr)) {
+ ptr += 9; /* now points to the slash after the host */
+ }
+ else {
+#if defined(WIN32)
+ size_t len;
+
+ /* the host name, NetBIOS computer name, can not contain disallowed
+ chars, and the delimiting slash character must be appended to the
+ host name */
+ path = strpbrk(ptr, "/\\:*?\"<>|");
+ if(!path || *path != '/') {
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
+ }
+
+ len = path - ptr;
+ if(len) {
+ if(Curl_dyn_addn(&host, ptr, len)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ uncpath = TRUE;
+ }
+
+ ptr -= 2; /* now points to the // before the host in UNC */
+#else
+ /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
+ none */
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
+#endif
+ }
+ }
+
+ path = ptr;
+ }
+
+ if(!uncpath)
+ /* no host for file: URLs by default */
+ Curl_dyn_reset(&host);
+
+#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
+ /* Don't allow Windows drive letters when not in Windows.
+ * This catches both "file:/c:" and "file:c:" */
+ if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
+ STARTS_WITH_URL_DRIVE_PREFIX(path)) {
+ /* File drive letters are only accepted in MSDOS/Windows */
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
+ }
+#else
+ /* If the path starts with a slash and a drive letter, ditch the slash */
+ if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
+ /* This cannot be done with strcpy, as the memory chunks overlap! */
+ path++;
+ }
+#endif
+
+ }
+ else {
+ /* clear path */
+ const char *p;
+ const char *hostp;
+ size_t len;
+
+ if(schemelen) {
+ int i = 0;
+ p = &url[schemelen + 1];
+ while(p && (*p == '/') && (i < 4)) {
+ p++;
+ i++;
+ }
+
+ schemep = schemebuf;
+ if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
+ !(flags & CURLU_NON_SUPPORT_SCHEME)) {
+ result = CURLUE_UNSUPPORTED_SCHEME;
+ goto fail;
+ }
+
+ if((i < 1) || (i>3)) {
+ /* less than one or more than three slashes */
+ result = CURLUE_BAD_SLASHES;
+ goto fail;
+ }
+ if(junkscan(schemep, flags)) {
+ result = CURLUE_BAD_SCHEME;
+ goto fail;
+ }
+ }
+ else {
+ /* no scheme! */
+
+ if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
+ result = CURLUE_BAD_SCHEME;
+ goto fail;
+ }
+ if(flags & CURLU_DEFAULT_SCHEME)
+ schemep = DEFAULT_SCHEME;
+
+ /*
+ * The URL was badly formatted, let's try without scheme specified.
+ */
+ p = url;
+ }
+ hostp = p; /* host name starts here */
+
+ /* find the end of the host name + port number */
+ while(*p && !HOSTNAME_END(*p))
+ p++;
+
+ len = p - hostp;
+ if(len) {
+ if(Curl_dyn_addn(&host, hostp, len)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ }
+ else {
+ if(!(flags & CURLU_NO_AUTHORITY)) {
+ result = CURLUE_NO_HOST;
+ goto fail;
+ }
+ }
+
+ path = (char *)p;
+
+ if(schemep) {
+ u->scheme = strdup(schemep);
+ if(!u->scheme) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ }
+ }
+
+ fragment = strchr(path, '#');
+ if(fragment) {
+ fraglen = strlen(fragment);
+ if(fraglen > 1) {
+ /* skip the leading '#' in the copy but include the terminating null */
+ u->fragment = Curl_memdup(fragment + 1, fraglen);
+ if(!u->fragment) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+
+ if(junkscan(u->fragment, flags)) {
+ result = CURLUE_BAD_FRAGMENT;
+ goto fail;
+ }
+ }
+ }
+
+ query = strchr(path, '?');
+ if(query && (!fragment || (query < fragment))) {
+ size_t qlen = strlen(query) - fraglen; /* includes '?' */
+ pathlen = strlen(path) - qlen - fraglen;
+ if(qlen > 1) {
+ if(qlen && (flags & CURLU_URLENCODE)) {
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ /* skip the leading question mark */
+ if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ u->query = Curl_dyn_ptr(&enc);
+ }
+ else {
+ u->query = Curl_memdup(query + 1, qlen);
+ if(!u->query) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ u->query[qlen - 1] = 0;
+ }
+
+ if(junkscan(u->query, flags)) {
+ result = CURLUE_BAD_QUERY;
+ goto fail;
+ }
+ }
+ else {
+ /* single byte query */
+ u->query = strdup("");
+ if(!u->query) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ }
+ }
+ else
+ pathlen = strlen(path) - fraglen;
+
+ if(pathlen && (flags & CURLU_URLENCODE)) {
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ pathlen = Curl_dyn_len(&enc);
+ path = u->path = Curl_dyn_ptr(&enc);
+ }
+
+ if(!pathlen) {
+ /* there is no path left, unset */
+ path = NULL;
+ }
+ else {
+ if(!u->path) {
+ u->path = Curl_memdup(path, pathlen + 1);
+ if(!u->path) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ u->path[pathlen] = 0;
+ path = u->path;
+ }
+ else if(flags & CURLU_URLENCODE)
+ /* it might have encoded more than just the path so cut it */
+ u->path[pathlen] = 0;
+
+ if(junkscan(u->path, flags)) {
+ result = CURLUE_BAD_PATH;
+ goto fail;
+ }
+
+ if(!(flags & CURLU_PATH_AS_IS)) {
+ /* remove ../ and ./ sequences according to RFC3986 */
+ char *newp = dedotdotify((char *)path, pathlen);
+ if(!newp) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ free(u->path);
+ u->path = newp;
+ }
+ }
+
+ if(Curl_dyn_len(&host)) {
+ char normalized_ipv4[sizeof("255.255.255.255") + 1];
+
+ /*
+ * Parse the login details and strip them out of the host name.
+ */
+ result = parse_hostname_login(u, &host, flags);
+ if(!result)
+ result = Curl_parse_port(u, &host, schemelen);
+ if(result)
+ goto fail;
+
+ if(junkscan(Curl_dyn_ptr(&host), flags)) {
+ result = CURLUE_BAD_HOSTNAME;
+ goto fail;
+ }
+
+ if(ipv4_normalize(Curl_dyn_ptr(&host),
+ normalized_ipv4, sizeof(normalized_ipv4))) {
+ Curl_dyn_reset(&host);
+ if(Curl_dyn_add(&host, normalized_ipv4)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ }
+ else {
+ result = decode_host(&host);
+ if(!result)
+ result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
+ if(result)
+ goto fail;
+ }
+
+ if((flags & CURLU_GUESS_SCHEME) && !schemep) {
+ const char *hostname = Curl_dyn_ptr(&host);
+ /* legacy curl-style guess based on host name */
+ if(checkprefix("ftp.", hostname))
+ schemep = "ftp";
+ else if(checkprefix("dict.", hostname))
+ schemep = "dict";
+ else if(checkprefix("ldap.", hostname))
+ schemep = "ldap";
+ else if(checkprefix("imap.", hostname))
+ schemep = "imap";
+ else if(checkprefix("smtp.", hostname))
+ schemep = "smtp";
+ else if(checkprefix("pop3.", hostname))
+ schemep = "pop3";
+ else
+ schemep = "http";
+
+ u->scheme = strdup(schemep);
+ if(!u->scheme) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ }
+ }
+ else if(flags & CURLU_NO_AUTHORITY) {
+ /* allowed to be empty. */
+ if(Curl_dyn_add(&host, "")) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ }
+
+ u->host = Curl_dyn_ptr(&host);
+
+ return result;
+ fail:
+ Curl_dyn_free(&host);
+ free_urlhandle(u);
+ return result;
+}
+
+/*
+ * Parse the URL and, if successful, replace everything in the Curl_URL struct.
+ */
+static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
+ unsigned int flags)
+{
+ CURLUcode result;
+ CURLU tmpurl;
+ memset(&tmpurl, 0, sizeof(tmpurl));
+ result = parseurl(url, &tmpurl, flags);
+ if(!result) {
+ free_urlhandle(u);
+ *u = tmpurl;
+ }
+ return result;
+}
+
+/*
+ */
+CURLU *curl_url(void)
+{
+ return calloc(sizeof(struct Curl_URL), 1);
+}
+
+void curl_url_cleanup(CURLU *u)
+{
+ if(u) {
+ free_urlhandle(u);
+ free(u);
+ }
+}
+
+#define DUP(dest, src, name) \
+ do { \
+ if(src->name) { \
+ dest->name = strdup(src->name); \
+ if(!dest->name) \
+ goto fail; \
+ } \
+ } while(0)
+
+CURLU *curl_url_dup(CURLU *in)
+{
+ struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
+ if(u) {
+ DUP(u, in, scheme);
+ DUP(u, in, user);
+ DUP(u, in, password);
+ DUP(u, in, options);
+ DUP(u, in, host);
+ DUP(u, in, port);
+ DUP(u, in, path);
+ DUP(u, in, query);
+ DUP(u, in, fragment);
+ u->portnum = in->portnum;
+ }
+ return u;
+ fail:
+ curl_url_cleanup(u);
+ return NULL;
+}
+
+CURLUcode curl_url_get(CURLU *u, CURLUPart what,
+ char **part, unsigned int flags)
+{
+ char *ptr;
+ CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
+ char portbuf[7];
+ bool urldecode = (flags & CURLU_URLDECODE)?1:0;
+ bool urlencode = (flags & CURLU_URLENCODE)?1:0;
+ bool plusdecode = FALSE;
+ (void)flags;
+ if(!u)
+ return CURLUE_BAD_HANDLE;
+ if(!part)
+ return CURLUE_BAD_PARTPOINTER;
+ *part = NULL;
+
+ switch(what) {
+ case CURLUPART_SCHEME:
+ ptr = u->scheme;
+ ifmissing = CURLUE_NO_SCHEME;
+ urldecode = FALSE; /* never for schemes */
+ break;
+ case CURLUPART_USER:
+ ptr = u->user;
+ ifmissing = CURLUE_NO_USER;
+ break;
+ case CURLUPART_PASSWORD:
+ ptr = u->password;
+ ifmissing = CURLUE_NO_PASSWORD;
+ break;
+ case CURLUPART_OPTIONS:
+ ptr = u->options;
+ ifmissing = CURLUE_NO_OPTIONS;
+ break;
+ case CURLUPART_HOST:
+ ptr = u->host;
+ ifmissing = CURLUE_NO_HOST;
+ break;
+ case CURLUPART_ZONEID:
+ ptr = u->zoneid;
+ ifmissing = CURLUE_NO_ZONEID;
+ break;
+ case CURLUPART_PORT:
+ ptr = u->port;
+ ifmissing = CURLUE_NO_PORT;
+ urldecode = FALSE; /* never for port */
+ if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
+ /* there's no stored port number, but asked to deliver
+ a default one for the scheme */
+ const struct Curl_handler *h =
+ Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
+ if(h) {
+ msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
+ ptr = portbuf;
+ }
+ }
+ else if(ptr && u->scheme) {
+ /* there is a stored port number, but ask to inhibit if
+ it matches the default one for the scheme */
+ const struct Curl_handler *h =
+ Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
+ if(h && (h->defport == u->portnum) &&
+ (flags & CURLU_NO_DEFAULT_PORT))
+ ptr = NULL;
+ }
+ break;
+ case CURLUPART_PATH:
+ ptr = u->path;
+ if(!ptr) {
+ ptr = u->path = strdup("/");
+ if(!u->path)
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ break;
+ case CURLUPART_QUERY:
+ ptr = u->query;
+ ifmissing = CURLUE_NO_QUERY;
+ plusdecode = urldecode;
+ break;
+ case CURLUPART_FRAGMENT:
+ ptr = u->fragment;
+ ifmissing = CURLUE_NO_FRAGMENT;
+ break;
+ case CURLUPART_URL: {
+ char *url;
+ char *scheme;
+ char *options = u->options;
+ char *port = u->port;
+ char *allochost = NULL;
+ if(u->scheme && strcasecompare("file", u->scheme)) {
+ url = aprintf("file://%s%s%s",
+ u->path,
+ u->fragment? "#": "",
+ u->fragment? u->fragment : "");
+ }
+ else if(!u->host)
+ return CURLUE_NO_HOST;
+ else {
+ const struct Curl_handler *h = NULL;
+ if(u->scheme)
+ scheme = u->scheme;
+ else if(flags & CURLU_DEFAULT_SCHEME)
+ scheme = (char *) DEFAULT_SCHEME;
+ else
+ return CURLUE_NO_SCHEME;
+
+ h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
+ if(!port && (flags & CURLU_DEFAULT_PORT)) {
+ /* there's no stored port number, but asked to deliver
+ a default one for the scheme */
+ if(h) {
+ msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
+ port = portbuf;
+ }
+ }
+ else if(port) {
+ /* there is a stored port number, but asked to inhibit if it matches
+ the default one for the scheme */
+ if(h && (h->defport == u->portnum) &&
+ (flags & CURLU_NO_DEFAULT_PORT))
+ port = NULL;
+ }
+
+ if(h && !(h->flags & PROTOPT_URLOPTIONS))
+ options = NULL;
+
+ if(u->host[0] == '[') {
+ if(u->zoneid) {
+ /* make it '[ host %25 zoneid ]' */
+ struct dynbuf enc;
+ size_t hostlen = strlen(u->host);
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
+ u->zoneid))
+ return CURLUE_OUT_OF_MEMORY;
+ allochost = Curl_dyn_ptr(&enc);
+ }
+ }
+ else if(urlencode) {
+ allochost = curl_easy_escape(NULL, u->host, 0);
+ if(!allochost)
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ else {
+ /* only encode '%' in output host name */
+ char *host = u->host;
+ bool percent = FALSE;
+ /* first, count number of percents present in the name */
+ while(*host) {
+ if(*host == '%') {
+ percent = TRUE;
+ break;
+ }
+ host++;
+ }
+ /* if there were percent(s), encode the host name */
+ if(percent) {
+ struct dynbuf enc;
+ CURLcode result;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ host = u->host;
+ while(*host) {
+ if(*host == '%')
+ result = Curl_dyn_addn(&enc, "%25", 3);
+ else
+ result = Curl_dyn_addn(&enc, host, 1);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
+ host++;
+ }
+ free(u->host);
+ u->host = Curl_dyn_ptr(&enc);
+ }
+ }
+
+ url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ scheme,
+ u->user ? u->user : "",
+ u->password ? ":": "",
+ u->password ? u->password : "",
+ options ? ";" : "",
+ options ? options : "",
+ (u->user || u->password || options) ? "@": "",
+ allochost ? allochost : u->host,
+ port ? ":": "",
+ port ? port : "",
+ (u->path && (u->path[0] != '/')) ? "/": "",
+ u->path ? u->path : "/",
+ (u->query && u->query[0]) ? "?": "",
+ (u->query && u->query[0]) ? u->query : "",
+ u->fragment? "#": "",
+ u->fragment? u->fragment : "");
+ free(allochost);
+ }
+ if(!url)
+ return CURLUE_OUT_OF_MEMORY;
+ *part = url;
+ return CURLUE_OK;
+ }
+ default:
+ ptr = NULL;
+ break;
+ }
+ if(ptr) {
+ size_t partlen = strlen(ptr);
+ size_t i = 0;
+ *part = Curl_memdup(ptr, partlen + 1);
+ if(!*part)
+ return CURLUE_OUT_OF_MEMORY;
+ if(plusdecode) {
+ /* convert + to space */
+ char *plus = *part;
+ for(i = 0; i < partlen; ++plus, i++) {
+ if(*plus == '+')
+ *plus = ' ';
+ }
+ }
+ if(urldecode) {
+ char *decoded;
+ size_t dlen;
+ /* this unconditional rejection of control bytes is documented
+ API behavior */
+ CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
+ free(*part);
+ if(res) {
+ *part = NULL;
+ return CURLUE_URLDECODE;
+ }
+ *part = decoded;
+ partlen = dlen;
+ }
+ if(urlencode) {
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ if(urlencode_str(&enc, *part, partlen, TRUE,
+ what == CURLUPART_QUERY))
+ return CURLUE_OUT_OF_MEMORY;
+ free(*part);
+ *part = Curl_dyn_ptr(&enc);
+ }
+
+ return CURLUE_OK;
+ }
+ else
+ return ifmissing;
+}
+
+CURLUcode curl_url_set(CURLU *u, CURLUPart what,
+ const char *part, unsigned int flags)
+{
+ char **storep = NULL;
+ long port = 0;
+ bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
+ bool plusencode = FALSE;
+ bool urlskipslash = FALSE;
+ bool appendquery = FALSE;
+ bool equalsencode = FALSE;
+
+ if(!u)
+ return CURLUE_BAD_HANDLE;
+ if(!part) {
+ /* setting a part to NULL clears it */
+ switch(what) {
+ case CURLUPART_URL:
+ break;
+ case CURLUPART_SCHEME:
+ storep = &u->scheme;
+ break;
+ case CURLUPART_USER:
+ storep = &u->user;
+ break;
+ case CURLUPART_PASSWORD:
+ storep = &u->password;
+ break;
+ case CURLUPART_OPTIONS:
+ storep = &u->options;
+ break;
+ case CURLUPART_HOST:
+ storep = &u->host;
+ break;
+ case CURLUPART_ZONEID:
+ storep = &u->zoneid;
+ break;
+ case CURLUPART_PORT:
+ u->portnum = 0;
+ storep = &u->port;
+ break;
+ case CURLUPART_PATH:
+ storep = &u->path;
+ break;
+ case CURLUPART_QUERY:
+ storep = &u->query;
+ break;
+ case CURLUPART_FRAGMENT:
+ storep = &u->fragment;
+ break;
+ default:
+ return CURLUE_UNKNOWN_PART;
+ }
+ if(storep && *storep) {
+ Curl_safefree(*storep);
+ }
+ else if(!storep) {
+ free_urlhandle(u);
+ memset(u, 0, sizeof(struct Curl_URL));
+ }
+ return CURLUE_OK;
+ }
+
+ switch(what) {
+ case CURLUPART_SCHEME:
+ if(strlen(part) > MAX_SCHEME_LEN)
+ /* too long */
+ return CURLUE_BAD_SCHEME;
+ if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
+ /* verify that it is a fine scheme */
+ !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
+ return CURLUE_UNSUPPORTED_SCHEME;
+ storep = &u->scheme;
+ urlencode = FALSE; /* never */
+ break;
+ case CURLUPART_USER:
+ storep = &u->user;
+ break;
+ case CURLUPART_PASSWORD:
+ storep = &u->password;
+ break;
+ case CURLUPART_OPTIONS:
+ storep = &u->options;
+ break;
+ case CURLUPART_HOST: {
+ size_t len = strcspn(part, " \r\n");
+ if(strlen(part) != len)
+ /* hostname with bad content */
+ return CURLUE_BAD_HOSTNAME;
+ storep = &u->host;
+ Curl_safefree(u->zoneid);
+ break;
+ }
+ case CURLUPART_ZONEID:
+ storep = &u->zoneid;
+ break;
+ case CURLUPART_PORT:
+ {
+ char *endp;
+ urlencode = FALSE; /* never */
+ port = strtol(part, &endp, 10); /* Port number must be decimal */
+ if((port <= 0) || (port > 0xffff))
+ return CURLUE_BAD_PORT_NUMBER;
+ if(*endp)
+ /* weirdly provided number, not good! */
+ return CURLUE_BAD_PORT_NUMBER;
+ storep = &u->port;
+ }
+ break;
+ case CURLUPART_PATH:
+ urlskipslash = TRUE;
+ storep = &u->path;
+ break;
+ case CURLUPART_QUERY:
+ plusencode = urlencode;
+ appendquery = (flags & CURLU_APPENDQUERY)?1:0;
+ equalsencode = appendquery;
+ storep = &u->query;
+ break;
+ case CURLUPART_FRAGMENT:
+ storep = &u->fragment;
+ break;
+ case CURLUPART_URL: {
+ /*
+ * Allow a new URL to replace the existing (if any) contents.
+ *
+ * If the existing contents is enough for a URL, allow a relative URL to
+ * replace it.
+ */
+ CURLUcode result;
+ char *oldurl;
+ char *redired_url;
+
+ /* if the new thing is absolute or the old one is not
+ * (we could not get an absolute url in 'oldurl'),
+ * then replace the existing with the new. */
+ if(Curl_is_absolute_url(part, NULL, 0,
+ flags & (CURLU_GUESS_SCHEME|
+ CURLU_DEFAULT_SCHEME))
+ || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
+ return parseurl_and_replace(part, u, flags);
+ }
+
+ /* apply the relative part to create a new URL
+ * and replace the existing one with it. */
+ redired_url = concat_url(oldurl, part);
+ free(oldurl);
+ if(!redired_url)
+ return CURLUE_OUT_OF_MEMORY;
+
+ result = parseurl_and_replace(redired_url, u, flags);
+ free(redired_url);
+ return result;
+ }
+ default:
+ return CURLUE_UNKNOWN_PART;
+ }
+ DEBUGASSERT(storep);
+ {
+ const char *newp = part;
+ size_t nalloc = strlen(part);
+
+ if(nalloc > CURL_MAX_INPUT_LENGTH)
+ /* excessive input length */
+ return CURLUE_MALFORMED_INPUT;
+
+ if(urlencode) {
+ const unsigned char *i;
+ struct dynbuf enc;
+
+ Curl_dyn_init(&enc, nalloc * 3 + 1);
+
+ for(i = (const unsigned char *)part; *i; i++) {
+ CURLcode result;
+ if((*i == ' ') && plusencode) {
+ result = Curl_dyn_addn(&enc, "+", 1);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ else if(Curl_isunreserved(*i) ||
+ ((*i == '/') && urlskipslash) ||
+ ((*i == '=') && equalsencode)) {
+ if((*i == '=') && equalsencode)
+ /* only skip the first equals sign */
+ equalsencode = FALSE;
+ result = Curl_dyn_addn(&enc, i, 1);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ else {
+ result = Curl_dyn_addf(&enc, "%%%02x", *i);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ }
+ newp = Curl_dyn_ptr(&enc);
+ }
+ else {
+ char *p;
+ newp = strdup(part);
+ if(!newp)
+ return CURLUE_OUT_OF_MEMORY;
+ p = (char *)newp;
+ while(*p) {
+ /* make sure percent encoded are lower case */
+ if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
+ (ISUPPER(p[1]) || ISUPPER(p[2]))) {
+ p[1] = Curl_raw_tolower(p[1]);
+ p[2] = Curl_raw_tolower(p[2]);
+ p += 3;
+ }
+ else
+ p++;
+ }
+ }
+
+ if(appendquery) {
+ /* Append the 'newp' string onto the old query. Add a '&' separator if
+ none is present at the end of the existing query already */
+
+ size_t querylen = u->query ? strlen(u->query) : 0;
+ bool addamperand = querylen && (u->query[querylen -1] != '&');
+ if(querylen) {
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+
+ if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
+ goto nomem;
+
+ if(addamperand) {
+ if(Curl_dyn_addn(&enc, "&", 1))
+ goto nomem;
+ }
+ if(Curl_dyn_add(&enc, newp))
+ goto nomem;
+ free((char *)newp);
+ free(*storep);
+ *storep = Curl_dyn_ptr(&enc);
+ return CURLUE_OK;
+ nomem:
+ free((char *)newp);
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ }
+
+ if(what == CURLUPART_HOST) {
+ size_t n = strlen(newp);
+ if(!n && (flags & CURLU_NO_AUTHORITY)) {
+ /* Skip hostname check, it's allowed to be empty. */
+ }
+ else {
+ if(hostname_check(u, (char *)newp, n)) {
+ free((char *)newp);
+ return CURLUE_BAD_HOSTNAME;
+ }
+ }
+ }
+
+ free(*storep);
+ *storep = (char *)newp;
+ }
+ /* set after the string, to make it not assigned if the allocation above
+ fails */
+ if(port)
+ u->portnum = port;
+ return CURLUE_OK;
+}
|