From a8f96968a96d5b0a90118402e81742d26c8347cb Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Sat, 2 May 2026 05:28:35 +0000 Subject: [PATCH 1/8] connect: rename enum protocol to url_scheme MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 1738 names the part of a URL before the colon a "scheme". connect.c calls it "protocol", which is more generic and collides with the unrelated enum protocol_version. Rename: enum protocol -> enum url_scheme PROTO_* -> URL_SCHEME_* prot_name -> url_scheme_name get_protocol -> url_get_scheme The local variables in parse_connect_url and git_connect are renamed accordingly, from protocol to scheme. No behavior change. The user-visible diagnostics and translated error messages are preserved: "Diag: protocol=..." "protocol '%s' is not supported" "unknown protocol" This rename also prepares for moving the scheme-detection functions to a shared header so that a future plumbing command can parse URLs using the same logic as the connect path. 
Suggested-by: Torsten Bögershausen Signed-off-by: Matheus Afonso Martins Moreira Signed-off-by: Junio C Hamano --- connect.c | 68 +++++++++++++++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/connect.c b/connect.c index fcd35c5539..46da89905e 100644 --- a/connect.c +++ b/connect.c @@ -700,11 +700,11 @@ int server_supports(const char *feature) return !!server_feature_value(feature, NULL); } -enum protocol { - PROTO_LOCAL = 1, - PROTO_FILE, - PROTO_SSH, - PROTO_GIT +enum url_scheme { + URL_SCHEME_LOCAL = 1, + URL_SCHEME_FILE, + URL_SCHEME_SSH, + URL_SCHEME_GIT }; int url_is_local_not_ssh(const char *url) @@ -715,33 +715,33 @@ int url_is_local_not_ssh(const char *url) (has_dos_drive_prefix(url) && is_valid_path(url)); } -static const char *prot_name(enum protocol protocol) +static const char *url_scheme_name(enum url_scheme scheme) { - switch (protocol) { - case PROTO_LOCAL: - case PROTO_FILE: + switch (scheme) { + case URL_SCHEME_LOCAL: + case URL_SCHEME_FILE: return "file"; - case PROTO_SSH: + case URL_SCHEME_SSH: return "ssh"; - case PROTO_GIT: + case URL_SCHEME_GIT: return "git"; default: return "unknown protocol"; } } -static enum protocol get_protocol(const char *name) +static enum url_scheme url_get_scheme(const char *name) { if (!strcmp(name, "ssh")) - return PROTO_SSH; + return URL_SCHEME_SSH; if (!strcmp(name, "git")) - return PROTO_GIT; + return URL_SCHEME_GIT; if (!strcmp(name, "git+ssh")) /* deprecated - do not use */ - return PROTO_SSH; + return URL_SCHEME_SSH; if (!strcmp(name, "ssh+git")) /* deprecated - do not use */ - return PROTO_SSH; + return URL_SCHEME_SSH; if (!strcmp(name, "file")) - return PROTO_FILE; + return URL_SCHEME_FILE; die(_("protocol '%s' is not supported"), name); } @@ -1083,14 +1083,14 @@ static char *get_port(char *host) * Extract protocol and relevant parts from the specified connection URL. * The caller must free() the returned strings. 
*/ -static enum protocol parse_connect_url(const char *url_orig, char **ret_host, - char **ret_path) +static enum url_scheme parse_connect_url(const char *url_orig, char **ret_host, + char **ret_path) { char *url; char *host, *path; char *end; int separator = '/'; - enum protocol protocol = PROTO_LOCAL; + enum url_scheme scheme = URL_SCHEME_LOCAL; if (is_url(url_orig)) url = url_decode(url_orig); @@ -1100,12 +1100,12 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host, host = strstr(url, "://"); if (host) { *host = '\0'; - protocol = get_protocol(url); + scheme = url_get_scheme(url); host += 3; } else { host = url; if (!url_is_local_not_ssh(url)) { - protocol = PROTO_SSH; + scheme = URL_SCHEME_SSH; separator = ':'; } } @@ -1116,13 +1116,13 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host, */ end = host_end(&host, 0); - if (protocol == PROTO_LOCAL) + if (scheme == URL_SCHEME_LOCAL) path = end; - else if (protocol == PROTO_FILE && *host != '/' && + else if (scheme == URL_SCHEME_FILE && *host != '/' && !has_dos_drive_prefix(host) && offset_1st_component(host - 2) > 1) path = host - 2; /* include the leading "//" */ - else if (protocol == PROTO_FILE && has_dos_drive_prefix(end)) + else if (scheme == URL_SCHEME_FILE && has_dos_drive_prefix(end)) path = end; /* "file://$(pwd)" may be "file://C:/projects/repo" */ else path = strchr(end, separator); @@ -1138,7 +1138,7 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host, end = path; /* Need to \0 terminate host here */ if (separator == ':') path++; /* path starts after ':' */ - if (protocol == PROTO_GIT || protocol == PROTO_SSH) { + if (scheme == URL_SCHEME_GIT || scheme == URL_SCHEME_SSH) { if (path[1] == '~') path++; } @@ -1149,7 +1149,7 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host, *ret_host = xstrdup(host); *ret_path = path; free(url); - return protocol; + return scheme; } static const char 
*get_ssh_command(void) @@ -1434,7 +1434,7 @@ struct child_process *git_connect(int fd[2], const char *url, { char *hostandport, *path; struct child_process *conn; - enum protocol protocol; + enum url_scheme scheme; enum protocol_version version = get_protocol_version_config(); /* @@ -1451,14 +1451,14 @@ struct child_process *git_connect(int fd[2], const char *url, */ signal(SIGCHLD, SIG_DFL); - protocol = parse_connect_url(url, &hostandport, &path); - if ((flags & CONNECT_DIAG_URL) && (protocol != PROTO_SSH)) { + scheme = parse_connect_url(url, &hostandport, &path); + if ((flags & CONNECT_DIAG_URL) && (scheme != URL_SCHEME_SSH)) { printf("Diag: url=%s\n", url ? url : "NULL"); - printf("Diag: protocol=%s\n", prot_name(protocol)); + printf("Diag: protocol=%s\n", url_scheme_name(scheme)); printf("Diag: hostandport=%s\n", hostandport ? hostandport : "NULL"); printf("Diag: path=%s\n", path ? path : "NULL"); conn = NULL; - } else if (protocol == PROTO_GIT) { + } else if (scheme == URL_SCHEME_GIT) { conn = git_connect_git(fd, hostandport, path, prog, version, flags); conn->trace2_child_class = "transport/git"; } else { @@ -1481,7 +1481,7 @@ struct child_process *git_connect(int fd[2], const char *url, conn->use_shell = 1; conn->in = conn->out = -1; - if (protocol == PROTO_SSH) { + if (scheme == URL_SCHEME_SSH) { char *ssh_host = hostandport; const char *port = NULL; transport_check_allowed("ssh"); @@ -1492,7 +1492,7 @@ struct child_process *git_connect(int fd[2], const char *url, if (flags & CONNECT_DIAG_URL) { printf("Diag: url=%s\n", url ? url : "NULL"); - printf("Diag: protocol=%s\n", prot_name(protocol)); + printf("Diag: protocol=%s\n", url_scheme_name(scheme)); printf("Diag: userandhost=%s\n", ssh_host ? ssh_host : "NULL"); printf("Diag: port=%s\n", port ? port : "NONE"); printf("Diag: path=%s\n", path ? 
path : "NULL"); From 51fcf73014f542f074a253add5867c24c82c854f Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Sat, 2 May 2026 05:28:36 +0000 Subject: [PATCH 2/8] url: move url_is_local_not_ssh to url.h Move url_is_local_not_ssh from connect.c/connect.h to url.c/url.h so that the new url_parse function in urlmatch.c, and any future code that needs to distinguish a local path from an scp style SSH URL, can reuse the heuristic without depending on connect.c. No behavior change. Signed-off-by: Matheus Afonso Martins Moreira Signed-off-by: Junio C Hamano --- connect.c | 8 -------- connect.h | 1 - remote.c | 1 + url.c | 8 ++++++++ url.h | 2 ++ 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/connect.c b/connect.c index 46da89905e..cb145de30e 100644 --- a/connect.c +++ b/connect.c @@ -707,14 +707,6 @@ enum url_scheme { URL_SCHEME_GIT }; -int url_is_local_not_ssh(const char *url) -{ - const char *colon = strchr(url, ':'); - const char *slash = strchr(url, '/'); - return !colon || (slash && slash < colon) || - (has_dos_drive_prefix(url) && is_valid_path(url)); -} - static const char *url_scheme_name(enum url_scheme scheme) { switch (scheme) { diff --git a/connect.h b/connect.h index 1645126c17..8d84f6656b 100644 --- a/connect.h +++ b/connect.h @@ -13,7 +13,6 @@ int git_connection_is_socket(struct child_process *conn); int server_supports(const char *feature); int parse_feature_request(const char *features, const char *feature); const char *server_feature_value(const char *feature, size_t *len_ret); -int url_is_local_not_ssh(const char *url); struct packet_reader; enum protocol_version discover_version(struct packet_reader *reader); diff --git a/remote.c b/remote.c index a664cd166a..24a8118d25 100644 --- a/remote.c +++ b/remote.c @@ -8,6 +8,7 @@ #include "gettext.h" #include "hex.h" #include "remote.h" +#include "url.h" #include "urlmatch.h" #include "refs.h" #include "refspec.h" diff --git a/url.c b/url.c index 3ca5987e90..057576042a 100644 
--- a/url.c +++ b/url.c @@ -132,3 +132,11 @@ void str_end_url_with_slash(const char *url, char **dest) free(*dest); *dest = strbuf_detach(&buf, NULL); } + +int url_is_local_not_ssh(const char *url) +{ + const char *colon = strchr(url, ':'); + const char *slash = strchr(url, '/'); + return !colon || (slash && slash < colon) || + (has_dos_drive_prefix(url) && is_valid_path(url)); +} diff --git a/url.h b/url.h index cd9140e994..39d621312f 100644 --- a/url.h +++ b/url.h @@ -21,6 +21,8 @@ char *url_decode_parameter_value(const char **query); void end_url_with_slash(struct strbuf *buf, const char *url); void str_end_url_with_slash(const char *url, char **dest); +int url_is_local_not_ssh(const char *url); + /* * The set of unreserved characters as per STD66 (RFC3986) is * '[A-Za-z0-9-._~]'. These characters are safe to appear in URI From d48e36a8a23d931e869fbb3156fc95a5732cb061 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Sat, 2 May 2026 05:28:37 +0000 Subject: [PATCH 3/8] url: move scheme detection to URL header/source Move enum url_scheme and url_get_scheme() from connect.c to url.h and url.c so that other code can identify a URL's scheme without depending on connect.c. No behavior change. url_get_scheme() still dies on an unrecognized scheme name, with the same translated message as before. url_scheme_name() stays in connect.c because it has no other callers. 
Signed-off-by: Matheus Afonso Martins Moreira Signed-off-by: Junio C Hamano --- connect.c | 22 ---------------------- url.c | 16 ++++++++++++++++ url.h | 13 +++++++++++++ 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/connect.c b/connect.c index cb145de30e..1ac7acc6e8 100644 --- a/connect.c +++ b/connect.c @@ -700,13 +700,6 @@ int server_supports(const char *feature) return !!server_feature_value(feature, NULL); } -enum url_scheme { - URL_SCHEME_LOCAL = 1, - URL_SCHEME_FILE, - URL_SCHEME_SSH, - URL_SCHEME_GIT -}; - static const char *url_scheme_name(enum url_scheme scheme) { switch (scheme) { @@ -722,21 +715,6 @@ static const char *url_scheme_name(enum url_scheme scheme) } } -static enum url_scheme url_get_scheme(const char *name) -{ - if (!strcmp(name, "ssh")) - return URL_SCHEME_SSH; - if (!strcmp(name, "git")) - return URL_SCHEME_GIT; - if (!strcmp(name, "git+ssh")) /* deprecated - do not use */ - return URL_SCHEME_SSH; - if (!strcmp(name, "ssh+git")) /* deprecated - do not use */ - return URL_SCHEME_SSH; - if (!strcmp(name, "file")) - return URL_SCHEME_FILE; - die(_("protocol '%s' is not supported"), name); -} - static char *host_end(char **hoststart, int removebrackets) { char *host = *hoststart; diff --git a/url.c b/url.c index 057576042a..300acf98fe 100644 --- a/url.c +++ b/url.c @@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "gettext.h" #include "hex-ll.h" #include "strbuf.h" #include "url.h" @@ -140,3 +141,18 @@ int url_is_local_not_ssh(const char *url) return !colon || (slash && slash < colon) || (has_dos_drive_prefix(url) && is_valid_path(url)); } + +enum url_scheme url_get_scheme(const char *name) +{ + if (!strcmp(name, "ssh")) + return URL_SCHEME_SSH; + if (!strcmp(name, "git")) + return URL_SCHEME_GIT; + if (!strcmp(name, "git+ssh")) /* deprecated - do not use */ + return URL_SCHEME_SSH; + if (!strcmp(name, "ssh+git")) /* deprecated - do not use */ + return URL_SCHEME_SSH; + if (!strcmp(name, "file")) + return URL_SCHEME_FILE; 
+ die(_("protocol '%s' is not supported"), name); +} diff --git a/url.h b/url.h index 39d621312f..24c8cd91d0 100644 --- a/url.h +++ b/url.h @@ -23,6 +23,19 @@ void str_end_url_with_slash(const char *url, char **dest); int url_is_local_not_ssh(const char *url); +enum url_scheme { + URL_SCHEME_LOCAL = 1, + URL_SCHEME_FILE, + URL_SCHEME_SSH, + URL_SCHEME_GIT, +}; + +/* + * Identify the URL scheme by name. Dies if the name does not match + * any scheme that Git knows about. + */ +enum url_scheme url_get_scheme(const char *name); + /* * The set of unreserved characters as per STD66 (RFC3986) is * '[A-Za-z0-9-._~]'. These characters are safe to appear in URI From 46d6fb752e7d8550a3511eb370536d216ddb5b8f Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Sat, 2 May 2026 05:28:38 +0000 Subject: [PATCH 4/8] url: return URL_SCHEME_UNKNOWN instead of dying Enumerate a URL_SCHEME_UNKNOWN result with value 0. Have url_get_scheme() return it for unrecognized schemes instead of calling die() itself. Move the die() call to parse_connect_url() where url_get_scheme() is used. This lets url_get_scheme() be used from contexts that need to identify a URL's scheme without aborting the program. For example, a future plumbing command that validates URLs. No external behavior change. parse_connect_url() still dies with the same translated message for unrecognized schemes. 
Signed-off-by: Matheus Afonso Martins Moreira Signed-off-by: Junio C Hamano --- connect.c | 2 ++ url.c | 3 +-- url.h | 7 ++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/connect.c b/connect.c index 1ac7acc6e8..73d7a6b8d0 100644 --- a/connect.c +++ b/connect.c @@ -1071,6 +1071,8 @@ static enum url_scheme parse_connect_url(const char *url_orig, char **ret_host, if (host) { *host = '\0'; scheme = url_get_scheme(url); + if (scheme == URL_SCHEME_UNKNOWN) + die(_("protocol '%s' is not supported"), url); host += 3; } else { host = url; diff --git a/url.c b/url.c index 300acf98fe..a59818278f 100644 --- a/url.c +++ b/url.c @@ -1,5 +1,4 @@ #include "git-compat-util.h" -#include "gettext.h" #include "hex-ll.h" #include "strbuf.h" #include "url.h" @@ -154,5 +153,5 @@ enum url_scheme url_get_scheme(const char *name) return URL_SCHEME_SSH; if (!strcmp(name, "file")) return URL_SCHEME_FILE; - die(_("protocol '%s' is not supported"), name); + return URL_SCHEME_UNKNOWN; } diff --git a/url.h b/url.h index 24c8cd91d0..7289523605 100644 --- a/url.h +++ b/url.h @@ -24,15 +24,16 @@ void str_end_url_with_slash(const char *url, char **dest); int url_is_local_not_ssh(const char *url); enum url_scheme { - URL_SCHEME_LOCAL = 1, + URL_SCHEME_UNKNOWN = 0, + URL_SCHEME_LOCAL, URL_SCHEME_FILE, URL_SCHEME_SSH, URL_SCHEME_GIT, }; /* - * Identify the URL scheme by name. Dies if the name does not match - * any scheme that Git knows about. + * Identify the URL scheme by name. Returns URL_SCHEME_UNKNOWN + * if the name does not match any scheme that Git knows about. */ enum url_scheme url_get_scheme(const char *name); From 18a828171243b630bc7585c7bc8d85bb37125c01 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Sat, 2 May 2026 05:28:39 +0000 Subject: [PATCH 5/8] urlmatch: define url_parse function Define url_parse, a general parsing function that supports all Git URLs including scp style URLs such as hostname:~user/repo. 
It is adapted from the algorithm in connect.c's parse_connect_url and reuses the shared enum url_scheme and url_get_scheme function that previous commits made available in url.h. The new parser and the connect path agree on scheme classification. url_parse has the same interface as url_normalize and uses the same data structures. Both functions accept the same URL forms with one deliberate exception. Bare local paths such as "/abs/path", "./rel" or "repo" are accepted by parse_connect_url as URL_SCHEME_LOCAL, but rejected by url_parse because url_normalize requires a URL with a scheme://host form. A consumer that wants to handle both URLs and local paths needs to dispatch on url_is_local_not_ssh before calling url_parse, just as the connect path does internally. The duplication with parse_connect_url is intentional. The two functions have different contracts: - parse_connect_url Calls die() on an unknown scheme and returns NUL-terminated host/path strings for the connect path - url_parse Returns NULL on failure while populating out_info->err, and exposes components as offset/length pairs into the normalized URL buffer, matching url_normalize. Reconciling both is possible, but not in the scope of the current patch set. 
Signed-off-by: Matheus Afonso Martins Moreira Signed-off-by: Junio C Hamano --- t/unit-tests/u-urlmatch-normalization.c | 45 +++++++++ urlmatch.c | 127 ++++++++++++++++++++++++ urlmatch.h | 1 + 3 files changed, 173 insertions(+) diff --git a/t/unit-tests/u-urlmatch-normalization.c b/t/unit-tests/u-urlmatch-normalization.c index 39f6e1ba26..3595d893a2 100644 --- a/t/unit-tests/u-urlmatch-normalization.c +++ b/t/unit-tests/u-urlmatch-normalization.c @@ -245,3 +245,48 @@ void test_urlmatch_normalization__equivalents(void) compare_normalized_urls("https://@x.y/^/../abc", "httpS://@x.y:0443/abc", 1); compare_normalized_urls("https://@x.y/^/..", "httpS://@x.y:0443/", 1); } + +static void check_parsed_path(const char *url, const char *expected_path) +{ + struct url_info info; + char *parsed = url_parse(url, &info); + char *path; + + cl_assert(parsed != NULL); + path = xstrndup(parsed + info.path_off, info.path_len); + cl_assert_equal_s(path, expected_path); + free(path); + free(parsed); +} + +void test_urlmatch_normalization__parse_scp(void) +{ + check_parsed_path("host:path", "/path"); + check_parsed_path("user@host:path", "/path"); + check_parsed_path("host:~user/repo", "~user/repo"); + check_parsed_path("user@host:~user/repo", "~user/repo"); + check_parsed_path("[host]:src", "/src"); + check_parsed_path("[host:123]:src", "/src"); + check_parsed_path("[::1]:repo", "/repo"); + check_parsed_path("user@[::1]:repo", "/repo"); +} + +void test_urlmatch_normalization__parse_url_form(void) +{ + check_parsed_path("ssh://host/repo", "/repo"); + check_parsed_path("ssh://host/~user/repo", "~user/repo"); + check_parsed_path("git://host:9418/repo", "/repo"); + check_parsed_path("git://host/~user/repo", "~user/repo"); + check_parsed_path("ssh://[::1]:1234/repo", "/repo"); + check_parsed_path("http://[2001:db8::1]/repo", "/repo"); +} + +void test_urlmatch_normalization__parse_strips_query_and_fragment(void) +{ + check_parsed_path("ssh://host/~user/repo?q", "~user/repo"); + 
check_parsed_path("ssh://host/~user/repo#frag", "~user/repo"); + check_parsed_path("git://host/~user/repo?q", "~user/repo"); + check_parsed_path("user@host:~user/repo?q", "~user/repo"); + check_parsed_path("https://host/repo?q", "/repo"); + check_parsed_path("https://host/repo#frag", "/repo"); +} diff --git a/urlmatch.c b/urlmatch.c index eea8300489..bf8cce6de9 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -5,6 +5,7 @@ #include "hex-ll.h" #include "strbuf.h" #include "urlmatch.h" +#include "url.h" #define URL_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" #define URL_DIGIT "0123456789" @@ -440,6 +441,132 @@ char *url_normalize(const char *url, struct url_info *out_info) return url_normalize_1(url, out_info, 0); } +char *url_parse(const char *url_orig, struct url_info *out_info) +{ + struct strbuf url; + char *host, *separator; + char *detached, *normalized; + char *url_decoded; + enum url_scheme scheme = URL_SCHEME_LOCAL; + struct url_info local_info; + struct url_info *info = out_info ? out_info : &local_info; + bool scp_syntax = false; + + if (is_url(url_orig)) + url_decoded = url_decode(url_orig); + else + url_decoded = xstrdup(url_orig); + + strbuf_init(&url, strlen(url_decoded) + sizeof("ssh://")); + strbuf_addstr(&url, url_decoded); + free(url_decoded); + + host = strstr(url.buf, "://"); + if (host) { + /* + * Temporarily NUL-terminate the scheme name + * so we can pass it to url_get_scheme(), + * then restore the ':' so the buffer + * is intact for url_normalize() below. + */ + char saved = *host; + *host = '\0'; + scheme = url_get_scheme(url.buf); + *host = saved; + host += 3; + } else { + if (!url_is_local_not_ssh(url.buf)) { + scp_syntax = true; + scheme = URL_SCHEME_SSH; + strbuf_insertstr(&url, 0, "ssh://"); + host = url.buf + strlen("ssh://"); + } + } + + /* + * Path starts after ':' in scp style SSH URLs. + * + * The host portion can begin with an optional "user@", + * and the host itself can be wrapped in '[' ']' brackets. 
+ * The bracket form is git's legacy way of supporting: + * + * - IPv6 literals: [::1]:repo + * - host:port pairs in the short form: [myhost:123]:src + * - Plain hostnames that happen to need bracketing: [host]:path + * + * Treat '[' followed by 0 or 1 inner colons as the host:port + * or plain hostname form and strip the brackets so url_normalize + * sees host[:port] natively. Two or more inner colons mark an + * IPv6 literal: keep the brackets for url_normalize to recognize. + * + * The scp path separator is the ':' that follows the host part, + * and we must skip over user@ and any '[...]' before searching. + */ + if (scp_syntax) { + char *user_at; + char *host_start; + char *bracket_end; + + user_at = strchr(host, '@'); + host_start = user_at ? user_at + 1 : host; + + if (*host_start == '[') { + char *p; + int inner_colons; + + bracket_end = strchr(host_start, ']'); + inner_colons = 0; + for (p = host_start + 1; bracket_end && p < bracket_end; p++) + if (*p == ':') + inner_colons++; + + if (bracket_end && inner_colons <= 1) { + size_t close_off = bracket_end - url.buf; + size_t open_off = host_start - url.buf; + strbuf_remove(&url, close_off, 1); + strbuf_remove(&url, open_off, 1); + separator = url.buf + close_off - 1; + } else if (bracket_end) { + separator = strchr(bracket_end + 1, ':'); + } else { + separator = strchr(host_start, ':'); + } + } else { + separator = strchr(host_start, ':'); + } + + if (separator) { + if (separator[1] == '/') + strbuf_remove(&url, separator - url.buf, 1); + else + *separator = '/'; + } + } + + detached = strbuf_detach(&url, NULL); + normalized = url_normalize(detached, info); + free(detached); + + if (!normalized) + return NULL; + + /* + * Point path to ~ for URLs like this: + * + * ssh://host.xz/~user/repo + * git://host.xz/~user/repo + * host.xz:~user/repo + */ + if (scheme == URL_SCHEME_GIT || scheme == URL_SCHEME_SSH) { + if (normalized[info->path_off + 1] == '~') { + info->path_off++; + info->path_len--; + } + } + + 
return normalized; +} + static size_t url_match_prefix(const char *url, const char *url_prefix, size_t url_prefix_len) diff --git a/urlmatch.h b/urlmatch.h index 5ba85cea13..6b3ce42858 100644 --- a/urlmatch.h +++ b/urlmatch.h @@ -35,6 +35,7 @@ struct url_info { }; char *url_normalize(const char *, struct url_info *); +char *url_parse(const char *, struct url_info *); struct urlmatch_item { size_t hostmatch_len; From 533eb14798d0e4e288401b90d4684730a3ed9266 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Sat, 2 May 2026 05:28:40 +0000 Subject: [PATCH 6/8] builtin: create url-parse command Git commands can accept a rather wide variety of URL syntaxes. The range of accepted inputs might expand even more in the future. This makes the parsing of URL components difficult since standard URL parsers cannot be used. Extracting the components of a git URL would require implementing all the schemes that git itself supports, not to mention tracking its development continuously in case new URL schemes are added. The url-parse builtin command is designed to solve this problem by exposing git's native URL parsing facilities as a plumbing command. Other programs can then call upon git itself to parse the git URLs and extract their components. This should be quite useful for scripts. 
Signed-off-by: Matheus Afonso Martins Moreira Signed-off-by: Junio C Hamano --- .gitignore | 1 + Makefile | 1 + builtin.h | 1 + builtin/url-parse.c | 135 ++++++++++++++++++++++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + meson.build | 1 + 7 files changed, 141 insertions(+) create mode 100644 builtin/url-parse.c diff --git a/.gitignore b/.gitignore index 24635cf2d6..c5673daa6e 100644 --- a/.gitignore +++ b/.gitignore @@ -182,6 +182,7 @@ /git-update-server-info /git-upload-archive /git-upload-pack +/git-url-parse /git-var /git-verify-commit /git-verify-pack diff --git a/Makefile b/Makefile index cedc234173..1c757a1aa0 100644 --- a/Makefile +++ b/Makefile @@ -1497,6 +1497,7 @@ BUILTIN_OBJS += builtin/update-ref.o BUILTIN_OBJS += builtin/update-server-info.o BUILTIN_OBJS += builtin/upload-archive.o BUILTIN_OBJS += builtin/upload-pack.o +BUILTIN_OBJS += builtin/url-parse.o BUILTIN_OBJS += builtin/var.o BUILTIN_OBJS += builtin/verify-commit.o BUILTIN_OBJS += builtin/verify-pack.o diff --git a/builtin.h b/builtin.h index 235c51f30e..c6f7672991 100644 --- a/builtin.h +++ b/builtin.h @@ -271,6 +271,7 @@ int cmd_update_server_info(int argc, const char **argv, const char *prefix, stru int cmd_upload_archive(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_upload_archive_writer(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_upload_pack(int argc, const char **argv, const char *prefix, struct repository *repo); +int cmd_url_parse(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_var(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_verify_commit(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_verify_tag(int argc, const char **argv, const char *prefix, struct repository *repo); diff --git a/builtin/url-parse.c b/builtin/url-parse.c new file mode 100644 index 0000000000..7e705538c0 --- /dev/null +++ 
b/builtin/url-parse.c @@ -0,0 +1,135 @@ +#include "builtin.h" +#include "gettext.h" +#include "parse-options.h" +#include "url.h" +#include "urlmatch.h" + +static const char * const builtin_url_parse_usage[] = { + N_("git url-parse [-c ] [--] ..."), + NULL +}; + +static char *component_arg; + +static struct option builtin_url_parse_options[] = { + OPT_STRING('c', "component", &component_arg, N_("component"), + N_("which URL component to extract")), + OPT_END(), +}; + +enum url_component { + URL_NONE = 0, + URL_SCHEME, + URL_USER, + URL_PASSWORD, + URL_HOST, + URL_PORT, + URL_PATH, +}; + +static void parse_or_die(const char *url, struct url_info *info) +{ + if (url_is_local_not_ssh(url)) { + if (*url == '/') + die("'%s' is not a URL; if you meant a local " + "repository, use 'file://%s'", url, url); + if (has_dos_drive_prefix(url)) + die("'%s' is not a URL; if you meant a local " + "repository, use 'file:///%s'", url, url); + die("'%s' is not a URL; if you meant a local repository, " + "use a 'file://' URL with an absolute path", url); + } + if (!url_parse(url, info)) + die("invalid git URL '%s': %s", url, info->err); +} + +static enum url_component get_component_or_die(const char *arg) +{ + if (!strcmp("path", arg)) + return URL_PATH; + if (!strcmp("host", arg)) + return URL_HOST; + if (!strcmp("scheme", arg)) + return URL_SCHEME; + if (!strcmp("user", arg)) + return URL_USER; + if (!strcmp("password", arg)) + return URL_PASSWORD; + if (!strcmp("port", arg)) + return URL_PORT; + die("invalid git URL component '%s'", arg); +} + +static char *extract_component(enum url_component component, + struct url_info *info) +{ + size_t offset, length; + + switch (component) { + case URL_SCHEME: + offset = 0; + length = info->scheme_len; + break; + case URL_USER: + offset = info->user_off; + length = info->user_len; + break; + case URL_PASSWORD: + offset = info->passwd_off; + length = info->passwd_len; + break; + case URL_HOST: + offset = info->host_off; + length = 
info->host_len; + break; + case URL_PORT: + offset = info->port_off; + length = info->port_len; + break; + case URL_PATH: + offset = info->path_off; + length = info->path_len; + break; + case URL_NONE: + return NULL; + } + + return xstrndup(info->url + offset, length); +} + +int cmd_url_parse(int argc, + const char **argv, + const char *prefix, + struct repository *repo UNUSED) +{ + struct url_info info; + enum url_component selected = URL_NONE; + char *extracted; + int i; + + argc = parse_options(argc, argv, prefix, builtin_url_parse_options, + builtin_url_parse_usage, 0); + + if (argc == 0) + usage_with_options(builtin_url_parse_usage, + builtin_url_parse_options); + + if (component_arg) + selected = get_component_or_die(component_arg); + + for (i = 0; i < argc; i++) { + parse_or_die(argv[i], &info); + + if (selected != URL_NONE) { + extracted = extract_component(selected, &info); + if (extracted) { + puts(extracted); + free(extracted); + } + } + + free(info.url); + } + + return 0; +} diff --git a/command-list.txt b/command-list.txt index f9005cf459..1ede48186f 100644 --- a/command-list.txt +++ b/command-list.txt @@ -202,6 +202,7 @@ git-update-ref plumbingmanipulators git-update-server-info synchingrepositories git-upload-archive synchelpers git-upload-pack synchelpers +git-url-parse purehelpers git-var plumbinginterrogators git-verify-commit ancillaryinterrogators git-verify-pack plumbinginterrogators diff --git a/git.c b/git.c index 5a40eab8a2..a073eed931 100644 --- a/git.c +++ b/git.c @@ -670,6 +670,7 @@ static struct cmd_struct commands[] = { { "upload-archive", cmd_upload_archive, NO_PARSEOPT }, { "upload-archive--writer", cmd_upload_archive_writer, NO_PARSEOPT }, { "upload-pack", cmd_upload_pack }, + { "url-parse", cmd_url_parse }, { "var", cmd_var, RUN_SETUP_GENTLY | NO_PARSEOPT }, { "verify-commit", cmd_verify_commit, RUN_SETUP }, { "verify-pack", cmd_verify_pack }, diff --git a/meson.build b/meson.build index 11488623bf..dc3cf68ee5 100644 --- 
a/meson.build +++ b/meson.build @@ -686,6 +686,7 @@ builtin_sources = [ 'builtin/update-server-info.c', 'builtin/upload-archive.c', 'builtin/upload-pack.c', + 'builtin/url-parse.c', 'builtin/var.c', 'builtin/verify-commit.c', 'builtin/verify-pack.c', From d1671b13dc3c5d87368bd09604540ad0a8ed33b5 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Sat, 2 May 2026 05:28:41 +0000 Subject: [PATCH 7/8] doc: describe the url-parse builtin The new url-parse builtin validates git URLs and optionally extracts their components. Helped-by: Ghanshyam Thakkar Signed-off-by: Matheus Afonso Martins Moreira Signed-off-by: Junio C Hamano --- Documentation/git-url-parse.adoc | 80 ++++++++++++++++++++++++++++++++ Documentation/meson.build | 1 + 2 files changed, 81 insertions(+) create mode 100644 Documentation/git-url-parse.adoc diff --git a/Documentation/git-url-parse.adoc b/Documentation/git-url-parse.adoc new file mode 100644 index 0000000000..9d0d93da4a --- /dev/null +++ b/Documentation/git-url-parse.adoc @@ -0,0 +1,80 @@ +git-url-parse(1) +================ + +NAME +---- +git-url-parse - Parse and extract git URL components + +SYNOPSIS +-------- +[synopsis] +git url-parse [-c <component>] [--] <url>... + +DESCRIPTION +----------- + +Git supports many ways to specify URLs, some of them non-standard. +For example, git supports the scp style [user@]host:[path] format. +This command eases interoperability with git URLs by enabling the +parsing and extraction of the components of all git URLs. + +Any syntactically valid URL is parsed, even if the scheme is not one +git supports for fetching or pushing. + +OPTIONS +------- + +`-c <component>`:: +`--component <component>`:: + Extract the _<component>_ component from the given Git URLs. + _<component>_ can be one of: + `scheme`, `user`, `password`, `host`, `port`, `path`. + +OUTPUT +------ + +When `--component` is given, the requested component of each URL +is printed on its own line, in the order the URLs were given. 
If +the URL has no such component (for example, a port in a URL that +does not specify one), an empty line is printed in its place. + +When `--component` is not given, no output is produced. The exit +status is zero if every URL parses successfully and non-zero +otherwise, allowing the command to be used purely as a validator. + +EXAMPLES +-------- + +* Print the host name: ++ +------------ +$ git url-parse --component host https://example.com/user/repo +example.com +------------ + +* Print the path: ++ +------------ +$ git url-parse --component path https://example.com/user/repo +/user/repo +$ git url-parse --component path example.com:~user/repo +~user/repo +$ git url-parse --component path example.com:user/repo +/user/repo +------------ + +* Validate URLs without outputting anything: ++ +------------ +$ git url-parse https://example.com/user/repo example.com:~user/repo +------------ + +SEE ALSO +-------- +linkgit:git-clone[1], +linkgit:git-fetch[1], +linkgit:git-config[1] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/meson.build b/Documentation/meson.build index d6365b888b..32c8606a80 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -155,6 +155,7 @@ manpages = { 'git-update-server-info.adoc' : 1, 'git-upload-archive.adoc' : 1, 'git-upload-pack.adoc' : 1, + 'git-url-parse.adoc' : 1, 'git-var.adoc' : 1, 'git-verify-commit.adoc' : 1, 'git-verify-pack.adoc' : 1, From 0e2149cff1c37c5f9602d515d1d39d9701d15e24 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Sat, 2 May 2026 05:28:42 +0000 Subject: [PATCH 8/8] t9904: add tests for the new url-parse builtin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test git URL parsing, validation and component extraction on all documented git URL schemes and syntaxes. 
Add IPv6 host coverage in URL form: ssh://[::1]/path ssh://user@[::1]:1234/path git://[::1]:9418/path http://[2001:db8::1]/path https://[2001:db8::1]/path In URL form the brackets are kept in the host component (RFC 3986 syntax for IPv6 literals). Also exercise the bracketed scp short forms that t5601-clone.sh covers via parse_connect_url: [host]:path [host:port]:path [::1]:repo user@[::1]:repo user@[host:port]:path In scp form, brackets are kept for IPv6 literals (two or more inner colons) and stripped for plain hostnames or host:port pairs. Suggested-by: Torsten Bögershausen Signed-off-by: Matheus Afonso Martins Moreira Signed-off-by: Junio C Hamano --- t/meson.build | 1 + t/t9904-url-parse.sh | 319 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 320 insertions(+) create mode 100755 t/t9904-url-parse.sh diff --git a/t/meson.build b/t/meson.build index 7528e5cda5..41b389a472 100644 --- a/t/meson.build +++ b/t/meson.build @@ -1114,6 +1114,7 @@ integration_tests = [ 't9901-git-web--browse.sh', 't9902-completion.sh', 't9903-bash-prompt.sh', + 't9904-url-parse.sh', ] benchmarks = [ diff --git a/t/t9904-url-parse.sh b/t/t9904-url-parse.sh new file mode 100755 index 0000000000..8a369d2040 --- /dev/null +++ b/t/t9904-url-parse.sh @@ -0,0 +1,319 @@ +#!/bin/sh +# +# Copyright (c) 2024 Matheus Afonso Martins Moreira +# + +test_description='git url-parse tests' + +. 
./test-lib.sh + +test_expect_success 'git url-parse -- ssh syntax' ' + git url-parse "ssh://user@example.com:1234/repository/path" && + git url-parse "ssh://user@example.com/repository/path" && + git url-parse "ssh://example.com:1234/repository/path" && + git url-parse "ssh://example.com/repository/path" +' + +test_expect_success 'git url-parse -- git syntax' ' + git url-parse "git://example.com:1234/repository/path" && + git url-parse "git://example.com/repository/path" +' + +test_expect_success 'git url-parse -- http syntax' ' + git url-parse "https://example.com:1234/repository/path" && + git url-parse "https://example.com/repository/path" && + git url-parse "http://example.com:1234/repository/path" && + git url-parse "http://example.com/repository/path" +' + +test_expect_success 'git url-parse -- scp syntax' ' + git url-parse "user@example.com:/repository/path" && + git url-parse "example.com:/repository/path" +' + +test_expect_success 'git url-parse -- username expansion - ssh syntax' ' + git url-parse "ssh://user@example.com:1234/~user/repository" && + git url-parse "ssh://user@example.com/~user/repository" && + git url-parse "ssh://example.com:1234/~user/repository" && + git url-parse "ssh://example.com/~user/repository" +' + +test_expect_success 'git url-parse -- username expansion - git syntax' ' + git url-parse "git://example.com:1234/~user/repository" && + git url-parse "git://example.com/~user/repository" +' + +test_expect_success 'git url-parse -- username expansion - scp syntax' ' + git url-parse "user@example.com:~user/repository" && + git url-parse "example.com:~user/repository" +' + +test_expect_success 'git url-parse -- file urls' ' + git url-parse "file:///repository/path" && + git url-parse "file://" +' + +test_expect_success 'git url-parse -c scheme -- ssh syntax' ' + test ssh = "$(git url-parse -c scheme "ssh://user@example.com:1234/repository/path")" && + test ssh = "$(git url-parse -c scheme "ssh://user@example.com/repository/path")" && + 
test ssh = "$(git url-parse -c scheme "ssh://example.com:1234/repository/path")" && + test ssh = "$(git url-parse -c scheme "ssh://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c scheme -- git syntax' ' + test git = "$(git url-parse -c scheme "git://example.com:1234/repository/path")" && + test git = "$(git url-parse -c scheme "git://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c scheme -- http syntax' ' + test https = "$(git url-parse -c scheme "https://example.com:1234/repository/path")" && + test https = "$(git url-parse -c scheme "https://example.com/repository/path")" && + test http = "$(git url-parse -c scheme "http://example.com:1234/repository/path")" && + test http = "$(git url-parse -c scheme "http://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c scheme -- scp syntax' ' + test ssh = "$(git url-parse -c scheme "user@example.com:/repository/path")" && + test ssh = "$(git url-parse -c scheme "example.com:/repository/path")" +' + +test_expect_success 'git url-parse -c user -- ssh syntax' ' + test user = "$(git url-parse -c user "ssh://user@example.com:1234/repository/path")" && + test user = "$(git url-parse -c user "ssh://user@example.com/repository/path")" && + test "" = "$(git url-parse -c user "ssh://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c user "ssh://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c user -- git syntax' ' + test "" = "$(git url-parse -c user "git://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c user "git://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c user -- http syntax' ' + test "" = "$(git url-parse -c user "https://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c user "https://example.com/repository/path")" && + test "" = "$(git url-parse -c user "http://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c 
user "http://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c user -- scp syntax' ' + test user = "$(git url-parse -c user "user@example.com:/repository/path")" && + test "" = "$(git url-parse -c user "example.com:/repository/path")" +' + +test_expect_success 'git url-parse -c password -- http syntax' ' + test secret = "$(git url-parse -c password "https://user:secret@example.com:1234/repository/path")" && + test secret = "$(git url-parse -c password "http://user:secret@example.com/repository/path")" && + test "" = "$(git url-parse -c password "https://user@example.com/repository/path")" && + test "" = "$(git url-parse -c password "https://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c host -- ssh syntax' ' + test example.com = "$(git url-parse -c host "ssh://user@example.com:1234/repository/path")" && + test example.com = "$(git url-parse -c host "ssh://user@example.com/repository/path")" && + test example.com = "$(git url-parse -c host "ssh://example.com:1234/repository/path")" && + test example.com = "$(git url-parse -c host "ssh://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c host -- git syntax' ' + test example.com = "$(git url-parse -c host "git://example.com:1234/repository/path")" && + test example.com = "$(git url-parse -c host "git://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c host -- http syntax' ' + test example.com = "$(git url-parse -c host "https://example.com:1234/repository/path")" && + test example.com = "$(git url-parse -c host "https://example.com/repository/path")" && + test example.com = "$(git url-parse -c host "http://example.com:1234/repository/path")" && + test example.com = "$(git url-parse -c host "http://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c host -- scp syntax' ' + test example.com = "$(git url-parse -c host "user@example.com:/repository/path")" && + test example.com = "$(git url-parse -c 
host "example.com:/repository/path")" +' + +test_expect_success 'git url-parse -c port -- ssh syntax' ' + test 1234 = "$(git url-parse -c port "ssh://user@example.com:1234/repository/path")" && + test "" = "$(git url-parse -c port "ssh://user@example.com/repository/path")" && + test 1234 = "$(git url-parse -c port "ssh://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c port "ssh://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c port -- git syntax' ' + test 1234 = "$(git url-parse -c port "git://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c port "git://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c port -- http syntax' ' + test 1234 = "$(git url-parse -c port "https://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c port "https://example.com/repository/path")" && + test 1234 = "$(git url-parse -c port "http://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c port "http://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c port -- scp syntax' ' + test "" = "$(git url-parse -c port "user@example.com:/repository/path")" && + test "" = "$(git url-parse -c port "example.com:/repository/path")" +' + +test_expect_success 'git url-parse -c path -- ssh syntax' ' + test "/repository/path" = "$(git url-parse -c path "ssh://user@example.com:1234/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "ssh://user@example.com/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "ssh://example.com:1234/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "ssh://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c path -- git syntax' ' + test "/repository/path" = "$(git url-parse -c path "git://example.com:1234/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "git://example.com/repository/path")" +' + 
+test_expect_success 'git url-parse -c path -- http syntax' ' + test "/repository/path" = "$(git url-parse -c path "https://example.com:1234/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "https://example.com/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "http://example.com:1234/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "http://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c path -- scp syntax' ' + test "/repository/path" = "$(git url-parse -c path "user@example.com:/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "example.com:/repository/path")" +' + +test_expect_success 'git url-parse -c path -- username expansion - ssh syntax' ' + test "~user/repository" = "$(git url-parse -c path "ssh://user@example.com:1234/~user/repository")" && + test "~user/repository" = "$(git url-parse -c path "ssh://user@example.com/~user/repository")" && + test "~user/repository" = "$(git url-parse -c path "ssh://example.com:1234/~user/repository")" && + test "~user/repository" = "$(git url-parse -c path "ssh://example.com/~user/repository")" +' + +test_expect_success 'git url-parse -c path -- username expansion - git syntax' ' + test "~user/repository" = "$(git url-parse -c path "git://example.com:1234/~user/repository")" && + test "~user/repository" = "$(git url-parse -c path "git://example.com/~user/repository")" +' + +test_expect_success 'git url-parse -c path -- username expansion - scp syntax' ' + test "~user/repository" = "$(git url-parse -c path "user@example.com:~user/repository")" && + test "~user/repository" = "$(git url-parse -c path "example.com:~user/repository")" +' + +test_expect_success 'git url-parse -c path -- username expansion strips query and fragment' ' + test "~user/repository" = "$(git url-parse -c path "ssh://example.com/~user/repository?query")" && + test "~user/repository" = "$(git url-parse -c path 
"ssh://example.com/~user/repository#fragment")" && + test "~user/repository" = "$(git url-parse -c path "git://example.com/~user/repository?query")" && + test "~user/repository" = "$(git url-parse -c path "user@example.com:~user/repository?query")" +' + +test_expect_success 'git url-parse -- ssh syntax with IPv6' ' + git url-parse "ssh://user@[::1]:1234/repository/path" && + git url-parse "ssh://user@[::1]/repository/path" && + git url-parse "ssh://[::1]:1234/repository/path" && + git url-parse "ssh://[::1]/repository/path" && + git url-parse "ssh://[2001:db8::1]/repository/path" +' + +test_expect_success 'git url-parse -- git syntax with IPv6' ' + git url-parse "git://[::1]:9418/repository/path" && + git url-parse "git://[::1]/repository/path" +' + +test_expect_success 'git url-parse -- http syntax with IPv6' ' + git url-parse "https://[::1]:1234/repository/path" && + git url-parse "https://[::1]/repository/path" && + git url-parse "http://[2001:db8::1]/repository/path" +' + +test_expect_success 'git url-parse -c host -- IPv6 in URL form' ' + test "[::1]" = "$(git url-parse -c host "ssh://user@[::1]:1234/repository/path")" && + test "[::1]" = "$(git url-parse -c host "ssh://[::1]/repository/path")" && + test "[2001:db8::1]" = "$(git url-parse -c host "ssh://[2001:db8::1]/repository/path")" && + test "[::1]" = "$(git url-parse -c host "git://[::1]/repository/path")" && + test "[2001:db8::1]" = "$(git url-parse -c host "https://[2001:db8::1]/repository/path")" +' + +test_expect_success 'git url-parse -c port -- IPv6 in URL form' ' + test 1234 = "$(git url-parse -c port "ssh://user@[::1]:1234/repository/path")" && + test "" = "$(git url-parse -c port "ssh://[::1]/repository/path")" && + test 9418 = "$(git url-parse -c port "git://[::1]:9418/repository/path")" +' + +test_expect_success 'git url-parse -- scp syntax with IPv6' ' + git url-parse "[::1]:repository/path" && + git url-parse "user@[::1]:repository/path" && + git url-parse "[2001:db8::1]:repo" +' + 
+test_expect_success 'git url-parse -- scp syntax with bracketed hostname' ' + git url-parse "[myhost]:src" && + git url-parse "user@[myhost]:src" +' + +test_expect_success 'git url-parse -- scp syntax with bracketed host:port' ' + git url-parse "[myhost:123]:src" && + git url-parse "user@[myhost:123]:src" +' + +test_expect_success 'git url-parse -c host -- scp+IPv6' ' + test "[::1]" = "$(git url-parse -c host "[::1]:repository/path")" && + test "[::1]" = "$(git url-parse -c host "user@[::1]:repository/path")" && + test "[2001:db8::1]" = "$(git url-parse -c host "[2001:db8::1]:repo")" +' + +test_expect_success 'git url-parse -c path -- scp+IPv6' ' + test "/repository/path" = "$(git url-parse -c path "[::1]:/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "[::1]:repository/path")" && + test "/repo" = "$(git url-parse -c path "[2001:db8::1]:repo")" +' + +test_expect_success 'git url-parse -c host,port,path -- scp [host:port]:src' ' + test myhost = "$(git url-parse -c host "[myhost:123]:src")" && + test 123 = "$(git url-parse -c port "[myhost:123]:src")" && + test "/src" = "$(git url-parse -c path "[myhost:123]:src")" +' + +test_expect_success 'git url-parse -c host,path -- scp [host]:src' ' + test myhost = "$(git url-parse -c host "[myhost]:src")" && + test "/src" = "$(git url-parse -c path "[myhost]:src")" +' + +test_expect_success 'git url-parse -c user -- scp with user@ and brackets' ' + test user = "$(git url-parse -c user "user@[::1]:repo")" && + test user = "$(git url-parse -c user "user@[myhost:123]:src")" && + test user = "$(git url-parse -c user "user@[myhost]:src")" +' + +test_expect_success 'git url-parse -- scp+IPv6 with username expansion' ' + test "~user/repo" = "$(git url-parse -c path "[::1]:~user/repo")" && + test "~user/repo" = "$(git url-parse -c path "user@[::1]:~user/repo")" +' + +test_expect_success 'git url-parse fails on invalid URL' ' + test_must_fail git url-parse "not a url" +' + +test_expect_success 'git 
url-parse helpful error for absolute local path' ' + test_must_fail git url-parse "/abs/path" 2>err && + test_grep "is not a URL" err && + test_grep "file:///" err +' + +test_expect_success 'git url-parse helpful error for relative local path' ' + test_must_fail git url-parse "./rel" 2>err && + test_grep "is not a URL" err && + test_grep "absolute path" err +' + +test_expect_success 'git url-parse fails on unknown -c component name' ' + test_must_fail git url-parse -c bogus "https://example.com/repo" +' + +test_expect_success 'git url-parse fails on URL missing host' ' + test_must_fail git url-parse "https://" +' + +test_expect_success 'git url-parse with no URL prints usage' ' + test_must_fail git url-parse 2>err && + test_grep "usage:" err +' + +test_done