diff --git a/.gitignore b/.gitignore index e406d3741c..4da58c6754 100644 --- a/.gitignore +++ b/.gitignore @@ -183,6 +183,7 @@ /git-update-server-info /git-upload-archive /git-upload-pack +/git-url-parse /git-var /git-verify-commit /git-verify-pack diff --git a/Documentation/git-url-parse.adoc b/Documentation/git-url-parse.adoc new file mode 100644 index 0000000000..9d0d93da4a --- /dev/null +++ b/Documentation/git-url-parse.adoc @@ -0,0 +1,80 @@ +git-url-parse(1) +================ + +NAME +---- +git-url-parse - Parse and extract git URL components + +SYNOPSIS +-------- +[synopsis] +git url-parse [-c <component>] [--] <url>... + +DESCRIPTION +----------- + +Git supports many ways to specify URLs, some of them non-standard. +For example, git supports the scp style [user@]host:[path] format. +This command eases interoperability with git URLs by enabling the +parsing and extraction of the components of all git URLs. + +Any syntactically valid URL is parsed, even if the scheme is not one +git supports for fetching or pushing. + +OPTIONS +------- + +`-c <component>`:: +`--component <component>`:: + Extract the _<component>_ component from the given Git URLs. + _<component>_ can be one of: + `scheme`, `user`, `password`, `host`, `port`, `path`. + +OUTPUT +------ + +When `--component` is given, the requested component of each URL +is printed on its own line, in the order the URLs were given. If +the URL has no such component (for example, a port in a URL that +does not specify one), an empty line is printed in its place. + +When `--component` is not given, no output is produced. The exit +status is zero if every URL parses successfully and non-zero +otherwise, allowing the command to be used purely as a validator. 
+ +EXAMPLES +-------- + +* Print the host name: ++ +------------ +$ git url-parse --component host https://example.com/user/repo +example.com +------------ + +* Print the path: ++ +------------ +$ git url-parse --component path https://example.com/user/repo +/user/repo +$ git url-parse --component path example.com:~user/repo +~user/repo +$ git url-parse --component path example.com:user/repo +/user/repo +------------ + +* Validate URLs without outputting anything: ++ +------------ +$ git url-parse https://example.com/user/repo example.com:~user/repo +------------ + +SEE ALSO +-------- +linkgit:git-clone[1], +linkgit:git-fetch[1], +linkgit:git-config[1] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/meson.build b/Documentation/meson.build index 58e7c6a0b8..f4854f802d 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -156,6 +156,7 @@ manpages = { 'git-update-server-info.adoc' : 1, 'git-upload-archive.adoc' : 1, 'git-upload-pack.adoc' : 1, + 'git-url-parse.adoc' : 1, 'git-var.adoc' : 1, 'git-verify-commit.adoc' : 1, 'git-verify-pack.adoc' : 1, diff --git a/Makefile b/Makefile index c1f0074f32..fb50c57e4f 100644 --- a/Makefile +++ b/Makefile @@ -1495,6 +1495,7 @@ BUILTIN_OBJS += builtin/update-ref.o BUILTIN_OBJS += builtin/update-server-info.o BUILTIN_OBJS += builtin/upload-archive.o BUILTIN_OBJS += builtin/upload-pack.o +BUILTIN_OBJS += builtin/url-parse.o BUILTIN_OBJS += builtin/var.o BUILTIN_OBJS += builtin/verify-commit.o BUILTIN_OBJS += builtin/verify-pack.o diff --git a/builtin.h b/builtin.h index 63813c9012..4e47a4ebd3 100644 --- a/builtin.h +++ b/builtin.h @@ -272,6 +272,7 @@ int cmd_update_server_info(int argc, const char **argv, const char *prefix, stru int cmd_upload_archive(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_upload_archive_writer(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_upload_pack(int argc, const char **argv, const 
char *prefix, struct repository *repo);
+int cmd_url_parse(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_var(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_verify_commit(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_verify_tag(int argc, const char **argv, const char *prefix, struct repository *repo);
diff --git a/builtin/url-parse.c b/builtin/url-parse.c
new file mode 100644
index 0000000000..7e705538c0
--- /dev/null
+++ b/builtin/url-parse.c
@@ -0,0 +1,150 @@
+#include "builtin.h"
+#include "gettext.h"
+#include "parse-options.h"
+#include "url.h"
+#include "urlmatch.h"
+
+static const char * const builtin_url_parse_usage[] = {
+	N_("git url-parse [-c <component>] [--] <url>..."),
+	NULL
+};
+
+/* Argument of -c/--component; stays NULL when the option is not given. */
+static const char *component_arg;
+
+static struct option builtin_url_parse_options[] = {
+	OPT_STRING('c', "component", &component_arg, N_("component"),
+		N_("which URL component to extract")),
+	OPT_END(),
+};
+
+/* Components selectable with --component; URL_NONE means "validate only". */
+enum url_component {
+	URL_NONE = 0,
+	URL_SCHEME,
+	URL_USER,
+	URL_PASSWORD,
+	URL_HOST,
+	URL_PORT,
+	URL_PATH,
+};
+
+/*
+ * Parse "url" into "info" or die. Strings that url_is_local_not_ssh()
+ * classifies as local paths are rejected up front with a hint to use a
+ * file:// URL; anything url_parse() rejects dies with info->err.
+ */
+static void parse_or_die(const char *url, struct url_info *info)
+{
+	if (url_is_local_not_ssh(url)) {
+		if (*url == '/')
+			die(_("'%s' is not a URL; if you meant a local "
+			      "repository, use 'file://%s'"), url, url);
+		if (has_dos_drive_prefix(url))
+			die(_("'%s' is not a URL; if you meant a local "
+			      "repository, use 'file:///%s'"), url, url);
+		die(_("'%s' is not a URL; if you meant a local repository, "
+		      "use a 'file://' URL with an absolute path"), url);
+	}
+	if (!url_parse(url, info))
+		die(_("invalid git URL '%s': %s"), url, info->err);
+}
+
+/* Map a --component argument to its enum value; die on an unknown name. */
+static enum url_component get_component_or_die(const char *arg)
+{
+	if (!strcmp("path", arg))
+		return URL_PATH;
+	if (!strcmp("host", arg))
+		return URL_HOST;
+	if (!strcmp("scheme", arg))
+		return URL_SCHEME;
+	if (!strcmp("user", arg))
+		return URL_USER;
+	if (!strcmp("password", arg))
+		return URL_PASSWORD;
+	if (!strcmp("port", arg))
+		return URL_PORT;
+	die(_("invalid git URL component '%s'"), arg);
+}
+
+/*
+ * Return a newly allocated copy of the requested component of the parsed
+ * URL in "info" (an empty string if its recorded length is zero), or NULL
+ * for URL_NONE. The caller must free() the result.
+ */
+static char *extract_component(enum url_component component,
+			       struct url_info *info)
+{
+	size_t offset, length;
+
+	switch (component) {
+	case URL_SCHEME:
+		offset = 0;
+		length = info->scheme_len;
+		break;
+	case URL_USER:
+		offset = info->user_off;
+		length = info->user_len;
+		break;
+	case URL_PASSWORD:
+		offset = info->passwd_off;
+		length = info->passwd_len;
+		break;
+	case URL_HOST:
+		offset = info->host_off;
+		length = info->host_len;
+		break;
+	case URL_PORT:
+		offset = info->port_off;
+		length = info->port_len;
+		break;
+	case URL_PATH:
+		offset = info->path_off;
+		length = info->path_len;
+		break;
+	case URL_NONE:
+		return NULL;
+	default:
+		BUG("unknown url component %d", (int)component);
+	}
+
+	return xstrndup(info->url + offset, length);
+}
+
+int cmd_url_parse(int argc,
+		  const char **argv,
+		  const char *prefix,
+		  struct repository *repo UNUSED)
+{
+	struct url_info info;
+	enum url_component selected = URL_NONE;
+	char *extracted;
+	int i;
+
+	argc = parse_options(argc, argv, prefix, builtin_url_parse_options,
+			     builtin_url_parse_usage, 0);
+
+	if (argc == 0)
+		usage_with_options(builtin_url_parse_usage,
+				   builtin_url_parse_options);
+
+	if (component_arg)
+		selected = get_component_or_die(component_arg);
+
+	for (i = 0; i < argc; i++) {
+		parse_or_die(argv[i], &info);
+
+		if (selected != URL_NONE) {
+			extracted = extract_component(selected, &info);
+			if (extracted) {
+				puts(extracted);
+				free(extracted);
+			}
+		}
+
+		free(info.url);
+	}
+
+	return 0;
+}
diff --git a/command-list.txt b/command-list.txt
index df729872dc..21b802c420 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -203,6 +203,7 @@ git-update-ref plumbingmanipulators
 git-update-server-info synchingrepositories
 git-upload-archive synchelpers
 git-upload-pack synchelpers
+git-url-parse purehelpers
 git-var plumbinginterrogators
 git-verify-commit ancillaryinterrogators
 git-verify-pack plumbinginterrogators
diff --git a/connect.c b/connect.c index
a02583a102..60e4237470 100644 --- a/connect.c +++ b/connect.c @@ -700,51 +700,21 @@ int server_supports(const char *feature) return !!server_feature_value(feature, NULL); } -enum protocol { - PROTO_LOCAL = 1, - PROTO_FILE, - PROTO_SSH, - PROTO_GIT -}; - -int url_is_local_not_ssh(const char *url) +static const char *url_scheme_name(enum url_scheme scheme) { - const char *colon = strchr(url, ':'); - const char *slash = strchr(url, '/'); - return !colon || (slash && slash < colon) || - (has_dos_drive_prefix(url) && is_valid_path(url)); -} - -static const char *prot_name(enum protocol protocol) -{ - switch (protocol) { - case PROTO_LOCAL: - case PROTO_FILE: + switch (scheme) { + case URL_SCHEME_LOCAL: + case URL_SCHEME_FILE: return "file"; - case PROTO_SSH: + case URL_SCHEME_SSH: return "ssh"; - case PROTO_GIT: + case URL_SCHEME_GIT: return "git"; default: return "unknown protocol"; } } -static enum protocol get_protocol(const char *name) -{ - if (!strcmp(name, "ssh")) - return PROTO_SSH; - if (!strcmp(name, "git")) - return PROTO_GIT; - if (!strcmp(name, "git+ssh")) /* deprecated - do not use */ - return PROTO_SSH; - if (!strcmp(name, "ssh+git")) /* deprecated - do not use */ - return PROTO_SSH; - if (!strcmp(name, "file")) - return PROTO_FILE; - die(_("protocol '%s' is not supported"), name); -} - static char *host_end(char **hoststart, int removebrackets) { char *host = *hoststart; @@ -1081,14 +1051,14 @@ static char *get_port(char *host) * Extract protocol and relevant parts from the specified connection URL. * The caller must free() the returned strings. 
*/ -static enum protocol parse_connect_url(const char *url_orig, char **ret_host, - char **ret_path) +static enum url_scheme parse_connect_url(const char *url_orig, char **ret_host, + char **ret_path) { char *url; char *host, *path; char *end; int separator = '/'; - enum protocol protocol = PROTO_LOCAL; + enum url_scheme scheme = URL_SCHEME_LOCAL; if (is_url(url_orig)) url = url_decode(url_orig); @@ -1098,12 +1068,14 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host, host = strstr(url, "://"); if (host) { *host = '\0'; - protocol = get_protocol(url); + scheme = url_get_scheme(url); + if (scheme == URL_SCHEME_UNKNOWN) + die(_("protocol '%s' is not supported"), url); host += 3; } else { host = url; if (!url_is_local_not_ssh(url)) { - protocol = PROTO_SSH; + scheme = URL_SCHEME_SSH; separator = ':'; } } @@ -1114,13 +1086,13 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host, */ end = host_end(&host, 0); - if (protocol == PROTO_LOCAL) + if (scheme == URL_SCHEME_LOCAL) path = end; - else if (protocol == PROTO_FILE && *host != '/' && + else if (scheme == URL_SCHEME_FILE && *host != '/' && !has_dos_drive_prefix(host) && offset_1st_component(host - 2) > 1) path = host - 2; /* include the leading "//" */ - else if (protocol == PROTO_FILE && has_dos_drive_prefix(end)) + else if (scheme == URL_SCHEME_FILE && has_dos_drive_prefix(end)) path = end; /* "file://$(pwd)" may be "file://C:/projects/repo" */ else path = strchr(end, separator); @@ -1136,7 +1108,7 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host, end = path; /* Need to \0 terminate host here */ if (separator == ':') path++; /* path starts after ':' */ - if (protocol == PROTO_GIT || protocol == PROTO_SSH) { + if (scheme == URL_SCHEME_GIT || scheme == URL_SCHEME_SSH) { if (path[1] == '~') path++; } @@ -1147,7 +1119,7 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host, *ret_host = xstrdup(host); *ret_path 
= path; free(url); - return protocol; + return scheme; } static const char *get_ssh_command(void) @@ -1432,7 +1404,7 @@ struct child_process *git_connect(int fd[2], const char *url, { char *hostandport, *path; struct child_process *conn; - enum protocol protocol; + enum url_scheme scheme; enum protocol_version version = get_protocol_version_config(); /* @@ -1449,14 +1421,14 @@ struct child_process *git_connect(int fd[2], const char *url, */ signal(SIGCHLD, SIG_DFL); - protocol = parse_connect_url(url, &hostandport, &path); - if ((flags & CONNECT_DIAG_URL) && (protocol != PROTO_SSH)) { + scheme = parse_connect_url(url, &hostandport, &path); + if ((flags & CONNECT_DIAG_URL) && (scheme != URL_SCHEME_SSH)) { printf("Diag: url=%s\n", url ? url : "NULL"); - printf("Diag: protocol=%s\n", prot_name(protocol)); + printf("Diag: protocol=%s\n", url_scheme_name(scheme)); printf("Diag: hostandport=%s\n", hostandport ? hostandport : "NULL"); printf("Diag: path=%s\n", path ? path : "NULL"); conn = NULL; - } else if (protocol == PROTO_GIT) { + } else if (scheme == URL_SCHEME_GIT) { conn = git_connect_git(fd, hostandport, path, prog, version, flags); conn->trace2_child_class = "transport/git"; } else { @@ -1479,7 +1451,7 @@ struct child_process *git_connect(int fd[2], const char *url, conn->use_shell = 1; conn->in = conn->out = -1; - if (protocol == PROTO_SSH) { + if (scheme == URL_SCHEME_SSH) { char *ssh_host = hostandport; const char *port = NULL; transport_check_allowed("ssh"); @@ -1490,7 +1462,7 @@ struct child_process *git_connect(int fd[2], const char *url, if (flags & CONNECT_DIAG_URL) { printf("Diag: url=%s\n", url ? url : "NULL"); - printf("Diag: protocol=%s\n", prot_name(protocol)); + printf("Diag: protocol=%s\n", url_scheme_name(scheme)); printf("Diag: userandhost=%s\n", ssh_host ? ssh_host : "NULL"); printf("Diag: port=%s\n", port ? port : "NONE"); printf("Diag: path=%s\n", path ? 
path : "NULL"); diff --git a/connect.h b/connect.h index 1645126c17..8d84f6656b 100644 --- a/connect.h +++ b/connect.h @@ -13,7 +13,6 @@ int git_connection_is_socket(struct child_process *conn); int server_supports(const char *feature); int parse_feature_request(const char *features, const char *feature); const char *server_feature_value(const char *feature, size_t *len_ret); -int url_is_local_not_ssh(const char *url); struct packet_reader; enum protocol_version discover_version(struct packet_reader *reader); diff --git a/git.c b/git.c index 1f2a0864e7..5dadc86948 100644 --- a/git.c +++ b/git.c @@ -671,6 +671,7 @@ static struct cmd_struct commands[] = { { "upload-archive", cmd_upload_archive, NO_PARSEOPT }, { "upload-archive--writer", cmd_upload_archive_writer, NO_PARSEOPT }, { "upload-pack", cmd_upload_pack }, + { "url-parse", cmd_url_parse }, { "var", cmd_var, RUN_SETUP_GENTLY | NO_PARSEOPT }, { "verify-commit", cmd_verify_commit, RUN_SETUP }, { "verify-pack", cmd_verify_pack }, diff --git a/meson.build b/meson.build index 91a883348d..052c81f288 100644 --- a/meson.build +++ b/meson.build @@ -698,6 +698,7 @@ builtin_sources = [ 'builtin/update-server-info.c', 'builtin/upload-archive.c', 'builtin/upload-pack.c', + 'builtin/url-parse.c', 'builtin/var.c', 'builtin/verify-commit.c', 'builtin/verify-pack.c', diff --git a/remote.c b/remote.c index a664cd166a..24a8118d25 100644 --- a/remote.c +++ b/remote.c @@ -8,6 +8,7 @@ #include "gettext.h" #include "hex.h" #include "remote.h" +#include "url.h" #include "urlmatch.h" #include "refs.h" #include "refspec.h" diff --git a/t/meson.build b/t/meson.build index f502ad8ec9..fd955f44ef 100644 --- a/t/meson.build +++ b/t/meson.build @@ -1115,6 +1115,7 @@ integration_tests = [ 't9901-git-web--browse.sh', 't9902-completion.sh', 't9903-bash-prompt.sh', + 't9904-url-parse.sh', ] benchmarks = [ diff --git a/t/t9904-url-parse.sh b/t/t9904-url-parse.sh new file mode 100755 index 0000000000..8a369d2040 --- /dev/null +++ 
b/t/t9904-url-parse.sh @@ -0,0 +1,319 @@ +#!/bin/sh +# +# Copyright (c) 2024 Matheus Afonso Martins Moreira +# + +test_description='git url-parse tests' + +. ./test-lib.sh + +test_expect_success 'git url-parse -- ssh syntax' ' + git url-parse "ssh://user@example.com:1234/repository/path" && + git url-parse "ssh://user@example.com/repository/path" && + git url-parse "ssh://example.com:1234/repository/path" && + git url-parse "ssh://example.com/repository/path" +' + +test_expect_success 'git url-parse -- git syntax' ' + git url-parse "git://example.com:1234/repository/path" && + git url-parse "git://example.com/repository/path" +' + +test_expect_success 'git url-parse -- http syntax' ' + git url-parse "https://example.com:1234/repository/path" && + git url-parse "https://example.com/repository/path" && + git url-parse "http://example.com:1234/repository/path" && + git url-parse "http://example.com/repository/path" +' + +test_expect_success 'git url-parse -- scp syntax' ' + git url-parse "user@example.com:/repository/path" && + git url-parse "example.com:/repository/path" +' + +test_expect_success 'git url-parse -- username expansion - ssh syntax' ' + git url-parse "ssh://user@example.com:1234/~user/repository" && + git url-parse "ssh://user@example.com/~user/repository" && + git url-parse "ssh://example.com:1234/~user/repository" && + git url-parse "ssh://example.com/~user/repository" +' + +test_expect_success 'git url-parse -- username expansion - git syntax' ' + git url-parse "git://example.com:1234/~user/repository" && + git url-parse "git://example.com/~user/repository" +' + +test_expect_success 'git url-parse -- username expansion - scp syntax' ' + git url-parse "user@example.com:~user/repository" && + git url-parse "example.com:~user/repository" +' + +test_expect_success 'git url-parse -- file urls' ' + git url-parse "file:///repository/path" && + git url-parse "file://" +' + +test_expect_success 'git url-parse -c scheme -- ssh syntax' ' + test ssh = "$(git 
url-parse -c scheme "ssh://user@example.com:1234/repository/path")" && + test ssh = "$(git url-parse -c scheme "ssh://user@example.com/repository/path")" && + test ssh = "$(git url-parse -c scheme "ssh://example.com:1234/repository/path")" && + test ssh = "$(git url-parse -c scheme "ssh://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c scheme -- git syntax' ' + test git = "$(git url-parse -c scheme "git://example.com:1234/repository/path")" && + test git = "$(git url-parse -c scheme "git://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c scheme -- http syntax' ' + test https = "$(git url-parse -c scheme "https://example.com:1234/repository/path")" && + test https = "$(git url-parse -c scheme "https://example.com/repository/path")" && + test http = "$(git url-parse -c scheme "http://example.com:1234/repository/path")" && + test http = "$(git url-parse -c scheme "http://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c scheme -- scp syntax' ' + test ssh = "$(git url-parse -c scheme "user@example.com:/repository/path")" && + test ssh = "$(git url-parse -c scheme "example.com:/repository/path")" +' + +test_expect_success 'git url-parse -c user -- ssh syntax' ' + test user = "$(git url-parse -c user "ssh://user@example.com:1234/repository/path")" && + test user = "$(git url-parse -c user "ssh://user@example.com/repository/path")" && + test "" = "$(git url-parse -c user "ssh://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c user "ssh://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c user -- git syntax' ' + test "" = "$(git url-parse -c user "git://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c user "git://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c user -- http syntax' ' + test "" = "$(git url-parse -c user "https://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c user 
"https://example.com/repository/path")" && + test "" = "$(git url-parse -c user "http://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c user "http://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c user -- scp syntax' ' + test user = "$(git url-parse -c user "user@example.com:/repository/path")" && + test "" = "$(git url-parse -c user "example.com:/repository/path")" +' + +test_expect_success 'git url-parse -c password -- http syntax' ' + test secret = "$(git url-parse -c password "https://user:secret@example.com:1234/repository/path")" && + test secret = "$(git url-parse -c password "http://user:secret@example.com/repository/path")" && + test "" = "$(git url-parse -c password "https://user@example.com/repository/path")" && + test "" = "$(git url-parse -c password "https://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c host -- ssh syntax' ' + test example.com = "$(git url-parse -c host "ssh://user@example.com:1234/repository/path")" && + test example.com = "$(git url-parse -c host "ssh://user@example.com/repository/path")" && + test example.com = "$(git url-parse -c host "ssh://example.com:1234/repository/path")" && + test example.com = "$(git url-parse -c host "ssh://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c host -- git syntax' ' + test example.com = "$(git url-parse -c host "git://example.com:1234/repository/path")" && + test example.com = "$(git url-parse -c host "git://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c host -- http syntax' ' + test example.com = "$(git url-parse -c host "https://example.com:1234/repository/path")" && + test example.com = "$(git url-parse -c host "https://example.com/repository/path")" && + test example.com = "$(git url-parse -c host "http://example.com:1234/repository/path")" && + test example.com = "$(git url-parse -c host "http://example.com/repository/path")" +' + +test_expect_success 'git 
url-parse -c host -- scp syntax' ' + test example.com = "$(git url-parse -c host "user@example.com:/repository/path")" && + test example.com = "$(git url-parse -c host "example.com:/repository/path")" +' + +test_expect_success 'git url-parse -c port -- ssh syntax' ' + test 1234 = "$(git url-parse -c port "ssh://user@example.com:1234/repository/path")" && + test "" = "$(git url-parse -c port "ssh://user@example.com/repository/path")" && + test 1234 = "$(git url-parse -c port "ssh://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c port "ssh://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c port -- git syntax' ' + test 1234 = "$(git url-parse -c port "git://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c port "git://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c port -- http syntax' ' + test 1234 = "$(git url-parse -c port "https://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c port "https://example.com/repository/path")" && + test 1234 = "$(git url-parse -c port "http://example.com:1234/repository/path")" && + test "" = "$(git url-parse -c port "http://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c port -- scp syntax' ' + test "" = "$(git url-parse -c port "user@example.com:/repository/path")" && + test "" = "$(git url-parse -c port "example.com:/repository/path")" +' + +test_expect_success 'git url-parse -c path -- ssh syntax' ' + test "/repository/path" = "$(git url-parse -c path "ssh://user@example.com:1234/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "ssh://user@example.com/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "ssh://example.com:1234/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "ssh://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c path -- git syntax' ' + test "/repository/path" = "$(git 
url-parse -c path "git://example.com:1234/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "git://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c path -- http syntax' ' + test "/repository/path" = "$(git url-parse -c path "https://example.com:1234/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "https://example.com/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "http://example.com:1234/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "http://example.com/repository/path")" +' + +test_expect_success 'git url-parse -c path -- scp syntax' ' + test "/repository/path" = "$(git url-parse -c path "user@example.com:/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "example.com:/repository/path")" +' + +test_expect_success 'git url-parse -c path -- username expansion - ssh syntax' ' + test "~user/repository" = "$(git url-parse -c path "ssh://user@example.com:1234/~user/repository")" && + test "~user/repository" = "$(git url-parse -c path "ssh://user@example.com/~user/repository")" && + test "~user/repository" = "$(git url-parse -c path "ssh://example.com:1234/~user/repository")" && + test "~user/repository" = "$(git url-parse -c path "ssh://example.com/~user/repository")" +' + +test_expect_success 'git url-parse -c path -- username expansion - git syntax' ' + test "~user/repository" = "$(git url-parse -c path "git://example.com:1234/~user/repository")" && + test "~user/repository" = "$(git url-parse -c path "git://example.com/~user/repository")" +' + +test_expect_success 'git url-parse -c path -- username expansion - scp syntax' ' + test "~user/repository" = "$(git url-parse -c path "user@example.com:~user/repository")" && + test "~user/repository" = "$(git url-parse -c path "example.com:~user/repository")" +' + +test_expect_success 'git url-parse -c path -- username expansion strips query and fragment' ' + test 
"~user/repository" = "$(git url-parse -c path "ssh://example.com/~user/repository?query")" && + test "~user/repository" = "$(git url-parse -c path "ssh://example.com/~user/repository#fragment")" && + test "~user/repository" = "$(git url-parse -c path "git://example.com/~user/repository?query")" && + test "~user/repository" = "$(git url-parse -c path "user@example.com:~user/repository?query")" +' + +test_expect_success 'git url-parse -- ssh syntax with IPv6' ' + git url-parse "ssh://user@[::1]:1234/repository/path" && + git url-parse "ssh://user@[::1]/repository/path" && + git url-parse "ssh://[::1]:1234/repository/path" && + git url-parse "ssh://[::1]/repository/path" && + git url-parse "ssh://[2001:db8::1]/repository/path" +' + +test_expect_success 'git url-parse -- git syntax with IPv6' ' + git url-parse "git://[::1]:9418/repository/path" && + git url-parse "git://[::1]/repository/path" +' + +test_expect_success 'git url-parse -- http syntax with IPv6' ' + git url-parse "https://[::1]:1234/repository/path" && + git url-parse "https://[::1]/repository/path" && + git url-parse "http://[2001:db8::1]/repository/path" +' + +test_expect_success 'git url-parse -c host -- IPv6 in URL form' ' + test "[::1]" = "$(git url-parse -c host "ssh://user@[::1]:1234/repository/path")" && + test "[::1]" = "$(git url-parse -c host "ssh://[::1]/repository/path")" && + test "[2001:db8::1]" = "$(git url-parse -c host "ssh://[2001:db8::1]/repository/path")" && + test "[::1]" = "$(git url-parse -c host "git://[::1]/repository/path")" && + test "[2001:db8::1]" = "$(git url-parse -c host "https://[2001:db8::1]/repository/path")" +' + +test_expect_success 'git url-parse -c port -- IPv6 in URL form' ' + test 1234 = "$(git url-parse -c port "ssh://user@[::1]:1234/repository/path")" && + test "" = "$(git url-parse -c port "ssh://[::1]/repository/path")" && + test 9418 = "$(git url-parse -c port "git://[::1]:9418/repository/path")" +' + +test_expect_success 'git url-parse -- scp syntax with 
IPv6' ' + git url-parse "[::1]:repository/path" && + git url-parse "user@[::1]:repository/path" && + git url-parse "[2001:db8::1]:repo" +' + +test_expect_success 'git url-parse -- scp syntax with bracketed hostname' ' + git url-parse "[myhost]:src" && + git url-parse "user@[myhost]:src" +' + +test_expect_success 'git url-parse -- scp syntax with bracketed host:port' ' + git url-parse "[myhost:123]:src" && + git url-parse "user@[myhost:123]:src" +' + +test_expect_success 'git url-parse -c host -- scp+IPv6' ' + test "[::1]" = "$(git url-parse -c host "[::1]:repository/path")" && + test "[::1]" = "$(git url-parse -c host "user@[::1]:repository/path")" && + test "[2001:db8::1]" = "$(git url-parse -c host "[2001:db8::1]:repo")" +' + +test_expect_success 'git url-parse -c path -- scp+IPv6' ' + test "/repository/path" = "$(git url-parse -c path "[::1]:/repository/path")" && + test "/repository/path" = "$(git url-parse -c path "[::1]:repository/path")" && + test "/repo" = "$(git url-parse -c path "[2001:db8::1]:repo")" +' + +test_expect_success 'git url-parse -c host,port,path -- scp [host:port]:src' ' + test myhost = "$(git url-parse -c host "[myhost:123]:src")" && + test 123 = "$(git url-parse -c port "[myhost:123]:src")" && + test "/src" = "$(git url-parse -c path "[myhost:123]:src")" +' + +test_expect_success 'git url-parse -c host,path -- scp [host]:src' ' + test myhost = "$(git url-parse -c host "[myhost]:src")" && + test "/src" = "$(git url-parse -c path "[myhost]:src")" +' + +test_expect_success 'git url-parse -c user -- scp with user@ and brackets' ' + test user = "$(git url-parse -c user "user@[::1]:repo")" && + test user = "$(git url-parse -c user "user@[myhost:123]:src")" && + test user = "$(git url-parse -c user "user@[myhost]:src")" +' + +test_expect_success 'git url-parse -- scp+IPv6 with username expansion' ' + test "~user/repo" = "$(git url-parse -c path "[::1]:~user/repo")" && + test "~user/repo" = "$(git url-parse -c path "user@[::1]:~user/repo")" +' + 
+test_expect_success 'git url-parse fails on invalid URL' '
+	test_must_fail git url-parse "not a url"
+'
+
+test_expect_success 'git url-parse helpful error for absolute local path' '
+	test_must_fail git url-parse "/abs/path" 2>err &&
+	test_grep "is not a URL" err &&
+	test_grep "file:///" err
+'
+
+test_expect_success 'git url-parse helpful error for relative local path' '
+	test_must_fail git url-parse "./rel" 2>err &&
+	test_grep "is not a URL" err &&
+	test_grep "absolute path" err
+'
+
+test_expect_success 'git url-parse fails on unknown -c component name' '
+	test_must_fail git url-parse -c bogus "https://example.com/repo"
+'
+
+test_expect_success 'git url-parse fails on URL missing host' '
+	test_must_fail git url-parse "https://"
+'
+
+test_expect_success 'git url-parse with no URL prints usage' '
+	test_must_fail git url-parse 2>err &&
+	test_grep "usage:" err
+'
+
+test_done
diff --git a/t/unit-tests/u-urlmatch-normalization.c b/t/unit-tests/u-urlmatch-normalization.c
index 39f6e1ba26..3595d893a2 100644
--- a/t/unit-tests/u-urlmatch-normalization.c
+++ b/t/unit-tests/u-urlmatch-normalization.c
@@ -245,3 +245,48 @@ void test_urlmatch_normalization__equivalents(void)
 	compare_normalized_urls("https://@x.y/^/../abc", "httpS://@x.y:0443/abc", 1);
 	compare_normalized_urls("https://@x.y/^/..", "httpS://@x.y:0443/", 1);
 }
+
+static void check_parsed_path(const char *url, const char *expected_path)
+{
+	struct url_info info;
+	char *parsed = url_parse(url, &info);
+	char *path;
+
+	cl_assert(parsed != NULL);
+	path = xstrndup(parsed + info.path_off, info.path_len);
+	cl_assert_equal_s(path, expected_path);
+	free(path);
+	free(parsed);
+}
+
+void test_urlmatch_normalization__parse_scp(void)
+{
+	check_parsed_path("host:path", "/path");
+	check_parsed_path("user@host:path", "/path");
+	check_parsed_path("host:~user/repo", "~user/repo");
+	check_parsed_path("user@host:~user/repo", "~user/repo");
+	check_parsed_path("[host]:src", "/src");
+	check_parsed_path("[host:123]:src", "/src");
+	check_parsed_path("[::1]:repo", "/repo");
+	check_parsed_path("user@[::1]:repo", "/repo");
+}
+
+void test_urlmatch_normalization__parse_url_form(void)
+{
+	check_parsed_path("ssh://host/repo", "/repo");
+	check_parsed_path("ssh://host/~user/repo", "~user/repo");
+	check_parsed_path("git://host:9418/repo", "/repo");
+	check_parsed_path("git://host/~user/repo", "~user/repo");
+	check_parsed_path("ssh://[::1]:1234/repo", "/repo");
+	check_parsed_path("http://[2001:db8::1]/repo", "/repo");
+}
+
+void test_urlmatch_normalization__parse_strips_query_and_fragment(void)
+{
+	check_parsed_path("ssh://host/~user/repo?q", "~user/repo");
+	check_parsed_path("ssh://host/~user/repo#frag", "~user/repo");
+	check_parsed_path("git://host/~user/repo?q", "~user/repo");
+	check_parsed_path("user@host:~user/repo?q", "~user/repo");
+	check_parsed_path("https://host/repo?q", "/repo");
+	check_parsed_path("https://host/repo#frag", "/repo");
+}
diff --git a/url.c b/url.c
index 3ca5987e90..a59818278f 100644
--- a/url.c
+++ b/url.c
@@ -132,3 +132,26 @@ void str_end_url_with_slash(const char *url, char **dest)
 	free(*dest);
 	*dest = strbuf_detach(&buf, NULL);
 }
+
+int url_is_local_not_ssh(const char *url)
+{
+	const char *colon = strchr(url, ':');
+	const char *slash = strchr(url, '/');
+	return !colon || (slash && slash < colon) ||
+		(has_dos_drive_prefix(url) && is_valid_path(url));
+}
+
+enum url_scheme url_get_scheme(const char *name)
+{
+	if (!strcmp(name, "ssh"))
+		return URL_SCHEME_SSH;
+	if (!strcmp(name, "git"))
+		return URL_SCHEME_GIT;
+	if (!strcmp(name, "git+ssh")) /* deprecated - do not use */
+		return URL_SCHEME_SSH;
+	if (!strcmp(name, "ssh+git")) /* deprecated - do not use */
+		return URL_SCHEME_SSH;
+	if (!strcmp(name, "file"))
+		return URL_SCHEME_FILE;
+	return URL_SCHEME_UNKNOWN;
+}
diff --git a/url.h b/url.h
index cd9140e994..7289523605 100644
--- a/url.h
+++ b/url.h
@@ -21,6 +21,22 @@ char *url_decode_parameter_value(const char **query);
 
 void end_url_with_slash(struct strbuf *buf, const char *url);
 void str_end_url_with_slash(const char *url, char **dest);
+int url_is_local_not_ssh(const char *url);
+
+enum url_scheme {
+	URL_SCHEME_UNKNOWN = 0,
+	URL_SCHEME_LOCAL,
+	URL_SCHEME_FILE,
+	URL_SCHEME_SSH,
+	URL_SCHEME_GIT,
+};
+
+/*
+ * Identify the URL scheme by name. Returns URL_SCHEME_UNKNOWN
+ * if the name does not match any scheme that Git knows about.
+ */
+enum url_scheme url_get_scheme(const char *name);
+
 /*
  * The set of unreserved characters as per STD66 (RFC3986) is
  * '[A-Za-z0-9-._~]'. These characters are safe to appear in URI
diff --git a/urlmatch.c b/urlmatch.c
index eea8300489..bf8cce6de9 100644
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -5,6 +5,7 @@
 #include "hex-ll.h"
 #include "strbuf.h"
 #include "urlmatch.h"
+#include "url.h"
 
 #define URL_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
 #define URL_DIGIT "0123456789"
@@ -440,6 +441,157 @@ char *url_normalize(const char *url, struct url_info *out_info)
 	return url_normalize_1(url, out_info, 0);
 }
 
+/*
+ * Parse url_orig, given either as a "scheme://" URL or in the scp-like
+ * "[user@]host:path" form, and return a newly allocated normalized
+ * copy, or NULL when url_normalize() rejects it.  The scp-like form is
+ * first rewritten to "ssh://[user@]host/path".  Component offsets are
+ * stored in *out_info when it is non-NULL.  For ssh and git URLs whose
+ * path starts with "/~", the reported path begins at the '~'.  The
+ * caller frees the returned string.
+ */
+char *url_parse(const char *url_orig, struct url_info *out_info)
+{
+	struct strbuf url;
+	char *host, *separator;
+	char *detached, *normalized;
+	char *url_decoded;
+	enum url_scheme scheme = URL_SCHEME_LOCAL;
+	struct url_info local_info;
+	struct url_info *info = out_info ? out_info : &local_info;
+	bool scp_syntax = false;
+
+	if (is_url(url_orig))
+		url_decoded = url_decode(url_orig);
+	else
+		url_decoded = xstrdup(url_orig);
+
+	strbuf_init(&url, strlen(url_decoded) + sizeof("ssh://"));
+	strbuf_addstr(&url, url_decoded);
+	free(url_decoded);
+
+	host = strstr(url.buf, "://");
+	if (host) {
+		/*
+		 * Temporarily NUL-terminate the scheme name
+		 * so we can pass it to url_get_scheme(),
+		 * then restore the ':' so the buffer
+		 * is intact for url_normalize() below.
+		 */
+		char saved = *host;
+		*host = '\0';
+		scheme = url_get_scheme(url.buf);
+		*host = saved;
+		host += 3;
+	} else {
+		if (!url_is_local_not_ssh(url.buf)) {
+			scp_syntax = true;
+			scheme = URL_SCHEME_SSH;
+			strbuf_insertstr(&url, 0, "ssh://");
+			host = url.buf + strlen("ssh://");
+		}
+	}
+
+	/*
+	 * Path starts after ':' in scp style SSH URLs.
+	 *
+	 * The host portion can begin with an optional "user@",
+	 * and the host itself can be wrapped in '[' ']' brackets.
+	 * The bracket form is git's legacy way of supporting:
+	 *
+	 *   - IPv6 literals: [::1]:repo
+	 *   - host:port pairs in the short form: [myhost:123]:src
+	 *   - Plain hostnames that happen to need bracketing: [host]:path
+	 *
+	 * Treat '[' followed by 0 or 1 inner colons as the host:port
+	 * or plain hostname form and strip the brackets so url_normalize
+	 * sees host[:port] natively. Two or more inner colons mark an
+	 * IPv6 literal: keep the brackets for url_normalize to recognize.
+	 *
+	 * The scp path separator is the ':' that follows the host part,
+	 * and we must skip over user@ and any '[...]' before searching.
+	 */
+	if (scp_syntax) {
+		char *user_at;
+		char *first_colon;
+		char *host_start;
+		char *bracket_end;
+
+		/*
+		 * Only an '@' ahead of the first ':' can terminate a
+		 * "user@" prefix.  A later one is part of the path, as
+		 * in "host:dir/a@b", and must not be mistaken for it.
+		 */
+		user_at = strchr(host, '@');
+		first_colon = strchr(host, ':');
+		if (user_at && first_colon && first_colon < user_at)
+			user_at = NULL;
+		host_start = user_at ? user_at + 1 : host;
+
+		if (*host_start == '[') {
+			char *p;
+			int inner_colons;
+
+			bracket_end = strchr(host_start, ']');
+			inner_colons = 0;
+			for (p = host_start + 1; bracket_end && p < bracket_end; p++)
+				if (*p == ':')
+					inner_colons++;
+
+			if (bracket_end && inner_colons <= 1) {
+				size_t close_off = bracket_end - url.buf;
+				size_t open_off = host_start - url.buf;
+				strbuf_remove(&url, close_off, 1);
+				strbuf_remove(&url, open_off, 1);
+				/*
+				 * Whatever followed ']' now sits at
+				 * close_off - 1.  Search from there instead
+				 * of assuming it is the ':' separator: it may
+				 * be the terminating NUL ("[host:123]") or
+				 * some other character.
+				 */
+				separator = strchr(url.buf + close_off - 1, ':');
+			} else if (bracket_end) {
+				separator = strchr(bracket_end + 1, ':');
+			} else {
+				separator = strchr(host_start, ':');
+			}
+		} else {
+			separator = strchr(host_start, ':');
+		}
+
+		if (separator) {
+			if (separator[1] == '/')
+				strbuf_remove(&url, separator - url.buf, 1);
+			else
+				*separator = '/';
+		}
+	}
+
+	detached = strbuf_detach(&url, NULL);
+	normalized = url_normalize(detached, info);
+	free(detached);
+
+	if (!normalized)
+		return NULL;
+
+	/*
+	 * Point path to ~ for URLs like this:
+	 *
+	 *    ssh://host.xz/~user/repo
+	 *    git://host.xz/~user/repo
+	 *    host.xz:~user/repo
+	 */
+	if (scheme == URL_SCHEME_GIT || scheme == URL_SCHEME_SSH) {
+		if (normalized[info->path_off + 1] == '~') {
+			info->path_off++;
+			info->path_len--;
+		}
+	}
+
+	return normalized;
+}
+
 static size_t url_match_prefix(const char *url,
 			       const char *url_prefix,
 			       size_t url_prefix_len)
diff --git a/urlmatch.h b/urlmatch.h
index 5ba85cea13..6b3ce42858 100644
--- a/urlmatch.h
+++ b/urlmatch.h
@@ -35,6 +35,7 @@ struct url_info {
 };
 
 char *url_normalize(const char *, struct url_info *);
+char *url_parse(const char *, struct url_info *);
 
 struct urlmatch_item {
 	size_t hostmatch_len;