From 533eb14798d0e4e288401b90d4684730a3ed9266 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Sat, 2 May 2026 05:28:40 +0000 Subject: [PATCH] builtin: create url-parse command Git commands can accept a rather wide variety of URL syntaxes. The range of accepted inputs might expand even more in the future. This makes the parsing of URL components difficult since standard URL parsers cannot be used. Extracting the components of a git URL would require implementing all the schemes that git itself supports, not to mention tracking its development continuously in case new URL schemes are added. The url-parse builtin command is designed to solve this problem by exposing git's native URL parsing facilities as a plumbing command. Other programs can then call upon git itself to parse the git URLs and extract their components. This should be quite useful for scripts. Signed-off-by: Matheus Afonso Martins Moreira Signed-off-by: Junio C Hamano --- .gitignore | 1 + Makefile | 1 + builtin.h | 1 + builtin/url-parse.c | 135 ++++++++++++++++++++++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + meson.build | 1 + 7 files changed, 141 insertions(+) create mode 100644 builtin/url-parse.c diff --git a/.gitignore b/.gitignore index 24635cf2d6..c5673daa6e 100644 --- a/.gitignore +++ b/.gitignore @@ -182,6 +182,7 @@ /git-update-server-info /git-upload-archive /git-upload-pack +/git-url-parse /git-var /git-verify-commit /git-verify-pack diff --git a/Makefile b/Makefile index cedc234173..1c757a1aa0 100644 --- a/Makefile +++ b/Makefile @@ -1497,6 +1497,7 @@ BUILTIN_OBJS += builtin/update-ref.o BUILTIN_OBJS += builtin/update-server-info.o BUILTIN_OBJS += builtin/upload-archive.o BUILTIN_OBJS += builtin/upload-pack.o +BUILTIN_OBJS += builtin/url-parse.o BUILTIN_OBJS += builtin/var.o BUILTIN_OBJS += builtin/verify-commit.o BUILTIN_OBJS += builtin/verify-pack.o diff --git a/builtin.h b/builtin.h index 235c51f30e..c6f7672991 100644 --- a/builtin.h +++ b/builtin.h @@ 
-271,6 +271,7 @@ int cmd_update_server_info(int argc, const char **argv, const char *prefix, stru int cmd_upload_archive(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_upload_archive_writer(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_upload_pack(int argc, const char **argv, const char *prefix, struct repository *repo); +int cmd_url_parse(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_var(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_verify_commit(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_verify_tag(int argc, const char **argv, const char *prefix, struct repository *repo); diff --git a/builtin/url-parse.c b/builtin/url-parse.c new file mode 100644 index 0000000000..7e705538c0 --- /dev/null +++ b/builtin/url-parse.c @@ -0,0 +1,135 @@ +#include "builtin.h" +#include "gettext.h" +#include "parse-options.h" +#include "url.h" +#include "urlmatch.h" + +static const char * const builtin_url_parse_usage[] = { + N_("git url-parse [-c ] [--] ..."), + NULL +}; + +static char *component_arg; + +static struct option builtin_url_parse_options[] = { + OPT_STRING('c', "component", &component_arg, N_("component"), + N_("which URL component to extract")), + OPT_END(), +}; + +enum url_component { + URL_NONE = 0, + URL_SCHEME, + URL_USER, + URL_PASSWORD, + URL_HOST, + URL_PORT, + URL_PATH, +}; + +static void parse_or_die(const char *url, struct url_info *info) +{ + if (url_is_local_not_ssh(url)) { + if (*url == '/') + die("'%s' is not a URL; if you meant a local " + "repository, use 'file://%s'", url, url); + if (has_dos_drive_prefix(url)) + die("'%s' is not a URL; if you meant a local " + "repository, use 'file:///%s'", url, url); + die("'%s' is not a URL; if you meant a local repository, " + "use a 'file://' URL with an absolute path", url); + } + if (!url_parse(url, info)) + die("invalid git URL 
'%s': %s", url, info->err); +} + +static enum url_component get_component_or_die(const char *arg) +{ + if (!strcmp("path", arg)) + return URL_PATH; + if (!strcmp("host", arg)) + return URL_HOST; + if (!strcmp("scheme", arg)) + return URL_SCHEME; + if (!strcmp("user", arg)) + return URL_USER; + if (!strcmp("password", arg)) + return URL_PASSWORD; + if (!strcmp("port", arg)) + return URL_PORT; + die("invalid git URL component '%s'", arg); +} + +static char *extract_component(enum url_component component, + struct url_info *info) +{ + size_t offset, length; + + switch (component) { + case URL_SCHEME: + offset = 0; + length = info->scheme_len; + break; + case URL_USER: + offset = info->user_off; + length = info->user_len; + break; + case URL_PASSWORD: + offset = info->passwd_off; + length = info->passwd_len; + break; + case URL_HOST: + offset = info->host_off; + length = info->host_len; + break; + case URL_PORT: + offset = info->port_off; + length = info->port_len; + break; + case URL_PATH: + offset = info->path_off; + length = info->path_len; + break; + case URL_NONE: + return NULL; + } + + return xstrndup(info->url + offset, length); +} + +int cmd_url_parse(int argc, + const char **argv, + const char *prefix, + struct repository *repo UNUSED) +{ + struct url_info info; + enum url_component selected = URL_NONE; + char *extracted; + int i; + + argc = parse_options(argc, argv, prefix, builtin_url_parse_options, + builtin_url_parse_usage, 0); + + if (argc == 0) + usage_with_options(builtin_url_parse_usage, + builtin_url_parse_options); + + if (component_arg) + selected = get_component_or_die(component_arg); + + for (i = 0; i < argc; i++) { + parse_or_die(argv[i], &info); + + if (selected != URL_NONE) { + extracted = extract_component(selected, &info); + if (extracted) { + puts(extracted); + free(extracted); + } + } + + free(info.url); + } + + return 0; +} diff --git a/command-list.txt b/command-list.txt index f9005cf459..1ede48186f 100644 --- a/command-list.txt +++ 
b/command-list.txt @@ -202,6 +202,7 @@ git-update-ref plumbingmanipulators git-update-server-info synchingrepositories git-upload-archive synchelpers git-upload-pack synchelpers +git-url-parse purehelpers git-var plumbinginterrogators git-verify-commit ancillaryinterrogators git-verify-pack plumbinginterrogators diff --git a/git.c b/git.c index 5a40eab8a2..a073eed931 100644 --- a/git.c +++ b/git.c @@ -670,6 +670,7 @@ static struct cmd_struct commands[] = { { "upload-archive", cmd_upload_archive, NO_PARSEOPT }, { "upload-archive--writer", cmd_upload_archive_writer, NO_PARSEOPT }, { "upload-pack", cmd_upload_pack }, + { "url-parse", cmd_url_parse }, { "var", cmd_var, RUN_SETUP_GENTLY | NO_PARSEOPT }, { "verify-commit", cmd_verify_commit, RUN_SETUP }, { "verify-pack", cmd_verify_pack }, diff --git a/meson.build b/meson.build index 11488623bf..dc3cf68ee5 100644 --- a/meson.build +++ b/meson.build @@ -686,6 +686,7 @@ builtin_sources = [ 'builtin/update-server-info.c', 'builtin/upload-archive.c', 'builtin/upload-pack.c', + 'builtin/url-parse.c', 'builtin/var.c', 'builtin/verify-commit.c', 'builtin/verify-pack.c',