Merge branch 'mm/git-url-parse'

The internal URL parsing logic has been made accessible via a new
subcommand "git url-parse".

* mm/git-url-parse:
  t9904: add tests for the new url-parse builtin
  doc: describe the url-parse builtin
  builtin: create url-parse command
  urlmatch: define url_parse function
  url: return URL_SCHEME_UNKNOWN instead of dying
  url: move scheme detection to URL header/source
  url: move url_is_local_not_ssh to url.h
  connect: rename enum protocol to url_scheme
This commit is contained in:
Junio C Hamano
2026-05-21 12:06:48 +09:00
19 changed files with 780 additions and 54 deletions
+1
View File
@@ -183,6 +183,7 @@
/git-update-server-info
/git-upload-archive
/git-upload-pack
/git-url-parse
/git-var
/git-verify-commit
/git-verify-pack
+80
View File
@@ -0,0 +1,80 @@
git-url-parse(1)
================
NAME
----
git-url-parse - Parse and extract git URL components
SYNOPSIS
--------
[synopsis]
git url-parse [-c <component>] [--] <url>...
DESCRIPTION
-----------
Git supports many ways to specify URLs, some of them non-standard.
For example, Git supports the scp-style `[user@]host:[path]` format.
This command eases interoperability with Git URLs by enabling the
parsing and extraction of the components of all Git URLs.
Any syntactically valid URL is parsed, even if the scheme is not one
Git supports for fetching or pushing.
OPTIONS
-------
`-c <component>`::
`--component <component>`::
Extract the _<component>_ component from the given Git URLs.
_<component>_ can be one of:
`scheme`, `user`, `password`, `host`, `port`, `path`.
OUTPUT
------
When `--component` is given, the requested component of each URL
is printed on its own line, in the order the URLs were given. If
the URL has no such component (for example, a port in a URL that
does not specify one), an empty line is printed in its place.
When `--component` is not given, no output is produced. The exit
status is zero if every URL parses successfully and non-zero
otherwise, allowing the command to be used purely as a validator.
EXAMPLES
--------
* Print the host name:
+
------------
$ git url-parse --component host https://example.com/user/repo
example.com
------------
* Print the path:
+
------------
$ git url-parse --component path https://example.com/user/repo
/user/repo
$ git url-parse --component path example.com:~user/repo
~user/repo
$ git url-parse --component path example.com:user/repo
/user/repo
------------
* Validate URLs without outputting anything:
+
------------
$ git url-parse https://example.com/user/repo example.com:~user/repo
------------
SEE ALSO
--------
linkgit:git-clone[1],
linkgit:git-fetch[1],
linkgit:git-config[1]
GIT
---
Part of the linkgit:git[1] suite
+1
View File
@@ -156,6 +156,7 @@ manpages = {
'git-update-server-info.adoc' : 1,
'git-upload-archive.adoc' : 1,
'git-upload-pack.adoc' : 1,
'git-url-parse.adoc' : 1,
'git-var.adoc' : 1,
'git-verify-commit.adoc' : 1,
'git-verify-pack.adoc' : 1,
+1
View File
@@ -1495,6 +1495,7 @@ BUILTIN_OBJS += builtin/update-ref.o
BUILTIN_OBJS += builtin/update-server-info.o
BUILTIN_OBJS += builtin/upload-archive.o
BUILTIN_OBJS += builtin/upload-pack.o
BUILTIN_OBJS += builtin/url-parse.o
BUILTIN_OBJS += builtin/var.o
BUILTIN_OBJS += builtin/verify-commit.o
BUILTIN_OBJS += builtin/verify-pack.o
+1
View File
@@ -272,6 +272,7 @@ int cmd_update_server_info(int argc, const char **argv, const char *prefix, stru
int cmd_upload_archive(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_upload_archive_writer(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_upload_pack(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_url_parse(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_var(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_verify_commit(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_verify_tag(int argc, const char **argv, const char *prefix, struct repository *repo);
+135
View File
@@ -0,0 +1,135 @@
#include "builtin.h"
#include "gettext.h"
#include "parse-options.h"
#include "url.h"
#include "urlmatch.h"
/*
 * Usage strings for "git url-parse"; NULL-terminated and passed to both
 * parse_options() and usage_with_options() in cmd_url_parse().
 */
static const char * const builtin_url_parse_usage[] = {
N_("git url-parse [-c <component>] [--] <url>..."),
NULL
};
/*
 * Raw argument of -c/--component. Stays NULL when the option is not
 * given, in which case cmd_url_parse() only validates the URLs.
 */
static char *component_arg;
/* Command line options understood by "git url-parse". */
static struct option builtin_url_parse_options[] = {
OPT_STRING('c', "component", &component_arg, N_("component"),
N_("which URL component to extract")),
OPT_END(),
};
/*
 * The URL component selected with -c/--component.
 * URL_NONE means no component was requested: extract_component()
 * returns NULL for it and cmd_url_parse() prints nothing.
 */
enum url_component {
URL_NONE = 0,
URL_SCHEME,
URL_USER,
URL_PASSWORD,
URL_HOST,
URL_PORT,
URL_PATH,
};
/*
 * Parse `url` into `info`, or die() with an explanatory message.
 *
 * Anything url_is_local_not_ssh() classifies as a local path is
 * rejected up front, with a hint on how to spell it as a file:// URL.
 * Everything else is handed to url_parse(); on failure the reason is
 * taken from info->err.
 */
static void parse_or_die(const char *url, struct url_info *info)
{
	if (!url_is_local_not_ssh(url)) {
		if (!url_parse(url, info))
			die("invalid git URL '%s': %s", url, info->err);
		return;
	}

	/* Local path: pick the most specific hint we can give. */
	if (*url == '/')
		die("'%s' is not a URL; if you meant a local "
		    "repository, use 'file://%s'", url, url);
	if (has_dos_drive_prefix(url))
		die("'%s' is not a URL; if you meant a local "
		    "repository, use 'file:///%s'", url, url);
	die("'%s' is not a URL; if you meant a local repository, "
	    "use a 'file://' URL with an absolute path", url);
}
static enum url_component get_component_or_die(const char *arg)
{
if (!strcmp("path", arg))
return URL_PATH;
if (!strcmp("host", arg))
return URL_HOST;
if (!strcmp("scheme", arg))
return URL_SCHEME;
if (!strcmp("user", arg))
return URL_USER;
if (!strcmp("password", arg))
return URL_PASSWORD;
if (!strcmp("port", arg))
return URL_PORT;
die("invalid git URL component '%s'", arg);
}
/*
 * Return a newly allocated copy of the requested component of the
 * parsed URL described by `info`, or NULL for URL_NONE.
 *
 * The component is located through the offset/length pairs recorded
 * in `info`; the scheme always starts at offset 0. The caller owns
 * (and must free) the returned string.
 */
static char *extract_component(enum url_component component,
			       struct url_info *info)
{
	size_t start, len;

	switch (component) {
	case URL_NONE:
		return NULL;
	case URL_PATH:
		start = info->path_off;
		len = info->path_len;
		break;
	case URL_PORT:
		start = info->port_off;
		len = info->port_len;
		break;
	case URL_HOST:
		start = info->host_off;
		len = info->host_len;
		break;
	case URL_PASSWORD:
		start = info->passwd_off;
		len = info->passwd_len;
		break;
	case URL_USER:
		start = info->user_off;
		len = info->user_len;
		break;
	case URL_SCHEME:
		start = 0;
		len = info->scheme_len;
		break;
	}

	return xstrndup(info->url + start, len);
}
/*
 * Entry point of "git url-parse".
 *
 * Every remaining argument is parsed as a URL; the command die()s on
 * the first one that does not parse. When -c/--component was given,
 * the selected component of each URL is printed on its own line;
 * otherwise nothing is printed and the command acts as a validator.
 */
int cmd_url_parse(int argc,
		  const char **argv,
		  const char *prefix,
		  struct repository *repo UNUSED)
{
	enum url_component selected = URL_NONE;
	struct url_info info;
	int i;

	argc = parse_options(argc, argv, prefix, builtin_url_parse_options,
			     builtin_url_parse_usage, 0);
	if (!argc)
		usage_with_options(builtin_url_parse_usage,
				   builtin_url_parse_options);

	if (component_arg)
		selected = get_component_or_die(component_arg);

	for (i = 0; i < argc; i++) {
		char *extracted = NULL;

		parse_or_die(argv[i], &info);

		if (selected != URL_NONE)
			extracted = extract_component(selected, &info);
		if (extracted)
			puts(extracted);

		free(extracted);
		free(info.url);
	}

	return 0;
}
+1
View File
@@ -203,6 +203,7 @@ git-update-ref plumbingmanipulators
git-update-server-info synchingrepositories
git-upload-archive synchelpers
git-upload-pack synchelpers
git-url-parse purehelpers
git-var plumbinginterrogators
git-verify-commit ancillaryinterrogators
git-verify-pack plumbinginterrogators
+25 -53
View File
@@ -700,51 +700,21 @@ int server_supports(const char *feature)
return !!server_feature_value(feature, NULL);
}
enum protocol {
PROTO_LOCAL = 1,
PROTO_FILE,
PROTO_SSH,
PROTO_GIT
};
int url_is_local_not_ssh(const char *url)
static const char *url_scheme_name(enum url_scheme scheme)
{
const char *colon = strchr(url, ':');
const char *slash = strchr(url, '/');
return !colon || (slash && slash < colon) ||
(has_dos_drive_prefix(url) && is_valid_path(url));
}
static const char *prot_name(enum protocol protocol)
{
switch (protocol) {
case PROTO_LOCAL:
case PROTO_FILE:
switch (scheme) {
case URL_SCHEME_LOCAL:
case URL_SCHEME_FILE:
return "file";
case PROTO_SSH:
case URL_SCHEME_SSH:
return "ssh";
case PROTO_GIT:
case URL_SCHEME_GIT:
return "git";
default:
return "unknown protocol";
}
}
static enum protocol get_protocol(const char *name)
{
if (!strcmp(name, "ssh"))
return PROTO_SSH;
if (!strcmp(name, "git"))
return PROTO_GIT;
if (!strcmp(name, "git+ssh")) /* deprecated - do not use */
return PROTO_SSH;
if (!strcmp(name, "ssh+git")) /* deprecated - do not use */
return PROTO_SSH;
if (!strcmp(name, "file"))
return PROTO_FILE;
die(_("protocol '%s' is not supported"), name);
}
static char *host_end(char **hoststart, int removebrackets)
{
char *host = *hoststart;
@@ -1081,14 +1051,14 @@ static char *get_port(char *host)
* Extract protocol and relevant parts from the specified connection URL.
* The caller must free() the returned strings.
*/
static enum protocol parse_connect_url(const char *url_orig, char **ret_host,
char **ret_path)
static enum url_scheme parse_connect_url(const char *url_orig, char **ret_host,
char **ret_path)
{
char *url;
char *host, *path;
char *end;
int separator = '/';
enum protocol protocol = PROTO_LOCAL;
enum url_scheme scheme = URL_SCHEME_LOCAL;
if (is_url(url_orig))
url = url_decode(url_orig);
@@ -1098,12 +1068,14 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host,
host = strstr(url, "://");
if (host) {
*host = '\0';
protocol = get_protocol(url);
scheme = url_get_scheme(url);
if (scheme == URL_SCHEME_UNKNOWN)
die(_("protocol '%s' is not supported"), url);
host += 3;
} else {
host = url;
if (!url_is_local_not_ssh(url)) {
protocol = PROTO_SSH;
scheme = URL_SCHEME_SSH;
separator = ':';
}
}
@@ -1114,13 +1086,13 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host,
*/
end = host_end(&host, 0);
if (protocol == PROTO_LOCAL)
if (scheme == URL_SCHEME_LOCAL)
path = end;
else if (protocol == PROTO_FILE && *host != '/' &&
else if (scheme == URL_SCHEME_FILE && *host != '/' &&
!has_dos_drive_prefix(host) &&
offset_1st_component(host - 2) > 1)
path = host - 2; /* include the leading "//" */
else if (protocol == PROTO_FILE && has_dos_drive_prefix(end))
else if (scheme == URL_SCHEME_FILE && has_dos_drive_prefix(end))
path = end; /* "file://$(pwd)" may be "file://C:/projects/repo" */
else
path = strchr(end, separator);
@@ -1136,7 +1108,7 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host,
end = path; /* Need to \0 terminate host here */
if (separator == ':')
path++; /* path starts after ':' */
if (protocol == PROTO_GIT || protocol == PROTO_SSH) {
if (scheme == URL_SCHEME_GIT || scheme == URL_SCHEME_SSH) {
if (path[1] == '~')
path++;
}
@@ -1147,7 +1119,7 @@ static enum protocol parse_connect_url(const char *url_orig, char **ret_host,
*ret_host = xstrdup(host);
*ret_path = path;
free(url);
return protocol;
return scheme;
}
static const char *get_ssh_command(void)
@@ -1432,7 +1404,7 @@ struct child_process *git_connect(int fd[2], const char *url,
{
char *hostandport, *path;
struct child_process *conn;
enum protocol protocol;
enum url_scheme scheme;
enum protocol_version version = get_protocol_version_config();
/*
@@ -1449,14 +1421,14 @@ struct child_process *git_connect(int fd[2], const char *url,
*/
signal(SIGCHLD, SIG_DFL);
protocol = parse_connect_url(url, &hostandport, &path);
if ((flags & CONNECT_DIAG_URL) && (protocol != PROTO_SSH)) {
scheme = parse_connect_url(url, &hostandport, &path);
if ((flags & CONNECT_DIAG_URL) && (scheme != URL_SCHEME_SSH)) {
printf("Diag: url=%s\n", url ? url : "NULL");
printf("Diag: protocol=%s\n", prot_name(protocol));
printf("Diag: protocol=%s\n", url_scheme_name(scheme));
printf("Diag: hostandport=%s\n", hostandport ? hostandport : "NULL");
printf("Diag: path=%s\n", path ? path : "NULL");
conn = NULL;
} else if (protocol == PROTO_GIT) {
} else if (scheme == URL_SCHEME_GIT) {
conn = git_connect_git(fd, hostandport, path, prog, version, flags);
conn->trace2_child_class = "transport/git";
} else {
@@ -1479,7 +1451,7 @@ struct child_process *git_connect(int fd[2], const char *url,
conn->use_shell = 1;
conn->in = conn->out = -1;
if (protocol == PROTO_SSH) {
if (scheme == URL_SCHEME_SSH) {
char *ssh_host = hostandport;
const char *port = NULL;
transport_check_allowed("ssh");
@@ -1490,7 +1462,7 @@ struct child_process *git_connect(int fd[2], const char *url,
if (flags & CONNECT_DIAG_URL) {
printf("Diag: url=%s\n", url ? url : "NULL");
printf("Diag: protocol=%s\n", prot_name(protocol));
printf("Diag: protocol=%s\n", url_scheme_name(scheme));
printf("Diag: userandhost=%s\n", ssh_host ? ssh_host : "NULL");
printf("Diag: port=%s\n", port ? port : "NONE");
printf("Diag: path=%s\n", path ? path : "NULL");
-1
View File
@@ -13,7 +13,6 @@ int git_connection_is_socket(struct child_process *conn);
int server_supports(const char *feature);
int parse_feature_request(const char *features, const char *feature);
const char *server_feature_value(const char *feature, size_t *len_ret);
int url_is_local_not_ssh(const char *url);
struct packet_reader;
enum protocol_version discover_version(struct packet_reader *reader);
+1
View File
@@ -671,6 +671,7 @@ static struct cmd_struct commands[] = {
{ "upload-archive", cmd_upload_archive, NO_PARSEOPT },
{ "upload-archive--writer", cmd_upload_archive_writer, NO_PARSEOPT },
{ "upload-pack", cmd_upload_pack },
{ "url-parse", cmd_url_parse },
{ "var", cmd_var, RUN_SETUP_GENTLY | NO_PARSEOPT },
{ "verify-commit", cmd_verify_commit, RUN_SETUP },
{ "verify-pack", cmd_verify_pack },
+1
View File
@@ -698,6 +698,7 @@ builtin_sources = [
'builtin/update-server-info.c',
'builtin/upload-archive.c',
'builtin/upload-pack.c',
'builtin/url-parse.c',
'builtin/var.c',
'builtin/verify-commit.c',
'builtin/verify-pack.c',
+1
View File
@@ -8,6 +8,7 @@
#include "gettext.h"
#include "hex.h"
#include "remote.h"
#include "url.h"
#include "urlmatch.h"
#include "refs.h"
#include "refspec.h"
+1
View File
@@ -1115,6 +1115,7 @@ integration_tests = [
't9901-git-web--browse.sh',
't9902-completion.sh',
't9903-bash-prompt.sh',
't9904-url-parse.sh',
]
benchmarks = [
+319
View File
@@ -0,0 +1,319 @@
#!/bin/sh
#
# Copyright (c) 2024 Matheus Afonso Martins Moreira
#
test_description='git url-parse tests'
. ./test-lib.sh
test_expect_success 'git url-parse -- ssh syntax' '
git url-parse "ssh://user@example.com:1234/repository/path" &&
git url-parse "ssh://user@example.com/repository/path" &&
git url-parse "ssh://example.com:1234/repository/path" &&
git url-parse "ssh://example.com/repository/path"
'
test_expect_success 'git url-parse -- git syntax' '
git url-parse "git://example.com:1234/repository/path" &&
git url-parse "git://example.com/repository/path"
'
test_expect_success 'git url-parse -- http syntax' '
git url-parse "https://example.com:1234/repository/path" &&
git url-parse "https://example.com/repository/path" &&
git url-parse "http://example.com:1234/repository/path" &&
git url-parse "http://example.com/repository/path"
'
test_expect_success 'git url-parse -- scp syntax' '
git url-parse "user@example.com:/repository/path" &&
git url-parse "example.com:/repository/path"
'
test_expect_success 'git url-parse -- username expansion - ssh syntax' '
git url-parse "ssh://user@example.com:1234/~user/repository" &&
git url-parse "ssh://user@example.com/~user/repository" &&
git url-parse "ssh://example.com:1234/~user/repository" &&
git url-parse "ssh://example.com/~user/repository"
'
test_expect_success 'git url-parse -- username expansion - git syntax' '
git url-parse "git://example.com:1234/~user/repository" &&
git url-parse "git://example.com/~user/repository"
'
test_expect_success 'git url-parse -- username expansion - scp syntax' '
git url-parse "user@example.com:~user/repository" &&
git url-parse "example.com:~user/repository"
'
test_expect_success 'git url-parse -- file urls' '
git url-parse "file:///repository/path" &&
git url-parse "file://"
'
test_expect_success 'git url-parse -c scheme -- ssh syntax' '
test ssh = "$(git url-parse -c scheme "ssh://user@example.com:1234/repository/path")" &&
test ssh = "$(git url-parse -c scheme "ssh://user@example.com/repository/path")" &&
test ssh = "$(git url-parse -c scheme "ssh://example.com:1234/repository/path")" &&
test ssh = "$(git url-parse -c scheme "ssh://example.com/repository/path")"
'
test_expect_success 'git url-parse -c scheme -- git syntax' '
test git = "$(git url-parse -c scheme "git://example.com:1234/repository/path")" &&
test git = "$(git url-parse -c scheme "git://example.com/repository/path")"
'
test_expect_success 'git url-parse -c scheme -- http syntax' '
test https = "$(git url-parse -c scheme "https://example.com:1234/repository/path")" &&
test https = "$(git url-parse -c scheme "https://example.com/repository/path")" &&
test http = "$(git url-parse -c scheme "http://example.com:1234/repository/path")" &&
test http = "$(git url-parse -c scheme "http://example.com/repository/path")"
'
test_expect_success 'git url-parse -c scheme -- scp syntax' '
test ssh = "$(git url-parse -c scheme "user@example.com:/repository/path")" &&
test ssh = "$(git url-parse -c scheme "example.com:/repository/path")"
'
test_expect_success 'git url-parse -c user -- ssh syntax' '
test user = "$(git url-parse -c user "ssh://user@example.com:1234/repository/path")" &&
test user = "$(git url-parse -c user "ssh://user@example.com/repository/path")" &&
test "" = "$(git url-parse -c user "ssh://example.com:1234/repository/path")" &&
test "" = "$(git url-parse -c user "ssh://example.com/repository/path")"
'
test_expect_success 'git url-parse -c user -- git syntax' '
test "" = "$(git url-parse -c user "git://example.com:1234/repository/path")" &&
test "" = "$(git url-parse -c user "git://example.com/repository/path")"
'
test_expect_success 'git url-parse -c user -- http syntax' '
test "" = "$(git url-parse -c user "https://example.com:1234/repository/path")" &&
test "" = "$(git url-parse -c user "https://example.com/repository/path")" &&
test "" = "$(git url-parse -c user "http://example.com:1234/repository/path")" &&
test "" = "$(git url-parse -c user "http://example.com/repository/path")"
'
test_expect_success 'git url-parse -c user -- scp syntax' '
test user = "$(git url-parse -c user "user@example.com:/repository/path")" &&
test "" = "$(git url-parse -c user "example.com:/repository/path")"
'
test_expect_success 'git url-parse -c password -- http syntax' '
test secret = "$(git url-parse -c password "https://user:secret@example.com:1234/repository/path")" &&
test secret = "$(git url-parse -c password "http://user:secret@example.com/repository/path")" &&
test "" = "$(git url-parse -c password "https://user@example.com/repository/path")" &&
test "" = "$(git url-parse -c password "https://example.com/repository/path")"
'
test_expect_success 'git url-parse -c host -- ssh syntax' '
test example.com = "$(git url-parse -c host "ssh://user@example.com:1234/repository/path")" &&
test example.com = "$(git url-parse -c host "ssh://user@example.com/repository/path")" &&
test example.com = "$(git url-parse -c host "ssh://example.com:1234/repository/path")" &&
test example.com = "$(git url-parse -c host "ssh://example.com/repository/path")"
'
test_expect_success 'git url-parse -c host -- git syntax' '
test example.com = "$(git url-parse -c host "git://example.com:1234/repository/path")" &&
test example.com = "$(git url-parse -c host "git://example.com/repository/path")"
'
test_expect_success 'git url-parse -c host -- http syntax' '
test example.com = "$(git url-parse -c host "https://example.com:1234/repository/path")" &&
test example.com = "$(git url-parse -c host "https://example.com/repository/path")" &&
test example.com = "$(git url-parse -c host "http://example.com:1234/repository/path")" &&
test example.com = "$(git url-parse -c host "http://example.com/repository/path")"
'
test_expect_success 'git url-parse -c host -- scp syntax' '
test example.com = "$(git url-parse -c host "user@example.com:/repository/path")" &&
test example.com = "$(git url-parse -c host "example.com:/repository/path")"
'
test_expect_success 'git url-parse -c port -- ssh syntax' '
test 1234 = "$(git url-parse -c port "ssh://user@example.com:1234/repository/path")" &&
test "" = "$(git url-parse -c port "ssh://user@example.com/repository/path")" &&
test 1234 = "$(git url-parse -c port "ssh://example.com:1234/repository/path")" &&
test "" = "$(git url-parse -c port "ssh://example.com/repository/path")"
'
test_expect_success 'git url-parse -c port -- git syntax' '
test 1234 = "$(git url-parse -c port "git://example.com:1234/repository/path")" &&
test "" = "$(git url-parse -c port "git://example.com/repository/path")"
'
test_expect_success 'git url-parse -c port -- http syntax' '
test 1234 = "$(git url-parse -c port "https://example.com:1234/repository/path")" &&
test "" = "$(git url-parse -c port "https://example.com/repository/path")" &&
test 1234 = "$(git url-parse -c port "http://example.com:1234/repository/path")" &&
test "" = "$(git url-parse -c port "http://example.com/repository/path")"
'
test_expect_success 'git url-parse -c port -- scp syntax' '
test "" = "$(git url-parse -c port "user@example.com:/repository/path")" &&
test "" = "$(git url-parse -c port "example.com:/repository/path")"
'
test_expect_success 'git url-parse -c path -- ssh syntax' '
test "/repository/path" = "$(git url-parse -c path "ssh://user@example.com:1234/repository/path")" &&
test "/repository/path" = "$(git url-parse -c path "ssh://user@example.com/repository/path")" &&
test "/repository/path" = "$(git url-parse -c path "ssh://example.com:1234/repository/path")" &&
test "/repository/path" = "$(git url-parse -c path "ssh://example.com/repository/path")"
'
test_expect_success 'git url-parse -c path -- git syntax' '
test "/repository/path" = "$(git url-parse -c path "git://example.com:1234/repository/path")" &&
test "/repository/path" = "$(git url-parse -c path "git://example.com/repository/path")"
'
test_expect_success 'git url-parse -c path -- http syntax' '
test "/repository/path" = "$(git url-parse -c path "https://example.com:1234/repository/path")" &&
test "/repository/path" = "$(git url-parse -c path "https://example.com/repository/path")" &&
test "/repository/path" = "$(git url-parse -c path "http://example.com:1234/repository/path")" &&
test "/repository/path" = "$(git url-parse -c path "http://example.com/repository/path")"
'
test_expect_success 'git url-parse -c path -- scp syntax' '
test "/repository/path" = "$(git url-parse -c path "user@example.com:/repository/path")" &&
test "/repository/path" = "$(git url-parse -c path "example.com:/repository/path")"
'
test_expect_success 'git url-parse -c path -- username expansion - ssh syntax' '
test "~user/repository" = "$(git url-parse -c path "ssh://user@example.com:1234/~user/repository")" &&
test "~user/repository" = "$(git url-parse -c path "ssh://user@example.com/~user/repository")" &&
test "~user/repository" = "$(git url-parse -c path "ssh://example.com:1234/~user/repository")" &&
test "~user/repository" = "$(git url-parse -c path "ssh://example.com/~user/repository")"
'
test_expect_success 'git url-parse -c path -- username expansion - git syntax' '
test "~user/repository" = "$(git url-parse -c path "git://example.com:1234/~user/repository")" &&
test "~user/repository" = "$(git url-parse -c path "git://example.com/~user/repository")"
'
test_expect_success 'git url-parse -c path -- username expansion - scp syntax' '
test "~user/repository" = "$(git url-parse -c path "user@example.com:~user/repository")" &&
test "~user/repository" = "$(git url-parse -c path "example.com:~user/repository")"
'
# The reported path must end before any "?query" or "#fragment" part,
# both for scheme://-style URLs and for the scp-like short form, even
# when the path starts with "~user".
test_expect_success 'git url-parse -c path -- username expansion strips query and fragment' '
test "~user/repository" = "$(git url-parse -c path "ssh://example.com/~user/repository?query")" &&
test "~user/repository" = "$(git url-parse -c path "ssh://example.com/~user/repository#fragment")" &&
test "~user/repository" = "$(git url-parse -c path "git://example.com/~user/repository?query")" &&
test "~user/repository" = "$(git url-parse -c path "user@example.com:~user/repository?query")"
'
test_expect_success 'git url-parse -- ssh syntax with IPv6' '
git url-parse "ssh://user@[::1]:1234/repository/path" &&
git url-parse "ssh://user@[::1]/repository/path" &&
git url-parse "ssh://[::1]:1234/repository/path" &&
git url-parse "ssh://[::1]/repository/path" &&
git url-parse "ssh://[2001:db8::1]/repository/path"
'
test_expect_success 'git url-parse -- git syntax with IPv6' '
git url-parse "git://[::1]:9418/repository/path" &&
git url-parse "git://[::1]/repository/path"
'
test_expect_success 'git url-parse -- http syntax with IPv6' '
git url-parse "https://[::1]:1234/repository/path" &&
git url-parse "https://[::1]/repository/path" &&
git url-parse "http://[2001:db8::1]/repository/path"
'
test_expect_success 'git url-parse -c host -- IPv6 in URL form' '
test "[::1]" = "$(git url-parse -c host "ssh://user@[::1]:1234/repository/path")" &&
test "[::1]" = "$(git url-parse -c host "ssh://[::1]/repository/path")" &&
test "[2001:db8::1]" = "$(git url-parse -c host "ssh://[2001:db8::1]/repository/path")" &&
test "[::1]" = "$(git url-parse -c host "git://[::1]/repository/path")" &&
test "[2001:db8::1]" = "$(git url-parse -c host "https://[2001:db8::1]/repository/path")"
'
test_expect_success 'git url-parse -c port -- IPv6 in URL form' '
test 1234 = "$(git url-parse -c port "ssh://user@[::1]:1234/repository/path")" &&
test "" = "$(git url-parse -c port "ssh://[::1]/repository/path")" &&
test 9418 = "$(git url-parse -c port "git://[::1]:9418/repository/path")"
'
test_expect_success 'git url-parse -- scp syntax with IPv6' '
git url-parse "[::1]:repository/path" &&
git url-parse "user@[::1]:repository/path" &&
git url-parse "[2001:db8::1]:repo"
'
test_expect_success 'git url-parse -- scp syntax with bracketed hostname' '
git url-parse "[myhost]:src" &&
git url-parse "user@[myhost]:src"
'
test_expect_success 'git url-parse -- scp syntax with bracketed host:port' '
git url-parse "[myhost:123]:src" &&
git url-parse "user@[myhost:123]:src"
'
test_expect_success 'git url-parse -c host -- scp+IPv6' '
test "[::1]" = "$(git url-parse -c host "[::1]:repository/path")" &&
test "[::1]" = "$(git url-parse -c host "user@[::1]:repository/path")" &&
test "[2001:db8::1]" = "$(git url-parse -c host "[2001:db8::1]:repo")"
'
test_expect_success 'git url-parse -c path -- scp+IPv6' '
test "/repository/path" = "$(git url-parse -c path "[::1]:/repository/path")" &&
test "/repository/path" = "$(git url-parse -c path "[::1]:repository/path")" &&
test "/repo" = "$(git url-parse -c path "[2001:db8::1]:repo")"
'
# In the scp-like form, "[host:port]" carries a port inside the
# brackets; the brackets are dropped from the reported host and the
# relative path "src" is reported with a leading slash.
test_expect_success 'git url-parse -c host,port,path -- scp [host:port]:src' '
test myhost = "$(git url-parse -c host "[myhost:123]:src")" &&
test 123 = "$(git url-parse -c port "[myhost:123]:src")" &&
test "/src" = "$(git url-parse -c path "[myhost:123]:src")"
'
test_expect_success 'git url-parse -c host,path -- scp [host]:src' '
test myhost = "$(git url-parse -c host "[myhost]:src")" &&
test "/src" = "$(git url-parse -c path "[myhost]:src")"
'
test_expect_success 'git url-parse -c user -- scp with user@ and brackets' '
test user = "$(git url-parse -c user "user@[::1]:repo")" &&
test user = "$(git url-parse -c user "user@[myhost:123]:src")" &&
test user = "$(git url-parse -c user "user@[myhost]:src")"
'
test_expect_success 'git url-parse -- scp+IPv6 with username expansion' '
test "~user/repo" = "$(git url-parse -c path "[::1]:~user/repo")" &&
test "~user/repo" = "$(git url-parse -c path "user@[::1]:~user/repo")"
'
test_expect_success 'git url-parse fails on invalid URL' '
test_must_fail git url-parse "not a url"
'
test_expect_success 'git url-parse helpful error for absolute local path' '
test_must_fail git url-parse "/abs/path" 2>err &&
test_grep "is not a URL" err &&
test_grep "file:///" err
'
test_expect_success 'git url-parse helpful error for relative local path' '
test_must_fail git url-parse "./rel" 2>err &&
test_grep "is not a URL" err &&
test_grep "absolute path" err
'
test_expect_success 'git url-parse fails on unknown -c component name' '
test_must_fail git url-parse -c bogus "https://example.com/repo"
'
test_expect_success 'git url-parse fails on URL missing host' '
test_must_fail git url-parse "https://"
'
test_expect_success 'git url-parse with no URL prints usage' '
test_must_fail git url-parse 2>err &&
test_grep "usage:" err
'
test_done
+45
View File
@@ -245,3 +245,48 @@ void test_urlmatch_normalization__equivalents(void)
compare_normalized_urls("https://@x.y/^/../abc", "httpS://@x.y:0443/abc", 1);
compare_normalized_urls("https://@x.y/^/..", "httpS://@x.y:0443/", 1);
}
/*
 * Parse `url` with url_parse() and assert that parsing succeeds and
 * that the path component it reports equals `expected_path`.
 */
static void check_parsed_path(const char *url, const char *expected_path)
{
struct url_info info;
char *parsed = url_parse(url, &info);
char *path;
cl_assert(parsed != NULL);
/* the path is the path_len bytes starting at path_off in the result */
path = xstrndup(parsed + info.path_off, info.path_len);
cl_assert_equal_s(path, expected_path);
free(path);
free(parsed);
}
/* Path extraction for the scp-like "[user@]host:path" short form. */
void test_urlmatch_normalization__parse_scp(void)
{
	/* { input URL, expected path component } */
	static const char *const cases[][2] = {
		{ "host:path", "/path" },
		{ "user@host:path", "/path" },
		{ "host:~user/repo", "~user/repo" },
		{ "user@host:~user/repo", "~user/repo" },
		{ "[host]:src", "/src" },
		{ "[host:123]:src", "/src" },
		{ "[::1]:repo", "/repo" },
		{ "user@[::1]:repo", "/repo" },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		check_parsed_path(cases[i][0], cases[i][1]);
}
/* Path extraction for regular "scheme://host[:port]/path" URLs. */
void test_urlmatch_normalization__parse_url_form(void)
{
	/* { input URL, expected path component } */
	static const char *const cases[][2] = {
		{ "ssh://host/repo", "/repo" },
		{ "ssh://host/~user/repo", "~user/repo" },
		{ "git://host:9418/repo", "/repo" },
		{ "git://host/~user/repo", "~user/repo" },
		{ "ssh://[::1]:1234/repo", "/repo" },
		{ "http://[2001:db8::1]/repo", "/repo" },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		check_parsed_path(cases[i][0], cases[i][1]);
}
/* The reported path must stop before any "?query" or "#fragment". */
void test_urlmatch_normalization__parse_strips_query_and_fragment(void)
{
	/* { input URL, expected path component } */
	static const char *const cases[][2] = {
		{ "ssh://host/~user/repo?q", "~user/repo" },
		{ "ssh://host/~user/repo#frag", "~user/repo" },
		{ "git://host/~user/repo?q", "~user/repo" },
		{ "user@host:~user/repo?q", "~user/repo" },
		{ "https://host/repo?q", "/repo" },
		{ "https://host/repo#frag", "/repo" },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		check_parsed_path(cases[i][0], cases[i][1]);
}
+23
View File
@@ -132,3 +132,26 @@ void str_end_url_with_slash(const char *url, char **dest)
free(*dest);
*dest = strbuf_detach(&buf, NULL);
}
/*
 * Decide whether `url` names a local path rather than an scp-like
 * "host:path" address. It is local when it has no ':' at all, when a
 * '/' appears before the first ':', or when it starts with a DOS drive
 * prefix and is a valid path.
 */
int url_is_local_not_ssh(const char *url)
{
	const char *colon = strchr(url, ':');
	const char *slash;

	if (!colon)
		return 1;

	slash = strchr(url, '/');
	if (slash && slash < colon)
		return 1;

	return has_dos_drive_prefix(url) && is_valid_path(url);
}
enum url_scheme url_get_scheme(const char *name)
{
if (!strcmp(name, "ssh"))
return URL_SCHEME_SSH;
if (!strcmp(name, "git"))
return URL_SCHEME_GIT;
if (!strcmp(name, "git+ssh")) /* deprecated - do not use */
return URL_SCHEME_SSH;
if (!strcmp(name, "ssh+git")) /* deprecated - do not use */
return URL_SCHEME_SSH;
if (!strcmp(name, "file"))
return URL_SCHEME_FILE;
return URL_SCHEME_UNKNOWN;
}
+16
View File
@@ -21,6 +21,22 @@ char *url_decode_parameter_value(const char **query);
void end_url_with_slash(struct strbuf *buf, const char *url);
void str_end_url_with_slash(const char *url, char **dest);
/*
 * Return non-zero when `url` looks like a local path rather than an
 * scp-like "host:path" address: it has no ':', or a '/' comes before
 * its first ':', or it starts with a DOS drive prefix and is a valid
 * path.
 */
int url_is_local_not_ssh(const char *url);
/*
 * Schemes Git distinguishes when parsing a URL.
 * URL_SCHEME_UNKNOWN is zero and is what url_get_scheme() returns for
 * a name it does not recognize. URL_SCHEME_LOCAL is not a named
 * scheme; callers use it as the default for input without "://".
 */
enum url_scheme {
URL_SCHEME_UNKNOWN = 0,
URL_SCHEME_LOCAL,
URL_SCHEME_FILE,
URL_SCHEME_SSH,
URL_SCHEME_GIT,
};
/*
* Identify the URL scheme by name. Returns URL_SCHEME_UNKNOWN
* if the name does not match any scheme that Git knows about.
*/
enum url_scheme url_get_scheme(const char *name);
/*
* The set of unreserved characters as per STD66 (RFC3986) is
* '[A-Za-z0-9-._~]'. These characters are safe to appear in URI
+127
View File
@@ -5,6 +5,7 @@
#include "hex-ll.h"
#include "strbuf.h"
#include "urlmatch.h"
#include "url.h"
#define URL_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
#define URL_DIGIT "0123456789"
@@ -440,6 +441,132 @@ char *url_normalize(const char *url, struct url_info *out_info)
return url_normalize_1(url, out_info, 0);
}
/*
 * Parse a Git URL, including the scp-like "[user@]host:path" short
 * form, and describe its components in `out_info` (may be NULL when
 * the caller only wants the string).
 *
 * scp-like input is first rewritten into an equivalent "ssh://" URL
 * and then handed to url_normalize(), which does the actual parsing.
 * For ssh and git URLs whose path starts with "/~", the reported path
 * is adjusted to start at the '~'.
 *
 * Returns the newly allocated string produced by url_normalize(), or
 * NULL if url_normalize() rejected the URL. The caller is responsible
 * for freeing the returned string.
 */
char *url_parse(const char *url_orig, struct url_info *out_info)
{
	struct strbuf url;
	char *host = NULL, *separator = NULL;
	char *detached, *normalized;
	char *url_decoded;
	enum url_scheme scheme = URL_SCHEME_LOCAL;
	struct url_info local_info;
	struct url_info *info = out_info ? out_info : &local_info;
	bool scp_syntax = false;

	if (is_url(url_orig))
		url_decoded = url_decode(url_orig);
	else
		url_decoded = xstrdup(url_orig);

	/* leave room for the "ssh://" prefix the scp-like form needs */
	strbuf_init(&url, strlen(url_decoded) + sizeof("ssh://"));
	strbuf_addstr(&url, url_decoded);
	free(url_decoded);

	host = strstr(url.buf, "://");
	if (host) {
		/*
		 * Temporarily NUL-terminate the scheme name
		 * so we can pass it to url_get_scheme(),
		 * then restore the ':' so the buffer
		 * is intact for url_normalize() below.
		 */
		char saved = *host;
		*host = '\0';
		scheme = url_get_scheme(url.buf);
		*host = saved;
		host += 3;
	} else {
		if (!url_is_local_not_ssh(url.buf)) {
			scp_syntax = true;
			scheme = URL_SCHEME_SSH;
			strbuf_insertstr(&url, 0, "ssh://");
			/* the insertion may have moved the buffer */
			host = url.buf + strlen("ssh://");
		}
	}

	/*
	 * Path starts after ':' in scp style SSH URLs.
	 *
	 * The host portion can begin with an optional "user@",
	 * and the host itself can be wrapped in '[' ']' brackets.
	 * The bracket form is git's legacy way of supporting:
	 *
	 * - IPv6 literals: [::1]:repo
	 * - host:port pairs in the short form: [myhost:123]:src
	 * - Plain hostnames that happen to need bracketing: [host]:path
	 *
	 * Treat '[' followed by 0 or 1 inner colons as the host:port
	 * or plain hostname form and strip the brackets so url_normalize
	 * sees host[:port] natively. Two or more inner colons mark an
	 * IPv6 literal: keep the brackets for url_normalize to recognize.
	 *
	 * The scp path separator is the ':' that follows the host part,
	 * and we must skip over user@ and any '[...]' before searching.
	 */
	if (scp_syntax) {
		char *user_at;
		char *first_colon;
		char *host_start;
		char *bracket_end;

		/*
		 * Only an '@' that comes before the first ':' can end a
		 * "user@" prefix. An '@' after it belongs to the path
		 * (e.g. "host:repo@v2") and must not be mistaken for
		 * the end of the user name, or the real separator would
		 * never be found.
		 */
		user_at = strchr(host, '@');
		first_colon = strchr(host, ':');
		if (user_at && first_colon && first_colon < user_at)
			user_at = NULL;
		host_start = user_at ? user_at + 1 : host;

		if (*host_start == '[') {
			char *p;
			int inner_colons;

			bracket_end = strchr(host_start, ']');
			inner_colons = 0;
			for (p = host_start + 1; bracket_end && p < bracket_end; p++)
				if (*p == ':')
					inner_colons++;

			if (bracket_end && inner_colons <= 1) {
				size_t close_off = bracket_end - url.buf;
				size_t open_off = host_start - url.buf;

				strbuf_remove(&url, close_off, 1);
				strbuf_remove(&url, open_off, 1);
				/*
				 * Whatever followed ']' now sits at
				 * close_off - 1. Search for the ':' from
				 * there instead of assuming it is the very
				 * next byte: for input such as "[a:b]" that
				 * byte is the NUL terminator and must not
				 * be overwritten below.
				 */
				separator = strchr(url.buf + close_off - 1, ':');
			} else if (bracket_end) {
				separator = strchr(bracket_end + 1, ':');
			} else {
				separator = strchr(host_start, ':');
			}
		} else {
			separator = strchr(host_start, ':');
		}

		if (separator) {
			/* "host:/abs" -> "host/abs", "host:rel" -> "host/rel" */
			if (separator[1] == '/')
				strbuf_remove(&url, separator - url.buf, 1);
			else
				*separator = '/';
		}
	}

	detached = strbuf_detach(&url, NULL);
	normalized = url_normalize(detached, info);
	free(detached);

	if (!normalized)
		return NULL;

	/*
	 * Point path to ~ for URLs like this:
	 *
	 *     ssh://host.xz/~user/repo
	 *     git://host.xz/~user/repo
	 *     host.xz:~user/repo
	 */
	if (scheme == URL_SCHEME_GIT || scheme == URL_SCHEME_SSH) {
		if (normalized[info->path_off + 1] == '~') {
			info->path_off++;
			info->path_len--;
		}
	}

	return normalized;
}
static size_t url_match_prefix(const char *url,
const char *url_prefix,
size_t url_prefix_len)
+1
View File
@@ -35,6 +35,7 @@ struct url_info {
};
char *url_normalize(const char *, struct url_info *);
char *url_parse(const char *, struct url_info *);
struct urlmatch_item {
size_t hostmatch_len;