Merge branch 'jc/string-list-split'

The string_list_split*() family of functions has been extended to
simplify common use cases.

* jc/string-list-split:
  string-list: split-then-remove-empty can be done while splitting
  string-list: optionally omit empty string pieces in string_list_split*()
  diff: simplify parsing of diff.colormovedws
  string-list: optionally trim string pieces split by string_list_split*()
  string-list: unify string_list_split* functions
  string-list: align string_list_split() with its _in_place() counterpart
  string-list: report programming error with BUG
This commit is contained in:
Junio C Hamano
2025-08-21 13:46:59 -07:00
21 changed files with 226 additions and 91 deletions

View File

@@ -413,7 +413,7 @@ static void parse_color_fields(const char *s)
colorfield_nr = 0;
/* Ideally this would be stripped and split at the same time? */
string_list_split(&l, s, ',', -1);
string_list_split(&l, s, ",", -1);
ALLOC_GROW(colorfield, colorfield_nr + 1, colorfield_alloc);
for_each_string_list_item(item, &l) {

View File

@@ -875,7 +875,7 @@ static void add_strategies(const char *string, unsigned attr)
if (string) {
struct string_list list = STRING_LIST_INIT_DUP;
struct string_list_item *item;
string_list_split(&list, string, ' ', -1);
string_list_split(&list, string, " ", -1);
for_each_string_list_item(item, &list)
append_strategy(get_strategy(item->string));
string_list_clear(&list, 0);

View File

@@ -182,7 +182,7 @@ static void list_vars(void)
if (ptr->multivalued && *val) {
struct string_list list = STRING_LIST_INIT_DUP;
string_list_split(&list, val, '\n', -1);
string_list_split(&list, val, "\n", -1);
for (size_t i = 0; i < list.nr; i++)
printf("%s=%s\n", ptr->name, list.items[i].string);
string_list_clear(&list, 0);

View File

@@ -407,7 +407,7 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list,
* name. Subsequent fields (symref-target and peeled) are optional and
* don't have a particular order.
*/
if (string_list_split(&line_sections, line, ' ', -1) < 2) {
if (string_list_split(&line_sections, line, " ", -1) < 2) {
ret = 0;
goto out;
}

20
diff.c
View File

@@ -327,29 +327,23 @@ static unsigned parse_color_moved_ws(const char *arg)
struct string_list l = STRING_LIST_INIT_DUP;
struct string_list_item *i;
string_list_split(&l, arg, ',', -1);
string_list_split_f(&l, arg, ",", -1, STRING_LIST_SPLIT_TRIM);
for_each_string_list_item(i, &l) {
struct strbuf sb = STRBUF_INIT;
strbuf_addstr(&sb, i->string);
strbuf_trim(&sb);
if (!strcmp(sb.buf, "no"))
if (!strcmp(i->string, "no"))
ret = 0;
else if (!strcmp(sb.buf, "ignore-space-change"))
else if (!strcmp(i->string, "ignore-space-change"))
ret |= XDF_IGNORE_WHITESPACE_CHANGE;
else if (!strcmp(sb.buf, "ignore-space-at-eol"))
else if (!strcmp(i->string, "ignore-space-at-eol"))
ret |= XDF_IGNORE_WHITESPACE_AT_EOL;
else if (!strcmp(sb.buf, "ignore-all-space"))
else if (!strcmp(i->string, "ignore-all-space"))
ret |= XDF_IGNORE_WHITESPACE;
else if (!strcmp(sb.buf, "allow-indentation-change"))
else if (!strcmp(i->string, "allow-indentation-change"))
ret |= COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE;
else {
ret |= COLOR_MOVED_WS_ERROR;
error(_("unknown color-moved-ws mode '%s', possible values are 'ignore-space-change', 'ignore-space-at-eol', 'ignore-all-space', 'allow-indentation-change'"), sb.buf);
error(_("unknown color-moved-ws mode '%s', possible values are 'ignore-space-change', 'ignore-space-at-eol', 'ignore-all-space', 'allow-indentation-change'"), i->string);
}
strbuf_release(&sb);
}
if ((ret & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) &&

View File

@@ -1914,7 +1914,7 @@ static void fetch_pack_config(void)
char *str;
if (!repo_config_get_string(the_repository, "fetch.uriprotocols", &str) && str) {
string_list_split(&uri_protocols, str, ',', -1);
string_list_split(&uri_protocols, str, ",", -1);
free(str);
}
}

View File

@@ -894,7 +894,7 @@ static int string_list_add_note_lines(struct string_list *list,
* later, along with any empty strings that came from empty
* lines within the file.
*/
string_list_split(list, data, '\n', -1);
string_list_split(list, data, "\n", -1);
free(data);
return 0;
}
@@ -973,8 +973,8 @@ void string_list_add_refs_from_colon_sep(struct string_list *list,
char *globs_copy = xstrdup(globs);
int i;
string_list_split_in_place(&split, globs_copy, ":", -1);
string_list_remove_empty_items(&split, 0);
string_list_split_in_place_f(&split, globs_copy, ":", -1,
STRING_LIST_SPLIT_NONEMPTY);
for (i = 0; i < split.nr; i++)
string_list_add_refs_by_glob(list, split.items[i].string);

View File

@@ -1338,7 +1338,7 @@ static enum parse_opt_result usage_with_options_internal(struct parse_opt_ctx_t
if (!saw_empty_line && !*str)
saw_empty_line = 1;
string_list_split(&list, str, '\n', -1);
string_list_split(&list, str, "\n", -1);
for (j = 0; j < list.nr; j++) {
const char *line = list.items[j].string;

View File

@@ -201,8 +201,7 @@ static void parse_pathspec_attr_match(struct pathspec_item *item, const char *va
if (!value || !*value)
die(_("attr spec must not be empty"));
string_list_split(&list, value, ' ', -1);
string_list_remove_empty_items(&list, 0);
string_list_split_f(&list, value, " ", -1, STRING_LIST_SPLIT_NONEMPTY);
item->attr_check = attr_check_alloc();
CALLOC_ARRAY(item->attr_match, list.nr);

View File

@@ -61,7 +61,7 @@ enum protocol_version determine_protocol_version_server(void)
if (git_protocol) {
struct string_list list = STRING_LIST_INIT_DUP;
const struct string_list_item *item;
string_list_split(&list, git_protocol, ':', -1);
string_list_split(&list, git_protocol, ":", -1);
for_each_string_list_item(item, &list) {
const char *value;

View File

@@ -435,7 +435,7 @@ static int remote_ref_atom_parser(struct ref_format *format UNUSED,
}
atom->u.remote_ref.nobracket = 0;
string_list_split(&params, arg, ',', -1);
string_list_split(&params, arg, ",", -1);
for (i = 0; i < params.nr; i++) {
const char *s = params.items[i].string;
@@ -831,7 +831,7 @@ static int align_atom_parser(struct ref_format *format UNUSED,
align->position = ALIGN_LEFT;
string_list_split(&params, arg, ',', -1);
string_list_split(&params, arg, ",", -1);
for (i = 0; i < params.nr; i++) {
const char *s = params.items[i].string;
int position;

View File

@@ -1460,8 +1460,9 @@ static enum discovery_result setup_git_directory_gently_1(struct strbuf *dir,
if (env_ceiling_dirs) {
int empty_entry_found = 0;
static const char path_sep[] = { PATH_SEP, '\0' };
string_list_split(&ceiling_dirs, env_ceiling_dirs, PATH_SEP, -1);
string_list_split(&ceiling_dirs, env_ceiling_dirs, path_sep, -1);
filter_string_list(&ceiling_dirs, 0,
canonicalize_ceiling_entry, &empty_entry_found);
ceil_offset = longest_ancestor_length(dir->buf, &ceiling_dirs);

View File

@@ -276,55 +276,99 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_
list->nr--;
}
int string_list_split(struct string_list *list, const char *string,
int delim, int maxsplit)
/*
 * Append the piece that starts at `p` and stops just before `end`
 * (half-open range; a NULL `end` means "up to the terminating NUL of
 * `p`") to `list`.
 *
 * With STRING_LIST_SPLIT_TRIM, trailing whitespace is dropped from the
 * piece first.  With STRING_LIST_SPLIT_NONEMPTY, a piece that is (or
 * has become) empty is not appended at all.
 *
 * When `in_place` is true, the byte at the end of the piece is
 * overwritten with NUL and `p` itself is appended; otherwise a fresh
 * copy of the piece is appended.
 *
 * Returns the number of items appended: 1, or 0 when the piece was
 * skipped.
 */
static int append_one(struct string_list *list,
		      const char *p, const char *end,
		      int in_place, unsigned flags)
{
	const char *stop = end ? end : p + strlen(p);

	if (flags & STRING_LIST_SPLIT_TRIM) {
		/* strip whitespace from the right end of the piece */
		while (p < stop && isspace(stop[-1]))
			stop--;
	}

	if ((flags & STRING_LIST_SPLIT_NONEMPTY) && stop <= p)
		return 0;

	if (!in_place) {
		string_list_append_nodup(list, xmemdupz(p, stop - p));
		return 1;
	}

	/* terminate the piece inside the caller's buffer */
	*((char *)stop) = '\0';
	string_list_append(list, p);
	return 1;
}
/*
* Unfortunately this cannot become a public interface, as _in_place()
* wants to have "const char *string" while the other variant wants to
* have "char *string" for type safety.
*
* This accepts "const char *string" to allow both wrappers to use it;
* it internally casts away the constness when in_place is true by
* taking advantage of strpbrk() that takes a "const char *" arg and
* returns "char *" pointer into that const string. Yucky but works ;-).
*/
static int split_string(struct string_list *list, const char *string, const char *delim,
int maxsplit, int in_place, unsigned flags)
{
int count = 0;
const char *p = string, *end;
const char *p = string;
if (in_place && list->strdup_strings)
BUG("string_list_split_in_place() called with strdup_strings");
else if (!in_place && !list->strdup_strings)
BUG("string_list_split() called without strdup_strings");
if (!list->strdup_strings)
die("internal error in string_list_split(): "
"list->strdup_strings must be set");
for (;;) {
count++;
if (maxsplit >= 0 && count > maxsplit) {
string_list_append(list, p);
return count;
char *end;
if (flags & STRING_LIST_SPLIT_TRIM) {
/* ltrim */
while (*p && isspace(*p))
p++;
}
end = strchr(p, delim);
if (end) {
string_list_append_nodup(list, xmemdupz(p, end - p));
p = end + 1;
} else {
string_list_append(list, p);
if (0 <= maxsplit && maxsplit <= count)
end = NULL;
else
end = strpbrk(p, delim);
count += append_one(list, p, end, in_place, flags);
if (!end)
return count;
}
p = end + 1;
}
}
/*
 * Split `string` on any of the characters in `delim`, appending a copy
 * of each piece to `list`; no trimming or empty-piece filtering is
 * requested.  Returns whatever split_string() returns.
 */
int string_list_split(struct string_list *list, const char *string,
		      const char *delim, int maxsplit)
{
	const int in_place = 0;
	const unsigned no_flags = 0;

	return split_string(list, string, delim, maxsplit, in_place, no_flags);
}
int string_list_split_in_place(struct string_list *list, char *string,
const char *delim, int maxsplit)
{
int count = 0;
char *p = string, *end;
if (list->strdup_strings)
die("internal error in string_list_split_in_place(): "
"list->strdup_strings must not be set");
for (;;) {
count++;
if (maxsplit >= 0 && count > maxsplit) {
string_list_append(list, p);
return count;
}
end = strpbrk(p, delim);
if (end) {
*end = '\0';
string_list_append(list, p);
p = end + 1;
} else {
string_list_append(list, p);
return count;
}
}
return split_string(list, string, delim, maxsplit, 1, 0);
}
/*
 * Copying variant of the split that also honours the
 * STRING_LIST_SPLIT_* bits given in `flags`.  Returns whatever
 * split_string() returns.
 */
int string_list_split_f(struct string_list *list, const char *string,
			const char *delim, int maxsplit, unsigned flags)
{
	const int in_place = 0;

	return split_string(list, string, delim, maxsplit, in_place, flags);
}
/*
 * In-place variant of the split that also honours the
 * STRING_LIST_SPLIT_* bits given in `flags`; `string` is modified.
 * Returns whatever split_string() returns.
 */
int string_list_split_in_place_f(struct string_list *list, char *string,
				 const char *delim, int maxsplit, unsigned flags)
{
	const int in_place = 1;

	return split_string(list, string, delim, maxsplit, in_place, flags);
}

View File

@@ -254,7 +254,7 @@ struct string_list_item *unsorted_string_list_lookup(struct string_list *list,
void unsorted_string_list_delete_item(struct string_list *list, int i, int free_util);
/**
* Split string into substrings on character `delim` and append the
* Split string into substrings on characters in `delim` and append the
* substrings to `list`. The input string is not modified.
* list->strdup_strings must be set, as new memory needs to be
* allocated to hold the substrings. If maxsplit is non-negative,
@@ -262,15 +262,15 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_
* appended to list.
*
* Examples:
* string_list_split(l, "foo:bar:baz", ':', -1) -> ["foo", "bar", "baz"]
* string_list_split(l, "foo:bar:baz", ':', 0) -> ["foo:bar:baz"]
* string_list_split(l, "foo:bar:baz", ':', 1) -> ["foo", "bar:baz"]
* string_list_split(l, "foo:bar:", ':', -1) -> ["foo", "bar", ""]
* string_list_split(l, "", ':', -1) -> [""]
* string_list_split(l, ":", ':', -1) -> ["", ""]
* string_list_split(l, "foo:bar:baz", ":", -1) -> ["foo", "bar", "baz"]
* string_list_split(l, "foo:bar:baz", ":", 0) -> ["foo:bar:baz"]
* string_list_split(l, "foo:bar:baz", ":", 1) -> ["foo", "bar:baz"]
* string_list_split(l, "foo:bar:", ":", -1) -> ["foo", "bar", ""]
* string_list_split(l, "", ":", -1) -> [""]
* string_list_split(l, ":", ":", -1) -> ["", ""]
*/
int string_list_split(struct string_list *list, const char *string,
int delim, int maxsplit);
const char *delim, int maxsplit);
/*
* Like string_list_split(), except that string is split in-place: the
@@ -281,4 +281,21 @@ int string_list_split(struct string_list *list, const char *string,
*/
int string_list_split_in_place(struct string_list *list, char *string,
const char *delim, int maxsplit);
/*
 * Flag bits for the `flags` argument of string_list_split_f() and
 * string_list_split_in_place_f().  The bits may be OR'ed together.
 */
enum {
/*
 * Trim whitespace from both ends of each resulting string piece
 * before adding it to the list.
 */
STRING_LIST_SPLIT_TRIM = (1 << 0),
/*
 * Omit empty string pieces from the resulting list.  Omitted
 * pieces are not counted in the return value.  When combined with
 * STRING_LIST_SPLIT_TRIM, a piece that consists only of whitespace
 * is trimmed first and therefore omitted as well.
 */
STRING_LIST_SPLIT_NONEMPTY = (1 << 1),
};
int string_list_split_f(struct string_list *, const char *string,
const char *delim, int maxsplit, unsigned flags);
int string_list_split_in_place_f(struct string_list *, char *string,
const char *delim, int maxsplit, unsigned flags);
#endif /* STRING_LIST_H */

View File

@@ -154,8 +154,8 @@ int cmd__hashmap(int argc UNUSED, const char **argv UNUSED)
/* break line into command and up to two parameters */
string_list_setlen(&parts, 0);
string_list_split_in_place(&parts, line.buf, DELIM, 2);
string_list_remove_empty_items(&parts, 0);
string_list_split_in_place_f(&parts, line.buf, DELIM, 2,
STRING_LIST_SPLIT_NONEMPTY);
/* ignore empty lines */
if (!parts.nr)

View File

@@ -492,8 +492,8 @@ static int scripted(void)
/* break line into command and zero or more tokens */
string_list_setlen(&parts, 0);
string_list_split_in_place(&parts, line, " ", -1);
string_list_remove_empty_items(&parts, 0);
string_list_split_in_place_f(&parts, line, " ", -1,
STRING_LIST_SPLIT_NONEMPTY);
/* ignore empty lines */
if (!parts.nr || !*parts.items[0].string)

View File

@@ -348,6 +348,7 @@ int cmd__path_utils(int argc, const char **argv)
if (argc == 4 && !strcmp(argv[1], "longest_ancestor_length")) {
int len;
struct string_list ceiling_dirs = STRING_LIST_INIT_DUP;
const char path_sep[] = { PATH_SEP, '\0' };
char *path = xstrdup(argv[2]);
/*
@@ -362,7 +363,7 @@ int cmd__path_utils(int argc, const char **argv)
*/
if (normalize_path_copy(path, path))
die("Path \"%s\" could not be normalized", argv[2]);
string_list_split(&ceiling_dirs, argv[3], PATH_SEP, -1);
string_list_split(&ceiling_dirs, argv[3], path_sep, -1);
filter_string_list(&ceiling_dirs, 0,
normalize_ceiling_entry, NULL);
len = longest_ancestor_length(path, &ceiling_dirs);

View File

@@ -29,7 +29,7 @@ static unsigned int parse_flags(const char *str, struct flag_definition *defs)
if (!strcmp(str, "0"))
return 0;
string_list_split(&masks, str, ',', 64);
string_list_split(&masks, str, ",", 64);
for (size_t i = 0; i < masks.nr; i++) {
const char *name = masks.items[i].string;
struct flag_definition *def = defs;

View File

@@ -43,7 +43,7 @@ static void t_string_list_equal(struct string_list *list,
expected_strings->items[i].string);
}
static void t_string_list_split(const char *data, int delim, int maxsplit, ...)
static void t_string_list_split(const char *data, const char *delim, int maxsplit, ...)
{
struct string_list expected_strings = STRING_LIST_INIT_DUP;
struct string_list list = STRING_LIST_INIT_DUP;
@@ -63,15 +63,94 @@ static void t_string_list_split(const char *data, int delim, int maxsplit, ...)
string_list_clear(&list, 0);
}
/*
 * Test helper: split `data` with string_list_split_f() using `delim`,
 * `maxsplit` and `flags`, then check both the returned count and the
 * resulting list against the expected pieces.
 *
 * The expected pieces are passed as the variadic tail; callers end the
 * list with NULL.
 */
static void t_string_list_split_f(const char *data, const char *delim,
int maxsplit, unsigned flags, ...)
{
struct string_list expected_strings = STRING_LIST_INIT_DUP;
struct string_list list = STRING_LIST_INIT_DUP;
va_list ap;
int len;
/* presumably collects the variadic strings into expected_strings */
va_start(ap, flags);
t_vcreate_string_list_dup(&expected_strings, 0, ap);
va_end(ap);
string_list_clear(&list, 0);
len = string_list_split_f(&list, data, delim, maxsplit, flags);
/* the return value must equal the number of expected pieces */
cl_assert_equal_i(len, expected_strings.nr);
t_string_list_equal(&list, &expected_strings);
string_list_clear(&expected_strings, 0);
string_list_clear(&list, 0);
}
/*
 * Exercise string_list_split_f() with no flags, with each flag on its
 * own, and with both flags combined.
 */
void test_string_list__split_f(void)
{
/* no flags: leading, trailing and consecutive delimiters yield "" */
t_string_list_split_f("::foo:bar:baz:", ":", -1, 0,
"", "", "foo", "bar", "baz", "", NULL);
/* TRIM: whitespace around each piece is removed */
t_string_list_split_f(" foo:bar : baz", ":", -1, STRING_LIST_SPLIT_TRIM,
"foo", "bar", "baz", NULL);
/*
 * TRIM with a whitespace delimiter and maxsplit 1: the leading blank
 * is trimmed rather than producing an empty first piece, and the
 * remainder loses its trailing blank.
 */
t_string_list_split_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM,
"a", "b c", NULL);
/* NONEMPTY: empty pieces are dropped and not counted */
t_string_list_split_f("::foo::bar:baz:", ":", -1, STRING_LIST_SPLIT_NONEMPTY,
"foo", "bar", "baz", NULL);
/* NONEMPTY changes nothing when there are no empty pieces */
t_string_list_split_f("foo:baz", ":", -1, STRING_LIST_SPLIT_NONEMPTY,
"foo", "baz", NULL);
/* both flags: whitespace-only pieces trim to "" and are dropped */
t_string_list_split_f("foo :: : baz", ":", -1,
STRING_LIST_SPLIT_NONEMPTY | STRING_LIST_SPLIT_TRIM,
"foo", "baz", NULL);
}
/*
 * Test helper: split a writable copy of `data_` with
 * string_list_split_in_place_f() using `delim`, `maxsplit` and `flags`,
 * then check both the returned count and the resulting list against
 * the expected pieces.
 *
 * The expected pieces are passed as the variadic tail; callers end the
 * list with NULL.
 */
static void t_string_list_split_in_place_f(const char *data_, const char *delim,
int maxsplit, unsigned flags, ...)
{
struct string_list expected_strings = STRING_LIST_INIT_DUP;
/* the in-place variant is used with a list that does not dup strings */
struct string_list list = STRING_LIST_INIT_NODUP;
/* the in-place split takes a non-const buffer, so work on a copy */
char *data = xstrdup(data_);
va_list ap;
int len;
/* presumably collects the variadic strings into expected_strings */
va_start(ap, flags);
t_vcreate_string_list_dup(&expected_strings, 0, ap);
va_end(ap);
string_list_clear(&list, 0);
len = string_list_split_in_place_f(&list, data, delim, maxsplit, flags);
/* the return value must equal the number of expected pieces */
cl_assert_equal_i(len, expected_strings.nr);
/* compare before freeing data: list items presumably point into it */
t_string_list_equal(&list, &expected_strings);
free(data);
string_list_clear(&expected_strings, 0);
string_list_clear(&list, 0);
}
/*
 * Exercise string_list_split_in_place_f() with the same inputs and
 * expectations as the string_list_split_f() test.
 */
void test_string_list__split_in_place_f(void)
{
/* no flags: leading, trailing and consecutive delimiters yield "" */
t_string_list_split_in_place_f("::foo:bar:baz:", ":", -1, 0,
"", "", "foo", "bar", "baz", "", NULL);
/* TRIM: whitespace around each piece is removed */
t_string_list_split_in_place_f(" foo:bar : baz", ":", -1, STRING_LIST_SPLIT_TRIM,
"foo", "bar", "baz", NULL);
/*
 * TRIM with a whitespace delimiter and maxsplit 1: the leading blank
 * is trimmed rather than producing an empty first piece, and the
 * remainder loses its trailing blank.
 */
t_string_list_split_in_place_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM,
"a", "b c", NULL);
/* NONEMPTY: empty pieces are dropped and not counted */
t_string_list_split_in_place_f("::foo::bar:baz:", ":", -1,
STRING_LIST_SPLIT_NONEMPTY,
"foo", "bar", "baz", NULL);
/* NONEMPTY changes nothing when there are no empty pieces */
t_string_list_split_in_place_f("foo:baz", ":", -1, STRING_LIST_SPLIT_NONEMPTY,
"foo", "baz", NULL);
/* both flags: whitespace-only pieces trim to "" and are dropped */
t_string_list_split_in_place_f("foo :: : baz", ":", -1,
STRING_LIST_SPLIT_NONEMPTY | STRING_LIST_SPLIT_TRIM,
"foo", "baz", NULL);
}
void test_string_list__split(void)
{
t_string_list_split("foo:bar:baz", ':', -1, "foo", "bar", "baz", NULL);
t_string_list_split("foo:bar:baz", ':', 0, "foo:bar:baz", NULL);
t_string_list_split("foo:bar:baz", ':', 1, "foo", "bar:baz", NULL);
t_string_list_split("foo:bar:baz", ':', 2, "foo", "bar", "baz", NULL);
t_string_list_split("foo:bar:", ':', -1, "foo", "bar", "", NULL);
t_string_list_split("", ':', -1, "", NULL);
t_string_list_split(":", ':', -1, "", "", NULL);
t_string_list_split("foo:bar:baz", ":", -1, "foo", "bar", "baz", NULL);
t_string_list_split("foo:bar:baz", ":", 0, "foo:bar:baz", NULL);
t_string_list_split("foo:bar:baz", ":", 1, "foo", "bar:baz", NULL);
t_string_list_split("foo:bar:baz", ":", 2, "foo", "bar", "baz", NULL);
t_string_list_split("foo:bar:", ":", -1, "foo", "bar", "", NULL);
t_string_list_split("", ":", -1, "", NULL);
t_string_list_split(":", ":", -1, "", "", NULL);
}
static void t_string_list_split_in_place(const char *data, const char *delim,

View File

@@ -1042,7 +1042,7 @@ static const struct string_list *protocol_allow_list(void)
if (enabled < 0) {
const char *v = getenv("GIT_ALLOW_PROTOCOL");
if (v) {
string_list_split(&allowed, v, ':', -1);
string_list_split(&allowed, v, ":", -1);
string_list_sort(&allowed);
enabled = 1;
} else {

View File

@@ -1685,7 +1685,7 @@ static void process_args(struct packet_reader *request,
if (data->uri_protocols.nr)
send_err_and_die(data,
"multiple packfile-uris lines forbidden");
string_list_split(&data->uri_protocols, p, ',', -1);
string_list_split(&data->uri_protocols, p, ",", -1);
continue;
}