diff --git a/builtin/blame.c b/builtin/blame.c index 5b10e84b66..2703820258 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -413,7 +413,7 @@ static void parse_color_fields(const char *s) colorfield_nr = 0; /* Ideally this would be stripped and split at the same time? */ - string_list_split(&l, s, ',', -1); + string_list_split(&l, s, ",", -1); ALLOC_GROW(colorfield, colorfield_nr + 1, colorfield_alloc); for_each_string_list_item(item, &l) { diff --git a/builtin/merge.c b/builtin/merge.c index ce880e6ccb..c3810c9cb2 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -875,7 +875,7 @@ static void add_strategies(const char *string, unsigned attr) if (string) { struct string_list list = STRING_LIST_INIT_DUP; struct string_list_item *item; - string_list_split(&list, string, ' ', -1); + string_list_split(&list, string, " ", -1); for_each_string_list_item(item, &list) append_strategy(get_strategy(item->string)); string_list_clear(&list, 0); diff --git a/builtin/var.c b/builtin/var.c index a2d790d453..cc3a43cde2 100644 --- a/builtin/var.c +++ b/builtin/var.c @@ -182,7 +182,7 @@ static void list_vars(void) if (ptr->multivalued && *val) { struct string_list list = STRING_LIST_INIT_DUP; - string_list_split(&list, val, '\n', -1); + string_list_split(&list, val, "\n", -1); for (size_t i = 0; i < list.nr; i++) printf("%s=%s\n", ptr->name, list.items[i].string); string_list_clear(&list, 0); diff --git a/connect.c b/connect.c index 34cac24d54..8352b71faf 100644 --- a/connect.c +++ b/connect.c @@ -407,7 +407,7 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list, * name. Subsequent fields (symref-target and peeled) are optional and * don't have a particular order. */ - if (string_list_split(&line_sections, line, ' ', -1) < 2) { + if (string_list_split(&line_sections, line, " ", -1) < 2) { ret = 0; goto out; } diff --git a/diff.c b/diff.c index dca87e164f..70666ad2cd 100644 --- a/diff.c +++ b/diff.c @@ -327,29 +327,23 @@ static unsigned parse_color_moved_ws(const char *arg) struct string_list l = STRING_LIST_INIT_DUP; struct string_list_item *i; - string_list_split(&l, arg, ',', -1); + string_list_split_f(&l, arg, ",", -1, STRING_LIST_SPLIT_TRIM); for_each_string_list_item(i, &l) { - struct strbuf sb = STRBUF_INIT; - strbuf_addstr(&sb, i->string); - strbuf_trim(&sb); - - if (!strcmp(sb.buf, "no")) + if (!strcmp(i->string, "no")) ret = 0; - else if (!strcmp(sb.buf, "ignore-space-change")) + else if (!strcmp(i->string, "ignore-space-change")) ret |= XDF_IGNORE_WHITESPACE_CHANGE; - else if (!strcmp(sb.buf, "ignore-space-at-eol")) + else if (!strcmp(i->string, "ignore-space-at-eol")) ret |= XDF_IGNORE_WHITESPACE_AT_EOL; - else if (!strcmp(sb.buf, "ignore-all-space")) + else if (!strcmp(i->string, "ignore-all-space")) ret |= XDF_IGNORE_WHITESPACE; - else if (!strcmp(sb.buf, "allow-indentation-change")) + else if (!strcmp(i->string, "allow-indentation-change")) ret |= COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE; else { ret |= COLOR_MOVED_WS_ERROR; - error(_("unknown color-moved-ws mode '%s', possible values are 'ignore-space-change', 'ignore-space-at-eol', 'ignore-all-space', 'allow-indentation-change'"), sb.buf); + error(_("unknown color-moved-ws mode '%s', possible values are 'ignore-space-change', 'ignore-space-at-eol', 'ignore-all-space', 'allow-indentation-change'"), i->string); } - - strbuf_release(&sb); } if ((ret & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) && diff --git a/fetch-pack.c b/fetch-pack.c index 46c39f85c4..20e5533b21 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1914,7 +1914,7 @@ static void fetch_pack_config(void) char *str; if (!repo_config_get_string(the_repository, "fetch.uriprotocols", &str) && str) { - string_list_split(&uri_protocols, str, ',', -1); + string_list_split(&uri_protocols, str, ",", -1); free(str); } } diff --git a/notes.c b/notes.c index 9736276f37..9a2e9181fe 100644 --- a/notes.c +++ b/notes.c @@ -894,7 +894,7 @@ static int string_list_add_note_lines(struct string_list *list, * later, along with any empty strings that came from empty * lines within the file. */ - string_list_split(list, data, '\n', -1); + string_list_split(list, data, "\n", -1); free(data); return 0; } @@ -973,8 +973,8 @@ void string_list_add_refs_from_colon_sep(struct string_list *list, char *globs_copy = xstrdup(globs); int i; - string_list_split_in_place(&split, globs_copy, ":", -1); - string_list_remove_empty_items(&split, 0); + string_list_split_in_place_f(&split, globs_copy, ":", -1, + STRING_LIST_SPLIT_NONEMPTY); for (i = 0; i < split.nr; i++) string_list_add_refs_by_glob(list, split.items[i].string); diff --git a/parse-options.c b/parse-options.c index 5224203ffe..9e7cb75192 100644 --- a/parse-options.c +++ b/parse-options.c @@ -1338,7 +1338,7 @@ static enum parse_opt_result usage_with_options_internal(struct parse_opt_ctx_t if (!saw_empty_line && !*str) saw_empty_line = 1; - string_list_split(&list, str, '\n', -1); + string_list_split(&list, str, "\n", -1); for (j = 0; j < list.nr; j++) { const char *line = list.items[j].string; diff --git a/pathspec.c b/pathspec.c index a3ddd701c7..5993c4afa0 100644 --- a/pathspec.c +++ b/pathspec.c @@ -201,8 +201,7 @@ static void parse_pathspec_attr_match(struct pathspec_item *item, const char *va if (!value || !*value) die(_("attr spec must not be empty")); - string_list_split(&list, value, ' ', -1); - string_list_remove_empty_items(&list, 0); + string_list_split_f(&list, value, " ", -1, STRING_LIST_SPLIT_NONEMPTY); item->attr_check = attr_check_alloc(); CALLOC_ARRAY(item->attr_match, list.nr); diff --git a/protocol.c b/protocol.c index 65f6621702..a3e26a8dd3 100644 --- a/protocol.c +++ b/protocol.c @@ -61,7 +61,7 @@ enum protocol_version determine_protocol_version_server(void) if (git_protocol) { struct string_list list = STRING_LIST_INIT_DUP; const struct string_list_item *item; - string_list_split(&list, git_protocol, ':', -1); + string_list_split(&list, git_protocol, ":", -1); for_each_string_list_item(item, &list) { const char *value; diff --git a/ref-filter.c b/ref-filter.c index 4edf0df4cc..520d2539c9 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -435,7 +435,7 @@ static int remote_ref_atom_parser(struct ref_format *format UNUSED, } atom->u.remote_ref.nobracket = 0; - string_list_split(¶ms, arg, ',', -1); + string_list_split(¶ms, arg, ",", -1); for (i = 0; i < params.nr; i++) { const char *s = params.items[i].string; @@ -831,7 +831,7 @@ static int align_atom_parser(struct ref_format *format UNUSED, align->position = ALIGN_LEFT; - string_list_split(¶ms, arg, ',', -1); + string_list_split(¶ms, arg, ",", -1); for (i = 0; i < params.nr; i++) { const char *s = params.items[i].string; int position; diff --git a/setup.c b/setup.c index 98ddbf377f..7086741e6c 100644 --- a/setup.c +++ b/setup.c @@ -1460,8 +1460,9 @@ static enum discovery_result setup_git_directory_gently_1(struct strbuf *dir, if (env_ceiling_dirs) { int empty_entry_found = 0; + static const char path_sep[] = { PATH_SEP, '\0' }; - string_list_split(&ceiling_dirs, env_ceiling_dirs, PATH_SEP, -1); + string_list_split(&ceiling_dirs, env_ceiling_dirs, path_sep, -1); filter_string_list(&ceiling_dirs, 0, canonicalize_ceiling_entry, &empty_entry_found); ceil_offset = longest_ancestor_length(dir->buf, &ceiling_dirs); diff --git a/string-list.c b/string-list.c index 53faaa8420..343cf1ca90 100644 --- a/string-list.c +++ b/string-list.c @@ -276,55 +276,99 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ list->nr--; } -int string_list_split(struct string_list *list, const char *string, - int delim, int maxsplit) +/* + * append a substring [p..end] to list; return number of things it + * appended to the list. + */ +static int append_one(struct string_list *list, + const char *p, const char *end, + int in_place, unsigned flags) +{ + if (!end) + end = p + strlen(p); + + if ((flags & STRING_LIST_SPLIT_TRIM)) { + /* rtrim */ + for (; p < end; end--) + if (!isspace(end[-1])) + break; + } + + if ((flags & STRING_LIST_SPLIT_NONEMPTY) && (end <= p)) + return 0; + + if (in_place) { + *((char *)end) = '\0'; + string_list_append(list, p); + } else { + string_list_append_nodup(list, xmemdupz(p, end - p)); + } + return 1; +} + +/* + * Unfortunately this cannot become a public interface, as _in_place() + * wants to have "const char *string" while the other variant wants to + * have "char *string" for type safety. + * + * This accepts "const char *string" to allow both wrappers to use it; + * it internally casts away the constness when in_place is true by + * taking advantage of strpbrk() that takes a "const char *" arg and + * returns "char *" pointer into that const string. Yucky but works ;-). + */ +static int split_string(struct string_list *list, const char *string, const char *delim, + int maxsplit, int in_place, unsigned flags) { int count = 0; - const char *p = string, *end; + const char *p = string; + + if (in_place && list->strdup_strings) + BUG("string_list_split_in_place() called with strdup_strings"); + else if (!in_place && !list->strdup_strings) + BUG("string_list_split() called without strdup_strings"); - if (!list->strdup_strings) - die("internal error in string_list_split(): " - "list->strdup_strings must be set"); for (;;) { - count++; - if (maxsplit >= 0 && count > maxsplit) { - string_list_append(list, p); - return count; + char *end; + + if (flags & STRING_LIST_SPLIT_TRIM) { + /* ltrim */ + while (*p && isspace(*p)) + p++; } - end = strchr(p, delim); - if (end) { - string_list_append_nodup(list, xmemdupz(p, end - p)); - p = end + 1; - } else { - string_list_append(list, p); + + if (0 <= maxsplit && maxsplit <= count) + end = NULL; + else + end = strpbrk(p, delim); + + count += append_one(list, p, end, in_place, flags); + + if (!end) return count; - } + p = end + 1; } } +int string_list_split(struct string_list *list, const char *string, + const char *delim, int maxsplit) +{ + return split_string(list, string, delim, maxsplit, 0, 0); +} + int string_list_split_in_place(struct string_list *list, char *string, const char *delim, int maxsplit) { - int count = 0; - char *p = string, *end; - - if (list->strdup_strings) - die("internal error in string_list_split_in_place(): " - "list->strdup_strings must not be set"); - for (;;) { - count++; - if (maxsplit >= 0 && count > maxsplit) { - string_list_append(list, p); - return count; - } - end = strpbrk(p, delim); - if (end) { - *end = '\0'; - string_list_append(list, p); - p = end + 1; - } else { - string_list_append(list, p); - return count; - } - } + return split_string(list, string, delim, maxsplit, 1, 0); +} + +int string_list_split_f(struct string_list *list, const char *string, + const char *delim, int maxsplit, unsigned flags) +{ + return split_string(list, string, delim, maxsplit, 0, flags); +} + +int string_list_split_in_place_f(struct string_list *list, char *string, + const char *delim, int maxsplit, unsigned flags) +{ + return split_string(list, string, delim, maxsplit, 1, flags); } diff --git a/string-list.h b/string-list.h index 122b318641..2b438c7733 100644 --- a/string-list.h +++ b/string-list.h @@ -254,7 +254,7 @@ struct string_list_item *unsorted_string_list_lookup(struct string_list *list, void unsorted_string_list_delete_item(struct string_list *list, int i, int free_util); /** - * Split string into substrings on character `delim` and append the + * Split string into substrings on characters in `delim` and append the * substrings to `list`. The input string is not modified. * list->strdup_strings must be set, as new memory needs to be * allocated to hold the substrings. If maxsplit is non-negative, @@ -262,15 +262,15 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ * appended to list. * * Examples: - * string_list_split(l, "foo:bar:baz", ':', -1) -> ["foo", "bar", "baz"] - * string_list_split(l, "foo:bar:baz", ':', 0) -> ["foo:bar:baz"] - * string_list_split(l, "foo:bar:baz", ':', 1) -> ["foo", "bar:baz"] - * string_list_split(l, "foo:bar:", ':', -1) -> ["foo", "bar", ""] - * string_list_split(l, "", ':', -1) -> [""] - * string_list_split(l, ":", ':', -1) -> ["", ""] + * string_list_split(l, "foo:bar:baz", ":", -1) -> ["foo", "bar", "baz"] + * string_list_split(l, "foo:bar:baz", ":", 0) -> ["foo:bar:baz"] + * string_list_split(l, "foo:bar:baz", ":", 1) -> ["foo", "bar:baz"] + * string_list_split(l, "foo:bar:", ":", -1) -> ["foo", "bar", ""] + * string_list_split(l, "", ":", -1) -> [""] + * string_list_split(l, ":", ":", -1) -> ["", ""] */ int string_list_split(struct string_list *list, const char *string, - int delim, int maxsplit); + const char *delim, int maxsplit); /* * Like string_list_split(), except that string is split in-place: the @@ -281,4 +281,21 @@ int string_list_split(struct string_list *list, const char *string, */ int string_list_split_in_place(struct string_list *list, char *string, const char *delim, int maxsplit); + +/* Flag bits for split_f and split_in_place_f functions */ +enum { + /* + * trim whitespaces around resulting string piece before adding + * it to the list + */ + STRING_LIST_SPLIT_TRIM = (1 << 0), + /* omit adding empty string piece to the resulting list */ + STRING_LIST_SPLIT_NONEMPTY = (1 << 1), +}; + +int string_list_split_f(struct string_list *, const char *string, + const char *delim, int maxsplit, unsigned flags); + +int string_list_split_in_place_f(struct string_list *, char *string, + const char *delim, int maxsplit, unsigned flags); #endif /* STRING_LIST_H */ diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c index 16a3145c3a..56d223a283 100644 --- a/t/helper/test-hashmap.c +++ b/t/helper/test-hashmap.c @@ -154,8 +154,8 @@ int cmd__hashmap(int argc UNUSED, const char **argv UNUSED) /* break line into command and up to two parameters */ string_list_setlen(&parts, 0); - string_list_split_in_place(&parts, line.buf, DELIM, 2); - string_list_remove_empty_items(&parts, 0); + string_list_split_in_place_f(&parts, line.buf, DELIM, 2, + STRING_LIST_SPLIT_NONEMPTY); /* ignore empty lines */ if (!parts.nr) diff --git a/t/helper/test-json-writer.c b/t/helper/test-json-writer.c index a288069b04..f8316a7d29 100644 --- a/t/helper/test-json-writer.c +++ b/t/helper/test-json-writer.c @@ -492,8 +492,8 @@ static int scripted(void) /* break line into command and zero or more tokens */ string_list_setlen(&parts, 0); - string_list_split_in_place(&parts, line, " ", -1); - string_list_remove_empty_items(&parts, 0); + string_list_split_in_place_f(&parts, line, " ", -1, + STRING_LIST_SPLIT_NONEMPTY); /* ignore empty lines */ if (!parts.nr || !*parts.items[0].string) diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 086238c826..f5f33751da 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -348,6 +348,7 @@ int cmd__path_utils(int argc, const char **argv) if (argc == 4 && !strcmp(argv[1], "longest_ancestor_length")) { int len; struct string_list ceiling_dirs = STRING_LIST_INIT_DUP; + const char path_sep[] = { PATH_SEP, '\0' }; char *path = xstrdup(argv[2]); /* @@ -362,7 +363,7 @@ int cmd__path_utils(int argc, const char **argv) */ if (normalize_path_copy(path, path)) die("Path \"%s\" could not be normalized", argv[2]); - string_list_split(&ceiling_dirs, argv[3], PATH_SEP, -1); + string_list_split(&ceiling_dirs, argv[3], path_sep, -1); filter_string_list(&ceiling_dirs, 0, normalize_ceiling_entry, NULL); len = longest_ancestor_length(path, &ceiling_dirs); diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index b2380d57ba..83b06d39a3 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -29,7 +29,7 @@ static unsigned int parse_flags(const char *str, struct flag_definition *defs) if (!strcmp(str, "0")) return 0; - string_list_split(&masks, str, ',', 64); + string_list_split(&masks, str, ",", 64); for (size_t i = 0; i < masks.nr; i++) { const char *name = masks.items[i].string; struct flag_definition *def = defs; diff --git a/t/unit-tests/u-string-list.c b/t/unit-tests/u-string-list.c index d4ba5f9fa5..a2457d7b1e 100644 --- a/t/unit-tests/u-string-list.c +++ b/t/unit-tests/u-string-list.c @@ -43,7 +43,7 @@ static void t_string_list_equal(struct string_list *list, expected_strings->items[i].string); } -static void t_string_list_split(const char *data, int delim, int maxsplit, ...) +static void t_string_list_split(const char *data, const char *delim, int maxsplit, ...) { struct string_list expected_strings = STRING_LIST_INIT_DUP; struct string_list list = STRING_LIST_INIT_DUP; @@ -63,15 +63,94 @@ static void t_string_list_split(const char *data, int delim, int maxsplit, ...) string_list_clear(&list, 0); } +static void t_string_list_split_f(const char *data, const char *delim, + int maxsplit, unsigned flags, ...) +{ + struct string_list expected_strings = STRING_LIST_INIT_DUP; + struct string_list list = STRING_LIST_INIT_DUP; + va_list ap; + int len; + + va_start(ap, flags); + t_vcreate_string_list_dup(&expected_strings, 0, ap); + va_end(ap); + + string_list_clear(&list, 0); + len = string_list_split_f(&list, data, delim, maxsplit, flags); + cl_assert_equal_i(len, expected_strings.nr); + t_string_list_equal(&list, &expected_strings); + + string_list_clear(&expected_strings, 0); + string_list_clear(&list, 0); +} + +void test_string_list__split_f(void) +{ + t_string_list_split_f("::foo:bar:baz:", ":", -1, 0, + "", "", "foo", "bar", "baz", "", NULL); + t_string_list_split_f(" foo:bar : baz", ":", -1, STRING_LIST_SPLIT_TRIM, + "foo", "bar", "baz", NULL); + t_string_list_split_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM, + "a", "b c", NULL); + t_string_list_split_f("::foo::bar:baz:", ":", -1, STRING_LIST_SPLIT_NONEMPTY, + "foo", "bar", "baz", NULL); + t_string_list_split_f("foo:baz", ":", -1, STRING_LIST_SPLIT_NONEMPTY, + "foo", "baz", NULL); + t_string_list_split_f("foo :: : baz", ":", -1, + STRING_LIST_SPLIT_NONEMPTY | STRING_LIST_SPLIT_TRIM, + "foo", "baz", NULL); +} + +static void t_string_list_split_in_place_f(const char *data_, const char *delim, + int maxsplit, unsigned flags, ...) +{ + struct string_list expected_strings = STRING_LIST_INIT_DUP; + struct string_list list = STRING_LIST_INIT_NODUP; + char *data = xstrdup(data_); + va_list ap; + int len; + + va_start(ap, flags); + t_vcreate_string_list_dup(&expected_strings, 0, ap); + va_end(ap); + + string_list_clear(&list, 0); + len = string_list_split_in_place_f(&list, data, delim, maxsplit, flags); + cl_assert_equal_i(len, expected_strings.nr); + t_string_list_equal(&list, &expected_strings); + + free(data); + string_list_clear(&expected_strings, 0); + string_list_clear(&list, 0); +} + +void test_string_list__split_in_place_f(void) +{ + t_string_list_split_in_place_f("::foo:bar:baz:", ":", -1, 0, + "", "", "foo", "bar", "baz", "", NULL); + t_string_list_split_in_place_f(" foo:bar : baz", ":", -1, STRING_LIST_SPLIT_TRIM, + "foo", "bar", "baz", NULL); + t_string_list_split_in_place_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM, + "a", "b c", NULL); + t_string_list_split_in_place_f("::foo::bar:baz:", ":", -1, + STRING_LIST_SPLIT_NONEMPTY, + "foo", "bar", "baz", NULL); + t_string_list_split_in_place_f("foo:baz", ":", -1, STRING_LIST_SPLIT_NONEMPTY, + "foo", "baz", NULL); + t_string_list_split_in_place_f("foo :: : baz", ":", -1, + STRING_LIST_SPLIT_NONEMPTY | STRING_LIST_SPLIT_TRIM, + "foo", "baz", NULL); +} + void test_string_list__split(void) { - t_string_list_split("foo:bar:baz", ':', -1, "foo", "bar", "baz", NULL); - t_string_list_split("foo:bar:baz", ':', 0, "foo:bar:baz", NULL); - t_string_list_split("foo:bar:baz", ':', 1, "foo", "bar:baz", NULL); - t_string_list_split("foo:bar:baz", ':', 2, "foo", "bar", "baz", NULL); - t_string_list_split("foo:bar:", ':', -1, "foo", "bar", "", NULL); - t_string_list_split("", ':', -1, "", NULL); - t_string_list_split(":", ':', -1, "", "", NULL); + t_string_list_split("foo:bar:baz", ":", -1, "foo", "bar", "baz", NULL); + t_string_list_split("foo:bar:baz", ":", 0, "foo:bar:baz", NULL); + t_string_list_split("foo:bar:baz", ":", 1, "foo", "bar:baz", NULL); + t_string_list_split("foo:bar:baz", ":", 2, "foo", "bar", "baz", NULL); + t_string_list_split("foo:bar:", ":", -1, "foo", "bar", "", NULL); + t_string_list_split("", ":", -1, "", NULL); + t_string_list_split(":", ":", -1, "", "", NULL); } static void t_string_list_split_in_place(const char *data, const char *delim, diff --git a/transport.c b/transport.c index e305d6bd22..6ac8aa402b 100644 --- a/transport.c +++ b/transport.c @@ -1042,7 +1042,7 @@ static const struct string_list *protocol_allow_list(void) if (enabled < 0) { const char *v = getenv("GIT_ALLOW_PROTOCOL"); if (v) { - string_list_split(&allowed, v, ':', -1); + string_list_split(&allowed, v, ":", -1); string_list_sort(&allowed); enabled = 1; } else { diff --git a/upload-pack.c b/upload-pack.c index 4f26f6afc7..91fcdcad9b 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1685,7 +1685,7 @@ static void process_args(struct packet_reader *request, if (data->uri_protocols.nr) send_err_and_die(data, "multiple packfile-uris lines forbidden"); - string_list_split(&data->uri_protocols, p, ',', -1); + string_list_split(&data->uri_protocols, p, ",", -1); continue; }