diff --git a/commit.c b/commit.c index 4385ae4329..f7ea9d9928 100644 --- a/commit.c +++ b/commit.c @@ -1637,12 +1637,12 @@ static int find_invalid_utf8(const char *buf, int len) } /* - * This verifies that the buffer is in proper utf8 format. + * This ensures that the buffer is in proper utf8 format. * * If it isn't, it assumes any non-utf8 characters are Latin1, * and does the conversion. */ -static int verify_utf8(struct strbuf *buf) +static int ensure_utf8(struct strbuf *buf) { int ok = 1; long pos = 0; @@ -1726,6 +1726,7 @@ int commit_tree_extended(const char *msg, size_t msg_len, struct repository *r = the_repository; int result = 0; int encoding_is_utf8; + bool warned = false; struct strbuf buffer = STRBUF_INIT, compat_buffer = STRBUF_INIT; struct strbuf sig = STRBUF_INIT, compat_sig = STRBUF_INIT; struct object_id *parent_buf = NULL, *compat_oid = NULL; @@ -1747,6 +1748,13 @@ int commit_tree_extended(const char *msg, size_t msg_len, oidcpy(&parent_buf[i++], &p->item->object.oid); write_commit_tree(&buffer, msg, msg_len, tree, parent_buf, nparents, author, committer, extra); + + /* And check the encoding. */ + if (encoding_is_utf8 && !ensure_utf8(&buffer)) { + fprintf(stderr, _(commit_utf8_warn)); + warned = true; + } + if (sign_commit && sign_buffer(&buffer, &sig, sign_commit, SIGN_BUFFER_USE_DEFAULT_KEY)) { result = -1; @@ -1780,6 +1788,9 @@ int commit_tree_extended(const char *msg, size_t msg_len, free_commit_extra_headers(compat_extra); free(mapped_parents); + if (encoding_is_utf8 && !ensure_utf8(&compat_buffer) && !warned) + fprintf(stderr, _(commit_utf8_warn)); + if (sign_commit && sign_buffer(&compat_buffer, &compat_sig, sign_commit, SIGN_BUFFER_USE_DEFAULT_KEY)) { @@ -1818,10 +1829,6 @@ int commit_tree_extended(const char *msg, size_t msg_len, } } - /* And check the encoding. */ - if (encoding_is_utf8 && (!verify_utf8(&buffer) || !verify_utf8(&compat_buffer))) - fprintf(stderr, _(commit_utf8_warn)); - if (r->compat_hash_algo) { hash_object_file(r->compat_hash_algo, compat_buffer.buf, compat_buffer.len, OBJ_COMMIT, &compat_oid_buf); diff --git a/t/t7510-signed-commit.sh b/t/t7510-signed-commit.sh index 1201c85ba6..aa9108da54 100755 --- a/t/t7510-signed-commit.sh +++ b/t/t7510-signed-commit.sh @@ -462,4 +462,14 @@ test_expect_success 'custom `gpg.program`' ' git commit -S --allow-empty -m signed-commit ' +test_expect_success GPG 'commit verifies with non-UTF-8 commit message' ' + printf "I hate\\376\\377UTF-8\\n" >message && + echo unusual-message >file && + git add file && + test_tick && git commit -S -F message 2>err && + git verify-commit HEAD && + grep "commit message did not conform to UTF-8" err >lines && + test_line_count = 1 lines +' + test_done