bloom: replace struct bloom_key * with struct bloom_keyvec

Previously, we stored bloom keys in a flat array and marked a commit
as NOT TREESAME if any key reported "definitely not changed".

To support multiple pathspec items, we now require that for each
pathspec item, there exists a bloom key reporting "definitely not
changed".

This "for every" condition makes a flat array insufficient, so we
introduce a new structure to group keys by a single pathspec item.
`struct bloom_keyvec` is introduced to replace `struct bloom_key *`
and `bloom_key_nr`. And because we want to support multiple pathspec
items, we added a bloom_keyvec * and a bloom_keyvec_nr field to
`struct rev_info` to represent an array of bloom_keyvecs. This commit
still optimizes only one pathspec item, so bloom_keyvec_nr can only
be 0 or 1.

New bloom_keyvec_* functions are added to create and destroy a keyvec.
bloom_filter_contains_vec() is added to check whether all keys in a
keyvec are contained in a bloom filter.

Signed-off-by: Lidong Yan <502024330056@smail.nju.edu.cn>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Lidong Yan
2025-07-12 17:35:15 +08:00
committed by Junio C Hamano
parent b187353ed2
commit 90d5518a7d
4 changed files with 132 additions and 49 deletions

61
bloom.c
View File

@@ -278,6 +278,55 @@ void deinit_bloom_filters(void)
deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
}
/*
 * Allocate a keyvec for the first `len` bytes of `path`.
 *
 * key[0] is filled from the full path; the remaining keys are filled from
 * each leading-directory prefix (the bytes before a '/'), ordered from the
 * longest prefix to the shortest.  A '/' at index 0 does not produce a key,
 * since the prefix before it is empty.
 *
 * The path is expected to be normalized to use Unix-style path separators.
 * This is required due to how the changed-path Bloom filters store the
 * paths.
 *
 * Returns the new keyvec, to be released with bloom_keyvec_free(), or NULL
 * if xcalloc() returns NULL.
 */
struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
				      const struct bloom_filter_settings *settings)
{
	struct bloom_keyvec *vec;
	size_t nr = 1;
	size_t sz;

	/*
	 * Count separators over exactly the range the fill loop below
	 * visits (indices 1..len-1), so that the allocation always matches
	 * the number of keys filled, even when `path` starts with '/' or
	 * `len` is shorter than the NUL-terminated string.
	 */
	for (size_t i = 1; i < len; i++) {
		if (path[i] == '/')
			nr++;
	}

	sz = sizeof(*vec);
	sz += nr * sizeof(struct bloom_key);
	vec = xcalloc(1, sz);
	if (!vec)
		return NULL;
	vec->count = nr;

	bloom_key_fill(&vec->key[0], path, len, settings);
	nr = 1;
	/*
	 * Walk backwards by index rather than by pointer: with len == 0 a
	 * pointer-based "path + len - 1" would point before the buffer.
	 */
	for (size_t end = len; end > 1; end--) {
		if (path[end - 1] == '/')
			bloom_key_fill(&vec->key[nr++], path, end - 1, settings);
	}
	assert(nr == vec->count);
	return vec;
}
/*
 * Release a keyvec: clear each of its `count` keys in order, then free the
 * vector itself.  Passing NULL is a no-op.
 */
void bloom_keyvec_free(struct bloom_keyvec *vec)
{
	size_t idx = 0;

	if (vec == NULL)
		return;

	while (idx < vec->count) {
		bloom_key_clear(&vec->key[idx]);
		idx++;
	}

	free(vec);
}
static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
const struct hashmap_entry *eptr,
const struct hashmap_entry *entry_or_key,
@@ -539,6 +588,18 @@ int bloom_filter_contains(const struct bloom_filter *filter,
return 1;
}
/*
 * Query `filter` for every key in `vec`, in order, stopping at the first
 * key for which bloom_filter_contains() returns a non-positive value.
 *
 * Returns that non-positive value if there is one; otherwise the result
 * for the last key, or 1 when `vec` holds no keys.
 */
int bloom_filter_contains_vec(const struct bloom_filter *filter,
			      const struct bloom_keyvec *vec,
			      const struct bloom_filter_settings *settings)
{
	int result = 1;
	size_t idx = 0;

	while (idx < vec->count) {
		result = bloom_filter_contains(filter, &vec->key[idx], settings);
		if (result <= 0)
			break;
		idx++;
	}

	return result;
}
uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
int version)
{