mirror of
https://github.com/git/git.git
synced 2025-12-12 20:36:24 +01:00
bloom: replace struct bloom_key * with struct bloom_keyvec
Previously, we stored bloom keys in a flat array and marked a commit as NOT TREESAME if any key reported "definitely not changed". To support multiple pathspec items, we now require that for each pathspec item, there exists a bloom key reporting "definitely not changed". This "for every" condition makes a flat array insufficient, so we introduce a new structure to group keys by a single pathspec item. `struct bloom_keyvec` is introduced to replace `struct bloom_key *` and `bloom_key_nr`. And because we want to support multiple pathspec items, we add a bloom_keyvec * field and a bloom_keyvec_nr field to `struct rev_info` to represent an array of bloom_keyvecs. This commit still optimizes only one pathspec item, thus bloom_keyvec_nr can only be 0 or 1. New bloom_keyvec_* functions are added to create and destroy a keyvec. bloom_filter_contains_vec() is added to check whether all keys in a keyvec are contained in a bloom filter. Signed-off-by: Lidong Yan <502024330056@smail.nju.edu.cn> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
committed by
Junio C Hamano
parent
b187353ed2
commit
90d5518a7d
61
bloom.c
61
bloom.c
@@ -278,6 +278,55 @@ void deinit_bloom_filters(void)
|
||||
deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
|
||||
}
|
||||
|
||||
struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
|
||||
const struct bloom_filter_settings *settings)
|
||||
{
|
||||
struct bloom_keyvec *vec;
|
||||
const char *p;
|
||||
size_t sz;
|
||||
size_t nr = 1;
|
||||
|
||||
p = path;
|
||||
while (*p) {
|
||||
/*
|
||||
* At this point, the path is normalized to use Unix-style
|
||||
* path separators. This is required due to how the
|
||||
* changed-path Bloom filters store the paths.
|
||||
*/
|
||||
if (*p == '/')
|
||||
nr++;
|
||||
p++;
|
||||
}
|
||||
|
||||
sz = sizeof(struct bloom_keyvec);
|
||||
sz += nr * sizeof(struct bloom_key);
|
||||
vec = (struct bloom_keyvec *)xcalloc(1, sz);
|
||||
if (!vec)
|
||||
return NULL;
|
||||
vec->count = nr;
|
||||
|
||||
bloom_key_fill(&vec->key[0], path, len, settings);
|
||||
nr = 1;
|
||||
p = path + len - 1;
|
||||
while (p > path) {
|
||||
if (*p == '/') {
|
||||
bloom_key_fill(&vec->key[nr++], path, p - path, settings);
|
||||
}
|
||||
p--;
|
||||
}
|
||||
assert(nr == vec->count);
|
||||
return vec;
|
||||
}
|
||||
|
||||
void bloom_keyvec_free(struct bloom_keyvec *vec)
|
||||
{
|
||||
if (!vec)
|
||||
return;
|
||||
for (size_t nr = 0; nr < vec->count; nr++)
|
||||
bloom_key_clear(&vec->key[nr]);
|
||||
free(vec);
|
||||
}
|
||||
|
||||
static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
|
||||
const struct hashmap_entry *eptr,
|
||||
const struct hashmap_entry *entry_or_key,
|
||||
@@ -539,6 +588,18 @@ int bloom_filter_contains(const struct bloom_filter *filter,
|
||||
return 1;
|
||||
}
|
||||
|
||||
int bloom_filter_contains_vec(const struct bloom_filter *filter,
|
||||
const struct bloom_keyvec *vec,
|
||||
const struct bloom_filter_settings *settings)
|
||||
{
|
||||
int ret = 1;
|
||||
|
||||
for (size_t nr = 0; ret > 0 && nr < vec->count; nr++)
|
||||
ret = bloom_filter_contains(filter, &vec->key[nr], settings);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
|
||||
int version)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user