From bc35ac1a6a844fc3484833afee41260e1d47042e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 20 Nov 2018 04:46:38 -0500 Subject: [PATCH 1/3] pack-objects: fix tree_depth and layer invariants Commit 108f530385 (pack-objects: move tree_depth into 'struct packing_data', 2018-08-16) dynamically manages a tree_depth array in packing_data that maintains one of these invariants: 1. tree_depth is NULL (i.e., the requested options don't require us to track tree depths) 2. tree_depth is non-NULL and has as many entries as the "objects" array We maintain (2) by: a. When the objects array grows, grow tree_depth to the same size (unless it's NULL, in which case we can leave it). b. When a caller asks to set a depth via oe_set_tree_depth(), if tree_depth is NULL we allocate it. But in (b), we use the number of stored objects, _not_ the allocated size of the objects array. So we can run into a situation like this: 1. packlist_alloc() needs to store the Nth object, so it grows the objects array to M, where M > N. 2. oe_set_tree_depth() wants to store a depth, so it allocates an array of length N. Now we've violated our invariant. 3. packlist_alloc() needs to store the N+1th object. But it _doesn't_ grow the objects array, since N <= M still holds. We try to assign to tree_depth[N+1], which is out of bounds. That doesn't happen in our test scripts, because the repositories they use are so small, but it's easy to trigger by running: echo HEAD | git pack-objects --revs --delta-islands --stdout >/dev/null in any reasonably-sized repo (like git.git). We can fix it by always growing the array to match pack->nr_alloc, not pack->nr_objects. Likewise for the "layer" array from fe0ac2fb7f (pack-objects: move 'layer' into 'struct packing_data', 2018-08-16), which has the same bug. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-objects.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pack-objects.h b/pack-objects.h index ad3c208764..741e6f862d 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -364,7 +364,7 @@ static inline void oe_set_tree_depth(struct packing_data *pack, unsigned int tree_depth) { if (!pack->tree_depth) - ALLOC_ARRAY(pack->tree_depth, pack->nr_objects); + ALLOC_ARRAY(pack->tree_depth, pack->nr_alloc); pack->tree_depth[e - pack->objects] = tree_depth; } @@ -381,7 +381,7 @@ static inline void oe_set_layer(struct packing_data *pack, unsigned char layer) { if (!pack->layer) - ALLOC_ARRAY(pack->layer, pack->nr_objects); + ALLOC_ARRAY(pack->layer, pack->nr_alloc); pack->layer[e - pack->objects] = layer; } From e159b8107190aa53b27f9f106e5874597106eb88 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 20 Nov 2018 04:48:57 -0500 Subject: [PATCH 2/3] pack-objects: zero-initialize tree_depth/layer arrays Commit 108f530385 (pack-objects: move tree_depth into 'struct packing_data', 2018-08-16) started maintaining a tree_depth array that matches the "objects" array. We extend the array when: 1. The objects array is extended, in which case we use realloc to extend the tree_depth array. 2. A caller asks to store a tree_depth for object N, and this is the first such request; we create the array from scratch and store the value for N. In the latter case, though, we use regular xmalloc(), and the depth values for any objects besides N is undefined. This happens to not trigger a bug with the current code, but the reasons are quite subtle: - we never ask about the depth for any object with index i < N. This is because we store the depth immediately for all trees and blobs. So any such "i" must be a non-tree, and therefore we will never need to care about its depth (in fact, we really only care about the depth of trees). - there are no objects at this point with index i > N, because we always fill in the depth for a tree immediately after its object entry is created (we may still allocate uninitialized depth entries, but they'll be initialized by packlist_alloc() when it initializes the entry in the "objects" array). So it works, but only by chance. To be defensive, let's zero the array, which matches the "unset" values which would be handed out by oe_tree_depth() already. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- git-compat-util.h | 1 + pack-objects.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/git-compat-util.h b/git-compat-util.h index 9a64998b24..b904af3f22 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -844,6 +844,7 @@ extern FILE *fopen_or_warn(const char *path, const char *mode); #define FREE_AND_NULL(p) do { free(p); (p) = NULL; } while (0) #define ALLOC_ARRAY(x, alloc) (x) = xmalloc(st_mult(sizeof(*(x)), (alloc))) +#define CALLOC_ARRAY(x, alloc) (x) = xcalloc((alloc), sizeof(*(x))); #define REALLOC_ARRAY(x, alloc) (x) = xrealloc((x), st_mult(sizeof(*(x)), (alloc))) #define COPY_ARRAY(dst, src, n) copy_array((dst), (src), (n), sizeof(*(dst)) + \ diff --git a/pack-objects.h b/pack-objects.h index 741e6f862d..a89f76e5b1 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -364,7 +364,7 @@ static inline void oe_set_tree_depth(struct packing_data *pack, unsigned int tree_depth) { if (!pack->tree_depth) - ALLOC_ARRAY(pack->tree_depth, pack->nr_alloc); + CALLOC_ARRAY(pack->tree_depth, pack->nr_alloc); pack->tree_depth[e - pack->objects] = tree_depth; } @@ -381,7 +381,7 @@ static inline void oe_set_layer(struct packing_data *pack, unsigned char layer) { if (!pack->layer) - ALLOC_ARRAY(pack->layer, pack->nr_alloc); + CALLOC_ARRAY(pack->layer, pack->nr_alloc); pack->layer[e - pack->objects] = layer; } From 3949053617652693b659a933a1393f8be189f26a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 20 Nov 2018 04:50:53 -0500 Subject: [PATCH 3/3] pack-objects: fix off-by-one in delta-island tree-depth computation When delta-islands are in use, we need to record the deepest path at which we find each tree and blob. Our loop to do so counts slashes, so "foo" is depth 0, "foo/bar" is depth 1, and so on. However, this neglects root trees, which are represented by the empty string. Those also have depth 0, but are at a layer above "foo". Thus, "foo" should be 1, "foo/bar" at 2, and so on. We use this depth to topo-sort the trees in resolve_tree_islands(). As a result, we may fail to visit a root tree before the sub-trees it contains, and therefore not correctly pass down the island marks. That in turn could lead to missing some delta opportunities (objects are in the same island, but we didn't realize it) or creating unwanted cross-island deltas (one object is in an island another isn't, but we don't realize). In practice, it seems to have only a small effect. Some experiments on the real-world git/git fork network at GitHub showed an improvement of only 0.14% in the resulting clone size. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index d5d91eeed5..3d70ab1f42 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2709,9 +2709,11 @@ static void show_object(struct object *obj, const char *name, void *data) if (use_delta_islands) { const char *p; - unsigned depth = 0; + unsigned depth; struct object_entry *ent; + /* the empty string is a root tree, which is depth 0 */ + depth = *name ? 1 : 0; for (p = strchr(name, '/'); p; p = strchr(p + 1, '/')) depth++;