mirror of
https://github.com/git/git.git
synced 2026-05-31 11:19:02 +02:00
Merge branch 'js/objects-larger-than-4gb-on-windows'
Update code paths that assumed "unsigned long" was long enough for "size_t".

* js/objects-larger-than-4gb-on-windows:
  - ci: run expensive tests on push builds to integration branches
  - t5608: mark >4GB tests as EXPENSIVE
  - test-tool synthesize: add precomputed SHA-256 pack for 4 GiB + 1
  - test-tool synthesize: precompute pack for 4 GiB + 1
  - test-tool synthesize: use the unsafe hash for speed
  - t5608: add regression test for >4GB object clone
  - test-tool: add a helper to synthesize large packfiles
  - delta, packfile: use size_t for delta header sizes
  - odb, packfile: use size_t for streaming object sizes
  - git-zlib: handle data streams larger than 4GB
  - index-pack, unpack-objects: use size_t for object size
This commit is contained in:
@@ -872,6 +872,7 @@ TEST_BUILTINS_OBJS += test-submodule-config.o
|
||||
TEST_BUILTINS_OBJS += test-submodule-nested-repo-config.o
|
||||
TEST_BUILTINS_OBJS += test-submodule.o
|
||||
TEST_BUILTINS_OBJS += test-subprocess.o
|
||||
TEST_BUILTINS_OBJS += test-synthesize.o
|
||||
TEST_BUILTINS_OBJS += test-trace2.o
|
||||
TEST_BUILTINS_OBJS += test-truncate.o
|
||||
TEST_BUILTINS_OBJS += test-userdiff.o
|
||||
|
||||
@@ -37,7 +37,7 @@ static const char index_pack_usage[] =
|
||||
|
||||
struct object_entry {
|
||||
struct pack_idx_entry idx;
|
||||
unsigned long size;
|
||||
size_t size;
|
||||
unsigned char hdr_size;
|
||||
signed char type;
|
||||
signed char real_type;
|
||||
@@ -469,7 +469,7 @@ static int is_delta_type(enum object_type type)
|
||||
return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
|
||||
}
|
||||
|
||||
static void *unpack_entry_data(off_t offset, unsigned long size,
|
||||
static void *unpack_entry_data(off_t offset, size_t size,
|
||||
enum object_type type, struct object_id *oid)
|
||||
{
|
||||
static char fixed_buf[8192];
|
||||
@@ -524,7 +524,7 @@ static void *unpack_raw_entry(struct object_entry *obj,
|
||||
struct object_id *oid)
|
||||
{
|
||||
unsigned char *p;
|
||||
unsigned long size, c;
|
||||
size_t size, c;
|
||||
off_t base_offset;
|
||||
unsigned shift;
|
||||
void *data;
|
||||
@@ -539,6 +539,8 @@ static void *unpack_raw_entry(struct object_entry *obj,
|
||||
size = (c & 15);
|
||||
shift = 4;
|
||||
while (c & 0x80) {
|
||||
if ((bitsizeof(size_t) - 7) < shift)
|
||||
die(_("object size too large for this platform"));
|
||||
p = fill(1);
|
||||
c = *p;
|
||||
use(1);
|
||||
|
||||
+22
-12
@@ -629,14 +629,21 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
|
||||
struct packed_git *p = IN_PACK(entry);
|
||||
struct pack_window *w_curs = NULL;
|
||||
uint32_t pos;
|
||||
off_t offset;
|
||||
off_t offset, cur;
|
||||
enum object_type type = oe_type(entry);
|
||||
enum object_type in_pack_type;
|
||||
off_t datalen;
|
||||
unsigned char header[MAX_PACK_OBJECT_HEADER],
|
||||
dheader[MAX_PACK_OBJECT_HEADER];
|
||||
unsigned hdrlen;
|
||||
const unsigned hashsz = the_hash_algo->rawsz;
|
||||
unsigned long entry_size = SIZE(entry);
|
||||
size_t entry_size;
|
||||
|
||||
cur = entry->in_pack_offset;
|
||||
in_pack_type = unpack_object_header(p, &w_curs, &cur, &entry_size);
|
||||
if (in_pack_type < 0)
|
||||
die(_("write_reuse_object: unable to parse object header of %s"),
|
||||
oid_to_hex(&entry->idx.oid));
|
||||
|
||||
if (DELTA(entry))
|
||||
type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
|
||||
@@ -664,7 +671,8 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
|
||||
datalen -= entry->in_pack_header_size;
|
||||
|
||||
if (!pack_to_stdout && p->index_version == 1 &&
|
||||
check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
|
||||
check_pack_inflate(p, &w_curs, offset, datalen,
|
||||
cast_size_t_to_ulong(entry_size))) {
|
||||
error(_("corrupt packed object for %s"),
|
||||
oid_to_hex(&entry->idx.oid));
|
||||
unuse_pack(&w_curs);
|
||||
@@ -1087,7 +1095,7 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile,
|
||||
{
|
||||
off_t offset, next, cur;
|
||||
enum object_type type;
|
||||
unsigned long size;
|
||||
size_t size;
|
||||
|
||||
offset = pack_pos_to_offset(reuse_packfile, pos);
|
||||
next = pack_pos_to_offset(reuse_packfile, pos + 1);
|
||||
@@ -2243,7 +2251,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
|
||||
off_t ofs;
|
||||
unsigned char *buf, c;
|
||||
enum object_type type;
|
||||
unsigned long in_pack_size;
|
||||
size_t in_pack_size;
|
||||
|
||||
buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
|
||||
|
||||
@@ -2270,7 +2278,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
|
||||
default:
|
||||
/* Not a delta hence we've already got all we need. */
|
||||
oe_set_type(entry, entry->in_pack_type);
|
||||
SET_SIZE(entry, in_pack_size);
|
||||
SET_SIZE(entry, cast_size_t_to_ulong(in_pack_size));
|
||||
entry->in_pack_header_size = used;
|
||||
if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
|
||||
goto give_up;
|
||||
@@ -2324,8 +2332,8 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
|
||||
if (have_base &&
|
||||
can_reuse_delta(&base_ref, entry, &base_entry)) {
|
||||
oe_set_type(entry, entry->in_pack_type);
|
||||
SET_SIZE(entry, in_pack_size); /* delta size */
|
||||
SET_DELTA_SIZE(entry, in_pack_size);
|
||||
SET_SIZE(entry, cast_size_t_to_ulong(in_pack_size)); /* delta size */
|
||||
SET_DELTA_SIZE(entry, cast_size_t_to_ulong(in_pack_size));
|
||||
|
||||
if (base_entry) {
|
||||
SET_DELTA(entry, base_entry);
|
||||
@@ -2734,16 +2742,18 @@ unsigned long oe_get_size_slow(struct packing_data *pack,
|
||||
struct pack_window *w_curs;
|
||||
unsigned char *buf;
|
||||
enum object_type type;
|
||||
unsigned long used, avail, size;
|
||||
unsigned long used, avail;
|
||||
size_t size;
|
||||
|
||||
if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
|
||||
unsigned long sz;
|
||||
packing_data_lock(&to_pack);
|
||||
if (odb_read_object_info(the_repository->objects,
|
||||
&e->idx.oid, &size) < 0)
|
||||
&e->idx.oid, &sz) < 0)
|
||||
die(_("unable to get size of %s"),
|
||||
oid_to_hex(&e->idx.oid));
|
||||
packing_data_unlock(&to_pack);
|
||||
return size;
|
||||
return sz;
|
||||
}
|
||||
|
||||
p = oe_in_pack(pack, e);
|
||||
@@ -2760,7 +2770,7 @@ unsigned long oe_get_size_slow(struct packing_data *pack,
|
||||
|
||||
unuse_pack(&w_curs);
|
||||
packing_data_unlock(&to_pack);
|
||||
return size;
|
||||
return cast_size_t_to_ulong(size);
|
||||
}
|
||||
|
||||
static int try_delta(struct unpacked *trg, struct unpacked *src,
|
||||
|
||||
@@ -533,7 +533,7 @@ static void unpack_one(unsigned nr)
|
||||
{
|
||||
unsigned shift;
|
||||
unsigned char *pack;
|
||||
unsigned long size, c;
|
||||
size_t size, c;
|
||||
enum object_type type;
|
||||
|
||||
obj_list[nr].offset = consumed_bytes;
|
||||
@@ -545,6 +545,8 @@ static void unpack_one(unsigned nr)
|
||||
size = (c & 15);
|
||||
shift = 4;
|
||||
while (c & 0x80) {
|
||||
if ((bitsizeof(size_t) - 7) < shift)
|
||||
die(_("object size too large for this platform"));
|
||||
pack = fill(1);
|
||||
c = *pack;
|
||||
use(1);
|
||||
|
||||
@@ -314,6 +314,15 @@ export DEFAULT_TEST_TARGET=prove
|
||||
export GIT_TEST_CLONE_2GB=true
|
||||
export SKIP_DASHED_BUILT_INS=YesPlease
|
||||
|
||||
# Enable expensive tests on push builds to integration branches, but
|
||||
# not on PR builds where the extra time is not justified for every
|
||||
# iteration.
|
||||
case "$GITHUB_EVENT_NAME,$CI_BRANCH" in
|
||||
push,*next*|push,*master*|push,*main*|push,*maint*)
|
||||
export GIT_TEST_LONG=YesPlease
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$distro" in
|
||||
ubuntu-*)
|
||||
# Python 2 is end of life, and Ubuntu 23.04 and newer don't actually
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
# define z_stream_s zng_stream_s
|
||||
# define gz_header_s zng_gz_header_s
|
||||
|
||||
# define adler32(adler, buf, len) zng_adler32(adler, buf, len)
|
||||
|
||||
# define crc32(crc, buf, len) zng_crc32(crc, buf, len)
|
||||
|
||||
# define inflate(strm, bits) zng_inflate(strm, bits)
|
||||
|
||||
@@ -86,8 +86,11 @@ void *patch_delta(const void *src_buf, unsigned long src_size,
|
||||
* This must be called twice on the delta data buffer, first to get the
|
||||
* expected source buffer size, and again to get the target buffer size.
|
||||
*/
|
||||
static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
|
||||
const unsigned char *top)
|
||||
/*
|
||||
* Variant that returns size_t and therefore does not truncate; use it for
* objects larger than 4GB where "unsigned long" is only 32 bits (Windows).
|
||||
*/
|
||||
static inline size_t get_delta_hdr_size_sz(const unsigned char **datap,
|
||||
const unsigned char *top)
|
||||
{
|
||||
const unsigned char *data = *datap;
|
||||
size_t cmd, size = 0;
|
||||
@@ -98,6 +101,13 @@ static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
|
||||
i += 7;
|
||||
} while (cmd & 0x80 && data < top);
|
||||
*datap = data;
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
 * "unsigned long" flavour of get_delta_hdr_size_sz(): decodes the size via
 * the size_t variant and converts the result with cast_size_t_to_ulong().
 * Callers that must handle sizes wider than "unsigned long" should call
 * get_delta_hdr_size_sz() directly.
 */
static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
					       const unsigned char *top)
{
	return cast_size_t_to_ulong(get_delta_hdr_size_sz(datap, top));
}
|
||||
|
||||
|
||||
+17
-8
@@ -30,6 +30,9 @@ static const char *zerr_to_string(int status)
|
||||
*/
|
||||
/* #define ZLIB_BUF_MAX ((uInt)-1) */
|
||||
#define ZLIB_BUF_MAX ((uInt) 1024 * 1024 * 1024) /* 1GB */
|
||||
|
||||
/* uLong is 32-bit on Windows, even on 64-bit systems */
|
||||
#define ULONG_MAX_VALUE maximum_unsigned_value_of_type(uLong)
|
||||
static inline uInt zlib_buf_cap(unsigned long len)
|
||||
{
|
||||
return (ZLIB_BUF_MAX < len) ? ZLIB_BUF_MAX : len;
|
||||
@@ -39,31 +42,37 @@ static void zlib_pre_call(git_zstream *s)
|
||||
{
|
||||
s->z.next_in = s->next_in;
|
||||
s->z.next_out = s->next_out;
|
||||
s->z.total_in = s->total_in;
|
||||
s->z.total_out = s->total_out;
|
||||
s->z.total_in = (uLong)(s->total_in & ULONG_MAX_VALUE);
|
||||
s->z.total_out = (uLong)(s->total_out & ULONG_MAX_VALUE);
|
||||
s->z.avail_in = zlib_buf_cap(s->avail_in);
|
||||
s->z.avail_out = zlib_buf_cap(s->avail_out);
|
||||
}
|
||||
|
||||
static void zlib_post_call(git_zstream *s, int status)
|
||||
{
|
||||
unsigned long bytes_consumed;
|
||||
unsigned long bytes_produced;
|
||||
size_t bytes_consumed;
|
||||
size_t bytes_produced;
|
||||
|
||||
bytes_consumed = s->z.next_in - s->next_in;
|
||||
bytes_produced = s->z.next_out - s->next_out;
|
||||
if (s->z.total_out != s->total_out + bytes_produced)
|
||||
/*
|
||||
* zlib's total_out/total_in are uLong which may wrap for >4GB.
|
||||
* We track our own totals and verify only the low bits match.
|
||||
*/
|
||||
if ((s->z.total_out & ULONG_MAX_VALUE) !=
|
||||
((s->total_out + bytes_produced) & ULONG_MAX_VALUE))
|
||||
BUG("total_out mismatch");
|
||||
/*
|
||||
* zlib does not update total_in when it returns Z_NEED_DICT,
|
||||
* causing a mismatch here. Skip the sanity check in that case.
|
||||
*/
|
||||
if (status != Z_NEED_DICT &&
|
||||
s->z.total_in != s->total_in + bytes_consumed)
|
||||
(s->z.total_in & ULONG_MAX_VALUE) !=
|
||||
((s->total_in + bytes_consumed) & ULONG_MAX_VALUE))
|
||||
BUG("total_in mismatch");
|
||||
|
||||
s->total_out = s->z.total_out;
|
||||
s->total_in = s->z.total_in;
|
||||
s->total_out += bytes_produced;
|
||||
s->total_in += bytes_consumed;
|
||||
/* zlib-ng marks `next_in` as `const`, so we have to cast it away. */
|
||||
s->next_in = (unsigned char *) s->z.next_in;
|
||||
s->next_out = s->z.next_out;
|
||||
|
||||
+2
-2
@@ -7,8 +7,8 @@ typedef struct git_zstream {
|
||||
struct z_stream_s z;
|
||||
unsigned long avail_in;
|
||||
unsigned long avail_out;
|
||||
unsigned long total_in;
|
||||
unsigned long total_out;
|
||||
size_t total_in;
|
||||
size_t total_out;
|
||||
unsigned char *next_in;
|
||||
unsigned char *next_out;
|
||||
} git_zstream;
|
||||
|
||||
+10
-2
@@ -1118,7 +1118,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
|
||||
} while (ret == Z_OK || ret == Z_BUF_ERROR);
|
||||
|
||||
if (stream.total_in != len + hdrlen)
|
||||
die(_("write stream object %ld != %"PRIuMAX), stream.total_in,
|
||||
die(_("write stream object %"PRIuMAX" != %"PRIuMAX), (uintmax_t)stream.total_in,
|
||||
(uintmax_t)len + hdrlen);
|
||||
|
||||
/*
|
||||
@@ -2326,6 +2326,7 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
|
||||
struct object_info oi = OBJECT_INFO_INIT;
|
||||
struct odb_loose_read_stream *st;
|
||||
unsigned long mapsize;
|
||||
unsigned long size_ul;
|
||||
void *mapped;
|
||||
|
||||
mapped = odb_source_loose_map_object(source, oid, &mapsize);
|
||||
@@ -2349,11 +2350,18 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
|
||||
goto error;
|
||||
}
|
||||
|
||||
oi.sizep = &st->base.size;
|
||||
/*
|
||||
* object_info.sizep is unsigned long* (32-bit on Windows), but
|
||||
* st->base.size is size_t (64-bit). Use temporary variable.
|
||||
* Note: loose objects >4GB would still truncate here, but such
|
||||
* large loose objects are uncommon (they'd normally be packed).
|
||||
*/
|
||||
oi.sizep = &size_ul;
|
||||
oi.typep = &st->base.type;
|
||||
|
||||
if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
|
||||
goto error;
|
||||
st->base.size = size_ul;
|
||||
|
||||
st->mapped = mapped;
|
||||
st->mapsize = mapsize;
|
||||
|
||||
+12
-1
@@ -157,15 +157,26 @@ static int open_istream_incore(struct odb_read_stream **out,
|
||||
.base.read = read_istream_incore,
|
||||
};
|
||||
struct odb_incore_read_stream *st;
|
||||
unsigned long size_ul;
|
||||
int ret;
|
||||
|
||||
oi.typep = &stream.base.type;
|
||||
oi.sizep = &stream.base.size;
|
||||
/*
|
||||
* object_info.sizep is unsigned long* (32-bit on Windows), but
|
||||
* stream.base.size is size_t (64-bit). We use a temporary variable
|
||||
* because the types are incompatible. Note: this path still truncates
|
||||
* for >4GB objects, but large objects should use pack streaming
|
||||
* (packfile_store_read_object_stream) which handles size_t properly.
|
||||
* This incore fallback is only used for small objects or when pack
|
||||
* streaming is unavailable.
|
||||
*/
|
||||
oi.sizep = &size_ul;
|
||||
oi.contentp = (void **)&stream.buf;
|
||||
ret = odb_read_object_info_extended(odb, oid, &oi,
|
||||
OBJECT_INFO_DIE_IF_CORRUPT);
|
||||
if (ret)
|
||||
return ret;
|
||||
stream.base.size = size_ul;
|
||||
|
||||
CALLOC_ARRAY(st, 1);
|
||||
*st = stream;
|
||||
|
||||
+1
-1
@@ -21,7 +21,7 @@ struct odb_read_stream {
|
||||
odb_read_stream_close_fn close;
|
||||
odb_read_stream_read_fn read;
|
||||
enum object_type type;
|
||||
unsigned long size; /* inflated size of full object */
|
||||
size_t size; /* inflated size of full object */
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
@@ -6,7 +6,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
|
||||
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
|
||||
{
|
||||
enum object_type type;
|
||||
unsigned long len;
|
||||
size_t len;
|
||||
|
||||
unpack_object_header_buffer((const unsigned char *)data,
|
||||
(unsigned long)size, &type, &len);
|
||||
|
||||
+1
-1
@@ -2270,7 +2270,7 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git,
|
||||
{
|
||||
off_t delta_obj_offset;
|
||||
enum object_type type;
|
||||
unsigned long size;
|
||||
size_t size;
|
||||
|
||||
if (pack_pos >= pack->p->num_objects)
|
||||
return -1; /* not actually in the pack */
|
||||
|
||||
+4
-2
@@ -110,7 +110,7 @@ static int verify_packfile(struct repository *r,
|
||||
void *data;
|
||||
struct object_id oid;
|
||||
enum object_type type;
|
||||
unsigned long size;
|
||||
size_t size;
|
||||
off_t curpos;
|
||||
int data_valid;
|
||||
|
||||
@@ -143,7 +143,9 @@ static int verify_packfile(struct repository *r,
|
||||
data = NULL;
|
||||
data_valid = 0;
|
||||
} else {
|
||||
data = unpack_entry(r, p, entries[i].offset, &type, &size);
|
||||
unsigned long sz;
|
||||
data = unpack_entry(r, p, entries[i].offset, &type, &sz);
|
||||
size = sz;
|
||||
data_valid = 1;
|
||||
}
|
||||
|
||||
|
||||
+39
-18
@@ -1133,7 +1133,7 @@ out:
|
||||
}
|
||||
|
||||
unsigned long unpack_object_header_buffer(const unsigned char *buf,
|
||||
unsigned long len, enum object_type *type, unsigned long *sizep)
|
||||
unsigned long len, enum object_type *type, size_t *sizep)
|
||||
{
|
||||
unsigned shift;
|
||||
size_t size, c;
|
||||
@@ -1144,7 +1144,11 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
|
||||
size = c & 15;
|
||||
shift = 4;
|
||||
while (c & 0x80) {
|
||||
if (len <= used || (bitsizeof(long) - 7) < shift) {
|
||||
/*
|
||||
* Each continuation byte adds 7 bits. Ensure shift won't
|
||||
* overflow size_t (use size_t not long for 64-bit on Windows).
|
||||
*/
|
||||
if (len <= used || (bitsizeof(size_t) - 7) < shift) {
|
||||
error("bad object header");
|
||||
size = used = 0;
|
||||
break;
|
||||
@@ -1153,13 +1157,16 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
|
||||
size = st_add(size, st_left_shift(c & 0x7f, shift));
|
||||
shift += 7;
|
||||
}
|
||||
*sizep = cast_size_t_to_ulong(size);
|
||||
*sizep = size;
|
||||
return used;
|
||||
}
|
||||
|
||||
unsigned long get_size_from_delta(struct packed_git *p,
|
||||
struct pack_window **w_curs,
|
||||
off_t curpos)
|
||||
/*
|
||||
* Variant that returns size_t, for delta results larger than 4GB where
* "unsigned long" is only 32 bits (Windows).
|
||||
*/
|
||||
static size_t get_size_from_delta_sz(struct packed_git *p,
|
||||
struct pack_window **w_curs,
|
||||
off_t curpos)
|
||||
{
|
||||
const unsigned char *data;
|
||||
unsigned char delta_head[20], *in;
|
||||
@@ -1206,16 +1213,24 @@ unsigned long get_size_from_delta(struct packed_git *p,
|
||||
data = delta_head;
|
||||
|
||||
/* ignore base size */
|
||||
get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
|
||||
get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head));
|
||||
|
||||
/* Read the result size */
|
||||
return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
|
||||
return get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head));
|
||||
}
|
||||
|
||||
unsigned long get_size_from_delta(struct packed_git *p,
|
||||
struct pack_window **w_curs,
|
||||
off_t curpos)
|
||||
{
|
||||
size_t size = get_size_from_delta_sz(p, w_curs, curpos);
|
||||
return cast_size_t_to_ulong(size);
|
||||
}
|
||||
|
||||
int unpack_object_header(struct packed_git *p,
|
||||
struct pack_window **w_curs,
|
||||
off_t *curpos,
|
||||
unsigned long *sizep)
|
||||
size_t *sizep)
|
||||
{
|
||||
unsigned char *base;
|
||||
unsigned long left;
|
||||
@@ -1367,7 +1382,7 @@ static enum object_type packed_to_object_type(struct repository *r,
|
||||
|
||||
while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
|
||||
off_t base_offset;
|
||||
unsigned long size;
|
||||
size_t size;
|
||||
/* Push the object we're going to leave behind */
|
||||
if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
|
||||
poi_stack_alloc = alloc_nr(poi_stack_nr);
|
||||
@@ -1586,7 +1601,7 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
|
||||
uint32_t *maybe_index_pos, struct object_info *oi)
|
||||
{
|
||||
struct pack_window *w_curs = NULL;
|
||||
unsigned long size;
|
||||
size_t size;
|
||||
off_t curpos = obj_offset;
|
||||
enum object_type type = OBJ_NONE;
|
||||
uint32_t pack_pos;
|
||||
@@ -1614,14 +1629,18 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
*oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
|
||||
if (*oi->sizep == 0) {
|
||||
/*
|
||||
* Use size_t variant to avoid die() on >4GB deltas.
|
||||
* oi->sizep is unsigned long, so truncation may occur,
|
||||
* but streaming code uses its own size_t tracking.
|
||||
*/
|
||||
size = get_size_from_delta_sz(p, &w_curs, tmp_pos);
|
||||
if (size == 0) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
*oi->sizep = size;
|
||||
}
|
||||
*oi->sizep = (unsigned long)size;
|
||||
}
|
||||
|
||||
if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
|
||||
@@ -1778,7 +1797,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
|
||||
struct pack_window *w_curs = NULL;
|
||||
off_t curpos = obj_offset;
|
||||
void *data = NULL;
|
||||
unsigned long size;
|
||||
size_t size;
|
||||
enum object_type type;
|
||||
struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
|
||||
struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
|
||||
@@ -1943,8 +1962,10 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
|
||||
(uintmax_t)curpos, p->pack_name);
|
||||
data = NULL;
|
||||
} else {
|
||||
unsigned long sz;
|
||||
data = patch_delta(base, base_size, delta_data,
|
||||
delta_size, &size);
|
||||
delta_size, &sz);
|
||||
size = sz;
|
||||
|
||||
/*
|
||||
* We could not apply the delta; warn the user, but
|
||||
@@ -2929,7 +2950,7 @@ int packfile_read_object_stream(struct odb_read_stream **out,
|
||||
struct odb_packed_read_stream *stream;
|
||||
struct pack_window *window = NULL;
|
||||
enum object_type in_pack_type;
|
||||
unsigned long size;
|
||||
size_t size;
|
||||
|
||||
in_pack_type = unpack_object_header(pack, &window, &offset, &size);
|
||||
unuse_pack(&window);
|
||||
|
||||
+2
-2
@@ -456,9 +456,9 @@ off_t find_pack_entry_one(const struct object_id *oid, struct packed_git *);
|
||||
|
||||
int is_pack_valid(struct packed_git *);
|
||||
void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *);
|
||||
unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
|
||||
unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, size_t *sizep);
|
||||
unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
|
||||
int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
|
||||
int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, size_t *);
|
||||
off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
|
||||
off_t *curpos, enum object_type type,
|
||||
off_t delta_obj_offset);
|
||||
|
||||
@@ -69,6 +69,7 @@ test_tool_sources = [
|
||||
'test-submodule-nested-repo-config.c',
|
||||
'test-submodule.c',
|
||||
'test-subprocess.c',
|
||||
'test-synthesize.c',
|
||||
'test-tool.c',
|
||||
'test-trace2.c',
|
||||
'test-truncate.c',
|
||||
|
||||
@@ -0,0 +1,541 @@
|
||||
#define USE_THE_REPOSITORY_VARIABLE
|
||||
|
||||
#include "test-tool.h"
|
||||
#include "git-compat-util.h"
|
||||
#include "git-zlib.h"
|
||||
#include "hash.h"
|
||||
#include "hex.h"
|
||||
#include "object-file.h"
|
||||
#include "object.h"
|
||||
#include "pack.h"
|
||||
#include "parse-options.h"
|
||||
#include "parse.h"
|
||||
#include "repository.h"
|
||||
#include "setup.h"
|
||||
#include "strbuf.h"
|
||||
#include "write-or-die.h"
|
||||
|
||||
/*
 * Largest payload written per stored (uncompressed) deflate block; the
 * block header encodes the length in two bytes, so 0xffff is the maximum.
 */
#define BLOCK_SIZE 0xffff
|
||||
/*
 * One block's worth of NUL bytes (static storage, hence zero-initialized),
 * used as the payload whenever a caller passes data == NULL.
 */
static const unsigned char zeros[BLOCK_SIZE];
|
||||
|
||||
/*
|
||||
* Write data as an uncompressed zlib stream.
|
||||
* For data larger than 64KB, writes multiple uncompressed blocks.
|
||||
* If data is NULL, writes zeros.
|
||||
* Updates the pack checksum context.
|
||||
*/
|
||||
static void write_uncompressed_zlib(FILE *f, struct git_hash_ctx *pack_ctx,
|
||||
const void *data, size_t len,
|
||||
const struct git_hash_algo *algo)
|
||||
{
|
||||
unsigned char zlib_header[2] = { 0x78, 0x01 }; /* CMF, FLG */
|
||||
unsigned char block_header[5];
|
||||
const unsigned char *p = data;
|
||||
size_t remaining = len;
|
||||
uint32_t adler = 1L; /* adler32 initial value */
|
||||
unsigned char adler_buf[4];
|
||||
|
||||
/* Write zlib header */
|
||||
fwrite_or_die(f, zlib_header, sizeof(zlib_header));
|
||||
algo->update_fn(pack_ctx, zlib_header, 2);
|
||||
|
||||
/* Write uncompressed blocks (max 64KB each) */
|
||||
do {
|
||||
size_t block_len = remaining > BLOCK_SIZE ? BLOCK_SIZE : remaining;
|
||||
int is_final = (block_len == remaining);
|
||||
const unsigned char *block_data = data ? p : zeros;
|
||||
|
||||
block_header[0] = is_final ? 0x01 : 0x00;
|
||||
block_header[1] = block_len & 0xff;
|
||||
block_header[2] = (block_len >> 8) & 0xff;
|
||||
block_header[3] = block_header[1] ^ 0xff;
|
||||
block_header[4] = block_header[2] ^ 0xff;
|
||||
|
||||
fwrite_or_die(f, block_header, sizeof(block_header));
|
||||
algo->update_fn(pack_ctx, block_header, 5);
|
||||
|
||||
if (block_len) {
|
||||
fwrite_or_die(f, block_data, block_len);
|
||||
algo->update_fn(pack_ctx, block_data, block_len);
|
||||
adler = adler32(adler, block_data, block_len);
|
||||
}
|
||||
|
||||
if (data)
|
||||
p += block_len;
|
||||
remaining -= block_len;
|
||||
} while (remaining > 0);
|
||||
|
||||
/* Write adler32 checksum */
|
||||
put_be32(adler_buf, adler);
|
||||
fwrite_or_die(f, adler_buf, sizeof(adler_buf));
|
||||
algo->update_fn(pack_ctx, adler_buf, 4);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write an uncompressed object to the pack file.
|
||||
* If `data == NULL`, it is treated like a buffer to NUL bytes.
|
||||
* Updates the pack checksum context.
|
||||
*/
|
||||
static void write_pack_object(FILE *f, struct git_hash_ctx *pack_ctx,
|
||||
enum object_type type,
|
||||
const void *data, size_t len,
|
||||
struct object_id *oid,
|
||||
const struct git_hash_algo *algo)
|
||||
{
|
||||
unsigned char pack_header[MAX_PACK_OBJECT_HEADER];
|
||||
char object_header[32];
|
||||
int pack_header_len, object_header_len;
|
||||
struct git_hash_ctx ctx;
|
||||
|
||||
/* Write pack object header */
|
||||
pack_header_len = encode_in_pack_object_header(pack_header,
|
||||
sizeof(pack_header),
|
||||
type, len);
|
||||
fwrite_or_die(f, pack_header, pack_header_len);
|
||||
algo->update_fn(pack_ctx, pack_header, pack_header_len);
|
||||
|
||||
/* Write the data as uncompressed zlib */
|
||||
write_uncompressed_zlib(f, pack_ctx, data, len, algo);
|
||||
|
||||
algo->init_fn(&ctx);
|
||||
object_header_len = format_object_header(object_header,
|
||||
sizeof(object_header),
|
||||
type, len);
|
||||
algo->update_fn(&ctx, object_header, object_header_len);
|
||||
if (data)
|
||||
algo->update_fn(&ctx, data, len);
|
||||
else {
|
||||
for (size_t i = len / BLOCK_SIZE; i; i--)
|
||||
algo->update_fn(&ctx, zeros, BLOCK_SIZE);
|
||||
algo->update_fn(&ctx, zeros, len % BLOCK_SIZE);
|
||||
}
|
||||
algo->final_oid_fn(oid, &ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fast path: precomputed pack data for a 4 GiB + 1 all-NUL blob.
|
||||
*
|
||||
* The generated pack is almost entirely zeros with a small constant
|
||||
* prefix, periodic deflate block headers, and a constant suffix
|
||||
* containing the tree, two commits, and the pack checksum. Because
|
||||
* every byte is deterministic for a given blob size and hash algorithm,
|
||||
* we can write the pack without computing any hashes at all, reducing
|
||||
* runtime from minutes of hash computation to seconds of pure I/O.
|
||||
*
|
||||
* The blob is stored as an uncompressed deflate stream: a two-byte
|
||||
* zlib header, then 65538 blocks of up to 0xffff bytes each, followed
|
||||
* by an adler32 checksum. The pack header and deflate framing are
|
||||
* shared across hash algorithms; only the suffix (which contains OIDs
|
||||
* and the pack checksum) differs.
|
||||
*
|
||||
* Constants were generated by running the generic path and extracting
|
||||
* the non-zero bytes from the resulting pack file.
|
||||
*/
|
||||
|
||||
/*
 * 4 GiB + 1 = 4294967297 bytes.  The multiplication is done in size_t, so
 * the value is only representable where size_t is wider than 32 bits.
 */
#define FAST_PACK_4G1_BLOB_SIZE ((size_t)4 * 1024 * 1024 * 1024 + 1)
|
||||
/*
 * Number of full 0xffff-byte blocks: 65537 * 0xffff = 4294967295, which
 * leaves 2 bytes of the 4 GiB + 1 blob for the final block.
 */
#define FAST_PACK_4G1_N_FULL_BLOCKS 65537
|
||||
|
||||
/*
|
||||
* Per-hash-algorithm constants for the fast path. The prefix and
|
||||
* deflate block structure are identical across algorithms; only the
|
||||
* suffix (tree, commits, pack checksum) and the commit OID differ.
|
||||
*/
|
||||
struct fast_pack {
|
||||
/* NOTE(review): presumably the hash algorithm's format id this entry applies to -- confirm at the lookup site */
uint32_t format_id;
|
||||
/* algorithm-specific trailing bytes of the pack (tree, commits, pack checksum) */
const unsigned char *suffix;
|
||||
/* number of bytes in `suffix` */
size_t suffix_len;
|
||||
/* NOTE(review): presumably the commit OID as a hex string -- confirm against the code that consumes it */
const char *commit_oid;
|
||||
};
|
||||
|
||||
/* Pack header + pack object header + zlib header + first block header */
|
||||
static const unsigned char fast_pack_prefix[] = {
|
||||
/* PACK header: signature "PACK", version 2, 5 objects (big-endian words) */
|
||||
0x50, 0x41, 0x43, 0x4b, 0x00, 0x00, 0x00, 0x02,
|
||||
0x00, 0x00, 0x00, 0x05,
|
||||
/*
 * pack object header: blob, size = 4294967297
 * 0xb1 = continuation bit 0x80 | type bits 0x30 | low size nibble 0x1;
 * the following bytes carry size >> 4 = 2^28 in 7-bit groups, least
 * significant group first, with 0x80 marking "more bytes follow".
 */
|
||||
0xb1, 0x80, 0x80, 0x80, 0x80, 0x01,
|
||||
/* zlib header: CMF=0x78, FLG=0x01 */
|
||||
0x78, 0x01,
|
||||
/* first non-final block header: BFINAL=0, LEN=0xffff, NLEN=0x0000 */
|
||||
0x00, 0xff, 0xff, 0x00, 0x00
|
||||
};
|
||||
|
||||
/* Every non-final deflate block header is identical */
|
||||
static const unsigned char fast_pack_block_header[] = {
|
||||
/* BFINAL=0, LEN=0xffff (little-endian), NLEN=LEN^0xffff=0x0000 */
0x00, 0xff, 0xff, 0x00, 0x00
|
||||
};
|
||||
|
||||
/* Final block (2 data bytes) + adler32 of 4294967297 NUL bytes */
|
||||
static const unsigned char fast_pack_final_block[] = {
|
||||
/* BFINAL=1, LEN=2, NLEN=0xfffd */
|
||||
0x01, 0x02, 0x00, 0xfd, 0xff,
|
||||
/* 2 NUL data bytes */
|
||||
0x00, 0x00,
|
||||
/*
 * adler32, big-endian: for all-NUL input the low half stays at 1 and
 * the high half is 4294967297 mod 65521 = 226 = 0x00e2.
 */
|
||||
0x00, 0xe2, 0x00, 0x01
|
||||
};
|
||||
|
||||
/*
|
||||
* SHA-1 suffix: tree, commit, empty tree, final commit, pack checksum.
|
||||
*/
|
||||
static const unsigned char fast_pack_sha1_suffix[] = {
|
||||
0xa0, 0x02, 0x78, 0x01, 0x01, 0x20, 0x00, 0xdf,
|
||||
0xff, 0x31, 0x30, 0x30, 0x36, 0x34, 0x34, 0x20,
|
||||
0x66, 0x69, 0x6c, 0x65, 0x00, 0x3e, 0xb7, 0xfe,
|
||||
0xb1, 0x41, 0x3c, 0x75, 0x7f, 0x0d, 0x81, 0x81,
|
||||
0xde, 0xb2, 0x8d, 0x1d, 0xab, 0x03, 0xd6, 0x48,
|
||||
0x46, 0xb4, 0xb4, 0x0c, 0x60, 0x95, 0x0b, 0x78,
|
||||
0x01, 0x01, 0xb5, 0x00, 0x4a, 0xff, 0x74, 0x72,
|
||||
0x65, 0x65, 0x20, 0x63, 0x36, 0x38, 0x33, 0x66,
|
||||
0x63, 0x63, 0x37, 0x64, 0x31, 0x64, 0x38, 0x33,
|
||||
0x65, 0x66, 0x32, 0x66, 0x65, 0x31, 0x61, 0x66,
|
||||
0x35, 0x35, 0x32, 0x31, 0x35, 0x64, 0x30, 0x31,
|
||||
0x36, 0x38, 0x64, 0x62, 0x35, 0x32, 0x61, 0x33,
|
||||
0x61, 0x33, 0x62, 0x0a, 0x61, 0x75, 0x74, 0x68,
|
||||
0x6f, 0x72, 0x20, 0x41, 0x20, 0x55, 0x20, 0x54,
|
||||
0x68, 0x6f, 0x72, 0x20, 0x3c, 0x61, 0x75, 0x74,
|
||||
0x68, 0x6f, 0x72, 0x40, 0x65, 0x78, 0x61, 0x6d,
|
||||
0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x3e,
|
||||
0x20, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
|
||||
0x38, 0x39, 0x30, 0x20, 0x2b, 0x30, 0x30, 0x30,
|
||||
0x30, 0x0a, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74,
|
||||
0x74, 0x65, 0x72, 0x20, 0x43, 0x20, 0x4f, 0x20,
|
||||
0x4d, 0x69, 0x74, 0x74, 0x65, 0x72, 0x20, 0x3c,
|
||||
0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x74, 0x65,
|
||||
0x72, 0x40, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c,
|
||||
0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x3e, 0x20, 0x31,
|
||||
0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
|
||||
0x30, 0x20, 0x2b, 0x30, 0x30, 0x30, 0x30, 0x0a,
|
||||
0x0a, 0x4c, 0x61, 0x72, 0x67, 0x65, 0x20, 0x62,
|
||||
0x6c, 0x6f, 0x62, 0x20, 0x63, 0x6f, 0x6d, 0x6d,
|
||||
0x69, 0x74, 0x0a, 0xc6, 0x55, 0x37, 0x6b, 0x20,
|
||||
0x78, 0x01, 0x01, 0x00, 0x00, 0xff, 0xff, 0x00,
|
||||
0x00, 0x00, 0x01, 0x95, 0x0e, 0x78, 0x01, 0x01,
|
||||
0xe5, 0x00, 0x1a, 0xff, 0x74, 0x72, 0x65, 0x65,
|
||||
0x20, 0x34, 0x62, 0x38, 0x32, 0x35, 0x64, 0x63,
|
||||
0x36, 0x34, 0x32, 0x63, 0x62, 0x36, 0x65, 0x62,
|
||||
0x39, 0x61, 0x30, 0x36, 0x30, 0x65, 0x35, 0x34,
|
||||
0x62, 0x66, 0x38, 0x64, 0x36, 0x39, 0x32, 0x38,
|
||||
0x38, 0x66, 0x62, 0x65, 0x65, 0x34, 0x39, 0x30,
|
||||
0x34, 0x0a, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74,
|
||||
0x20, 0x63, 0x35, 0x62, 0x32, 0x31, 0x63, 0x36,
|
||||
0x31, 0x31, 0x61, 0x61, 0x35, 0x39, 0x34, 0x65,
|
||||
0x63, 0x39, 0x66, 0x64, 0x37, 0x65, 0x39, 0x32,
|
||||
0x63, 0x66, 0x39, 0x36, 0x34, 0x38, 0x39, 0x31,
|
||||
0x34, 0x63, 0x61, 0x34, 0x63, 0x32, 0x34, 0x31,
|
||||
0x32, 0x0a, 0x61, 0x75, 0x74, 0x68, 0x6f, 0x72,
|
||||
0x20, 0x41, 0x20, 0x55, 0x20, 0x54, 0x68, 0x6f,
|
||||
0x72, 0x20, 0x3c, 0x61, 0x75, 0x74, 0x68, 0x6f,
|
||||
0x72, 0x40, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c,
|
||||
0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x3e, 0x20, 0x31,
|
||||
0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
|
||||
0x30, 0x20, 0x2b, 0x30, 0x30, 0x30, 0x30, 0x0a,
|
||||
0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x74, 0x65,
|
||||
0x72, 0x20, 0x43, 0x20, 0x4f, 0x20, 0x4d, 0x69,
|
||||
0x74, 0x74, 0x65, 0x72, 0x20, 0x3c, 0x63, 0x6f,
|
||||
0x6d, 0x6d, 0x69, 0x74, 0x74, 0x65, 0x72, 0x40,
|
||||
0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e,
|
||||
0x63, 0x6f, 0x6d, 0x3e, 0x20, 0x31, 0x32, 0x33,
|
||||
0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, 0x20,
|
||||
0x2b, 0x30, 0x30, 0x30, 0x30, 0x0a, 0x0a, 0x45,
|
||||
0x6d, 0x70, 0x74, 0x79, 0x20, 0x74, 0x72, 0x65,
|
||||
0x65, 0x20, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74,
|
||||
0x0a, 0xaa, 0xb8, 0x45, 0x01, 0x8e, 0xfc, 0xf0,
|
||||
0x2f, 0x9c, 0xc5, 0xcc, 0x4f, 0x6a, 0x1a, 0xc9,
|
||||
0x2b, 0x23, 0xa9, 0xff, 0x91, 0x06, 0xc2, 0x70,
|
||||
0xe3
|
||||
};
|
||||
|
||||
/*
|
||||
* SHA-256 suffix: same structure, but with 32-byte OIDs and SHA-256
|
||||
* pack checksum (609 bytes vs 513 for SHA-1).
|
||||
*/
|
||||
static const unsigned char fast_pack_sha256_suffix[] = {
|
||||
0xac, 0x02, 0x78, 0x01, 0x01, 0x2c, 0x00, 0xd3,
|
||||
0xff, 0x31, 0x30, 0x30, 0x36, 0x34, 0x34, 0x20,
|
||||
0x66, 0x69, 0x6c, 0x65, 0x00, 0x42, 0x53, 0xc1,
|
||||
0x8a, 0x9f, 0x5e, 0xc3, 0xbb, 0x47, 0xb0, 0x83,
|
||||
0x8a, 0x19, 0xdb, 0x31, 0xbb, 0x7b, 0x0f, 0x3b,
|
||||
0x80, 0xa4, 0xbc, 0x2f, 0xaf, 0x72, 0x6b, 0xdb,
|
||||
0x62, 0xaa, 0xba, 0xdd, 0xde, 0x77, 0xc6, 0x13,
|
||||
0xeb, 0x9d, 0x0c, 0x78, 0x01, 0x01, 0xcd, 0x00,
|
||||
0x32, 0xff, 0x74, 0x72, 0x65, 0x65, 0x20, 0x62,
|
||||
0x36, 0x30, 0x39, 0x37, 0x37, 0x64, 0x37, 0x63,
|
||||
0x34, 0x63, 0x32, 0x64, 0x31, 0x65, 0x63, 0x63,
|
||||
0x33, 0x66, 0x62, 0x61, 0x31, 0x64, 0x39, 0x38,
|
||||
0x65, 0x65, 0x31, 0x32, 0x30, 0x61, 0x64, 0x63,
|
||||
0x32, 0x34, 0x38, 0x33, 0x34, 0x39, 0x35, 0x30,
|
||||
0x62, 0x65, 0x34, 0x31, 0x32, 0x64, 0x39, 0x34,
|
||||
0x63, 0x38, 0x30, 0x39, 0x34, 0x38, 0x30, 0x66,
|
||||
0x35, 0x38, 0x62, 0x61, 0x39, 0x64, 0x61, 0x0a,
|
||||
0x61, 0x75, 0x74, 0x68, 0x6f, 0x72, 0x20, 0x41,
|
||||
0x20, 0x55, 0x20, 0x54, 0x68, 0x6f, 0x72, 0x20,
|
||||
0x3c, 0x61, 0x75, 0x74, 0x68, 0x6f, 0x72, 0x40,
|
||||
0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e,
|
||||
0x63, 0x6f, 0x6d, 0x3e, 0x20, 0x31, 0x32, 0x33,
|
||||
0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, 0x20,
|
||||
0x2b, 0x30, 0x30, 0x30, 0x30, 0x0a, 0x63, 0x6f,
|
||||
0x6d, 0x6d, 0x69, 0x74, 0x74, 0x65, 0x72, 0x20,
|
||||
0x43, 0x20, 0x4f, 0x20, 0x4d, 0x69, 0x74, 0x74,
|
||||
0x65, 0x72, 0x20, 0x3c, 0x63, 0x6f, 0x6d, 0x6d,
|
||||
0x69, 0x74, 0x74, 0x65, 0x72, 0x40, 0x65, 0x78,
|
||||
0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f,
|
||||
0x6d, 0x3e, 0x20, 0x31, 0x32, 0x33, 0x34, 0x35,
|
||||
0x36, 0x37, 0x38, 0x39, 0x30, 0x20, 0x2b, 0x30,
|
||||
0x30, 0x30, 0x30, 0x0a, 0x0a, 0x4c, 0x61, 0x72,
|
||||
0x67, 0x65, 0x20, 0x62, 0x6c, 0x6f, 0x62, 0x20,
|
||||
0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x0a, 0xb7,
|
||||
0x80, 0x3d, 0xd7, 0x20, 0x78, 0x01, 0x01, 0x00,
|
||||
0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x95,
|
||||
0x11, 0x78, 0x01, 0x01, 0x15, 0x01, 0xea, 0xfe,
|
||||
0x74, 0x72, 0x65, 0x65, 0x20, 0x36, 0x65, 0x66,
|
||||
0x31, 0x39, 0x62, 0x34, 0x31, 0x32, 0x32, 0x35,
|
||||
0x63, 0x35, 0x33, 0x36, 0x39, 0x66, 0x31, 0x63,
|
||||
0x31, 0x30, 0x34, 0x64, 0x34, 0x35, 0x64, 0x38,
|
||||
0x64, 0x38, 0x35, 0x65, 0x66, 0x61, 0x39, 0x62,
|
||||
0x30, 0x35, 0x37, 0x62, 0x35, 0x33, 0x62, 0x31,
|
||||
0x34, 0x62, 0x34, 0x62, 0x39, 0x62, 0x39, 0x33,
|
||||
0x39, 0x64, 0x64, 0x37, 0x34, 0x64, 0x65, 0x63,
|
||||
0x63, 0x35, 0x33, 0x32, 0x31, 0x0a, 0x70, 0x61,
|
||||
0x72, 0x65, 0x6e, 0x74, 0x20, 0x37, 0x35, 0x62,
|
||||
0x66, 0x30, 0x63, 0x34, 0x37, 0x61, 0x65, 0x34,
|
||||
0x62, 0x62, 0x33, 0x30, 0x38, 0x65, 0x37, 0x63,
|
||||
0x63, 0x32, 0x34, 0x38, 0x32, 0x65, 0x32, 0x32,
|
||||
0x65, 0x66, 0x61, 0x65, 0x33, 0x37, 0x38, 0x37,
|
||||
0x61, 0x39, 0x36, 0x38, 0x34, 0x38, 0x62, 0x64,
|
||||
0x31, 0x37, 0x34, 0x39, 0x35, 0x36, 0x37, 0x31,
|
||||
0x34, 0x37, 0x31, 0x35, 0x32, 0x34, 0x36, 0x64,
|
||||
0x64, 0x62, 0x64, 0x35, 0x34, 0x0a, 0x61, 0x75,
|
||||
0x74, 0x68, 0x6f, 0x72, 0x20, 0x41, 0x20, 0x55,
|
||||
0x20, 0x54, 0x68, 0x6f, 0x72, 0x20, 0x3c, 0x61,
|
||||
0x75, 0x74, 0x68, 0x6f, 0x72, 0x40, 0x65, 0x78,
|
||||
0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f,
|
||||
0x6d, 0x3e, 0x20, 0x31, 0x32, 0x33, 0x34, 0x35,
|
||||
0x36, 0x37, 0x38, 0x39, 0x30, 0x20, 0x2b, 0x30,
|
||||
0x30, 0x30, 0x30, 0x0a, 0x63, 0x6f, 0x6d, 0x6d,
|
||||
0x69, 0x74, 0x74, 0x65, 0x72, 0x20, 0x43, 0x20,
|
||||
0x4f, 0x20, 0x4d, 0x69, 0x74, 0x74, 0x65, 0x72,
|
||||
0x20, 0x3c, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74,
|
||||
0x74, 0x65, 0x72, 0x40, 0x65, 0x78, 0x61, 0x6d,
|
||||
0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x3e,
|
||||
0x20, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
|
||||
0x38, 0x39, 0x30, 0x20, 0x2b, 0x30, 0x30, 0x30,
|
||||
0x30, 0x0a, 0x0a, 0x45, 0x6d, 0x70, 0x74, 0x79,
|
||||
0x20, 0x74, 0x72, 0x65, 0x65, 0x20, 0x63, 0x6f,
|
||||
0x6d, 0x6d, 0x69, 0x74, 0x0a, 0x6d, 0x6d, 0x51,
|
||||
0x9a, 0xc9, 0x11, 0x76, 0x61, 0xa3, 0x89, 0x49,
|
||||
0xb7, 0xa1, 0x58, 0xc6, 0x1d, 0x8c, 0x33, 0x75,
|
||||
0x8d, 0x7e, 0x4d, 0x8e, 0x58, 0x91, 0xf8, 0x5c,
|
||||
0x57, 0xd9, 0x89, 0x9e, 0xb8, 0xd2, 0x9a, 0xd8,
|
||||
0xc9
|
||||
};
|
||||
|
||||
static const struct fast_pack fast_packs[] = {
|
||||
{
|
||||
.format_id = GIT_SHA1_FORMAT_ID,
|
||||
.suffix = fast_pack_sha1_suffix,
|
||||
.suffix_len = sizeof(fast_pack_sha1_suffix),
|
||||
.commit_oid = "aac43daf40d0377af31aa9c798a4ae8a31b55c1d",
|
||||
},
|
||||
{
|
||||
.format_id = GIT_SHA256_FORMAT_ID,
|
||||
.suffix = fast_pack_sha256_suffix,
|
||||
.suffix_len = sizeof(fast_pack_sha256_suffix),
|
||||
.commit_oid = "63c46ca51267b1d45be69a044bb84b4bf0559f09"
|
||||
"d727f861d2ae94ddebdddbc9",
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* Try the fast path for known blob sizes. Returns 1 if the pack was
|
||||
* written from precomputed constants, 0 if the caller should fall
|
||||
* through to the generic path.
|
||||
*/
|
||||
static int generate_fast_pack(const char *path, size_t blob_size,
|
||||
const struct git_hash_algo *algo)
|
||||
{
|
||||
const struct fast_pack *fp = NULL;
|
||||
FILE *f;
|
||||
size_t i;
|
||||
|
||||
if (blob_size != FAST_PACK_4G1_BLOB_SIZE)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(fast_packs); i++) {
|
||||
if (fast_packs[i].format_id == algo->format_id) {
|
||||
fp = &fast_packs[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!fp)
|
||||
return 0;
|
||||
|
||||
f = xfopen(path, "wb");
|
||||
|
||||
fwrite_or_die(f, fast_pack_prefix, sizeof(fast_pack_prefix));
|
||||
|
||||
/* First full block: 0xffff zero bytes (header already in prefix) */
|
||||
fwrite_or_die(f, zeros, BLOCK_SIZE);
|
||||
|
||||
/* Remaining non-final full blocks */
|
||||
for (i = 1; i < FAST_PACK_4G1_N_FULL_BLOCKS; i++) {
|
||||
fwrite_or_die(f, fast_pack_block_header,
|
||||
sizeof(fast_pack_block_header));
|
||||
fwrite_or_die(f, zeros, BLOCK_SIZE);
|
||||
}
|
||||
|
||||
/* Final block (2 data bytes) + adler32 */
|
||||
fwrite_or_die(f, fast_pack_final_block,
|
||||
sizeof(fast_pack_final_block));
|
||||
|
||||
/* Tree, commits, and pack checksum */
|
||||
fwrite_or_die(f, fp->suffix, fp->suffix_len);
|
||||
|
||||
if (fclose(f))
|
||||
die_errno(_("could not close '%s'"), path);
|
||||
|
||||
printf("%s\n", fp->commit_oid);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate a pack file with a single large (>4GB) reachable object.
|
||||
*
|
||||
* Creates:
|
||||
* 1. A large blob (all NUL bytes)
|
||||
* 2. A tree containing that blob as "file"
|
||||
* 3. A commit using that tree
|
||||
* 4. The empty tree
|
||||
* 5. A child commit using the empty tree
|
||||
*
|
||||
* This is useful for testing that Git can handle objects larger than 4GB.
|
||||
*/
|
||||
static int generate_pack_with_large_object(const char *path, size_t blob_size,
|
||||
const struct git_hash_algo *algo)
|
||||
{
|
||||
FILE *f;
|
||||
struct git_hash_ctx pack_ctx;
|
||||
unsigned char pack_hash[GIT_MAX_RAWSZ];
|
||||
struct object_id blob_oid, tree_oid, commit_oid, empty_tree_oid, final_commit_oid;
|
||||
struct strbuf buf = STRBUF_INIT;
|
||||
const uint32_t object_count = 5;
|
||||
struct pack_header pack_header = {
|
||||
.hdr_signature = htonl(PACK_SIGNATURE),
|
||||
.hdr_version = htonl(PACK_VERSION),
|
||||
.hdr_entries = htonl(object_count),
|
||||
};
|
||||
|
||||
if (generate_fast_pack(path, blob_size, algo))
|
||||
return 0;
|
||||
|
||||
f = xfopen(path, "wb");
|
||||
|
||||
algo->init_fn(&pack_ctx);
|
||||
|
||||
/* Write pack header */
|
||||
fwrite_or_die(f, &pack_header, sizeof(pack_header));
|
||||
algo->update_fn(&pack_ctx, &pack_header, sizeof(pack_header));
|
||||
|
||||
/* 1. Write the large blob */
|
||||
write_pack_object(f, &pack_ctx, OBJ_BLOB, NULL, blob_size, &blob_oid, algo);
|
||||
|
||||
/* 2. Write tree containing the blob as "file" */
|
||||
strbuf_addf(&buf, "100644 file%c", '\0');
|
||||
strbuf_add(&buf, blob_oid.hash, algo->rawsz);
|
||||
write_pack_object(f, &pack_ctx, OBJ_TREE, buf.buf, buf.len, &tree_oid, algo);
|
||||
|
||||
/* 3. Write commit using that tree */
|
||||
strbuf_reset(&buf);
|
||||
strbuf_addf(&buf,
|
||||
"tree %s\n"
|
||||
"author A U Thor <author@example.com> 1234567890 +0000\n"
|
||||
"committer C O Mitter <committer@example.com> 1234567890 +0000\n"
|
||||
"\n"
|
||||
"Large blob commit\n",
|
||||
oid_to_hex(&tree_oid));
|
||||
write_pack_object(f, &pack_ctx, OBJ_COMMIT, buf.buf, buf.len, &commit_oid, algo);
|
||||
|
||||
/* 4. Write the empty tree */
|
||||
write_pack_object(f, &pack_ctx, OBJ_TREE, "", 0, &empty_tree_oid, algo);
|
||||
|
||||
/* 5. Write final commit using empty tree, with previous commit as parent */
|
||||
strbuf_reset(&buf);
|
||||
strbuf_addf(&buf,
|
||||
"tree %s\n"
|
||||
"parent %s\n"
|
||||
"author A U Thor <author@example.com> 1234567890 +0000\n"
|
||||
"committer C O Mitter <committer@example.com> 1234567890 +0000\n"
|
||||
"\n"
|
||||
"Empty tree commit\n",
|
||||
oid_to_hex(&empty_tree_oid),
|
||||
oid_to_hex(&commit_oid));
|
||||
write_pack_object(f, &pack_ctx, OBJ_COMMIT, buf.buf, buf.len, &final_commit_oid, algo);
|
||||
|
||||
/* Write pack trailer (checksum) */
|
||||
algo->final_fn(pack_hash, &pack_ctx);
|
||||
fwrite_or_die(f, pack_hash, algo->rawsz);
|
||||
if (fclose(f))
|
||||
die_errno(_("could not close '%s'"), path);
|
||||
|
||||
strbuf_release(&buf);
|
||||
|
||||
/* Print the final commit OID so caller can set up refs */
|
||||
printf("%s\n", oid_to_hex(&final_commit_oid));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cmd__synthesize__pack(int argc, const char **argv,
|
||||
const char *prefix UNUSED,
|
||||
struct repository *repo)
|
||||
{
|
||||
int non_git;
|
||||
int reachable_large = 0;
|
||||
const struct git_hash_algo *algo;
|
||||
size_t blob_size;
|
||||
uintmax_t blob_size_u;
|
||||
const char *path;
|
||||
const char * const usage[] = {
|
||||
"test-tool synthesize pack "
|
||||
"--reachable-large <blob-size> <filename>",
|
||||
NULL
|
||||
};
|
||||
struct option options[] = {
|
||||
OPT_BOOL(0, "reachable-large", &reachable_large,
|
||||
N_("write a pack with a single reachable large blob")),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
setup_git_directory_gently(&non_git);
|
||||
repo = the_repository;
|
||||
algo = unsafe_hash_algo(repo->hash_algo);
|
||||
|
||||
argc = parse_options(argc, argv, NULL, options, usage,
|
||||
PARSE_OPT_KEEP_ARGV0);
|
||||
if (argc != 3 || !reachable_large)
|
||||
usage_with_options(usage, options);
|
||||
|
||||
if (!git_parse_unsigned(argv[1], &blob_size_u,
|
||||
maximum_unsigned_value_of_type(size_t)))
|
||||
die(_("'%s' is not a valid blob size"), argv[1]);
|
||||
blob_size = blob_size_u;
|
||||
path = argv[2];
|
||||
|
||||
return !!generate_pack_with_large_object(path, blob_size, algo);
|
||||
}
|
||||
|
||||
int cmd__synthesize(int argc, const char **argv)
|
||||
{
|
||||
const char *prefix = NULL;
|
||||
char const * const synthesize_usage[] = {
|
||||
"test-tool synthesize pack <options>",
|
||||
NULL,
|
||||
};
|
||||
parse_opt_subcommand_fn *fn = NULL;
|
||||
struct option options[] = {
|
||||
OPT_SUBCOMMAND("pack", &fn, cmd__synthesize__pack),
|
||||
OPT_END()
|
||||
};
|
||||
argc = parse_options(argc, argv, prefix, options, synthesize_usage, 0);
|
||||
return !!fn(argc, argv, prefix, NULL);
|
||||
}
|
||||
@@ -82,6 +82,7 @@ static struct test_cmd cmds[] = {
|
||||
{ "submodule-config", cmd__submodule_config },
|
||||
{ "submodule-nested-repo-config", cmd__submodule_nested_repo_config },
|
||||
{ "subprocess", cmd__subprocess },
|
||||
{ "synthesize", cmd__synthesize },
|
||||
{ "trace2", cmd__trace2 },
|
||||
{ "truncate", cmd__truncate },
|
||||
{ "userdiff", cmd__userdiff },
|
||||
|
||||
@@ -75,6 +75,7 @@ int cmd__submodule(int argc, const char **argv);
|
||||
int cmd__submodule_config(int argc, const char **argv);
|
||||
int cmd__submodule_nested_repo_config(int argc, const char **argv);
|
||||
int cmd__subprocess(int argc, const char **argv);
|
||||
int cmd__synthesize(int argc, const char **argv);
|
||||
int cmd__trace2(int argc, const char **argv);
|
||||
int cmd__truncate(int argc, const char **argv);
|
||||
int cmd__userdiff(int argc, const char **argv);
|
||||
|
||||
@@ -49,4 +49,41 @@ test_expect_success 'clone - with worktree, file:// protocol' '
|
||||
|
||||
'
|
||||
|
||||
test_expect_success SIZE_T_IS_64BIT,EXPENSIVE 'set up repo with >4GB object' '
	# One byte past the 4 GiB boundary.
	blob_bytes=$((4 * 1024 * 1024 * 1024 + 1)) &&
	pack=objects/pack/test.pack &&

	git init --bare 4gb-repo &&

	# test-tool writes the pack and prints the OID of its tip commit.
	tip=$(test-tool synthesize pack --reachable-large "$blob_bytes" \
		"4gb-repo/$pack") &&
	git -C 4gb-repo index-pack "$pack" &&

	# Make the synthesized history reachable from HEAD.
	git -C 4gb-repo update-ref refs/heads/main $tip &&
	git -C 4gb-repo symbolic-ref HEAD refs/heads/main
'
|
||||
|
||||
test_expect_success SIZE_T_IS_64BIT,EXPENSIVE 'clone >4GB object via unpack-objects' '
	# With only five objects in the synthesized pack, an unpack limit
	# of 100 keeps fetch-pack on the unpack-objects path.
	git -c fetch.unpackLimit=100 clone --bare \
		"file://$(pwd)/4gb-repo" 4gb-clone-unpack &&

	# Compare the blob OID in the source and in the clone to check
	# that the large blob made it across. "cat-file -s" is not an
	# option: object_info.sizep is still unsigned long, which
	# truncates >4GB sizes on Windows. Matching OIDs are sufficient
	# because index-pack/unpack-objects already verified checksums
	# during the clone.
	expect_blob=$(git -C 4gb-repo rev-parse main^:file) &&
	actual_blob=$(git -C 4gb-clone-unpack rev-parse main^:file) &&
	test "$expect_blob" = "$actual_blob"
'
|
||||
|
||||
test_expect_success SIZE_T_IS_64BIT,EXPENSIVE 'clone with >4GB object via index-pack' '
	# An unpack limit of 1 makes fetch-pack hand the pack to
	# index-pack instead of unpack-objects.
	git -c fetch.unpackLimit=1 clone --bare \
		"file://$(pwd)/4gb-repo" 4gb-clone-index &&

	# The blob must carry the same OID in the source and in the clone.
	expect_blob=$(git -C 4gb-repo rev-parse main^:file) &&
	actual_blob=$(git -C 4gb-clone-index rev-parse main^:file) &&
	test "$expect_blob" = "$actual_blob"
'
|
||||
|
||||
test_done
|
||||
|
||||
Reference in New Issue
Block a user