From 6bdda3a3b00fff9a1d64d1bb4732f0c446d7012c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:26 +0100 Subject: [PATCH 01/19] streaming: rename `git_istream` into `odb_read_stream` In the following patches we are about to make the `git_istream` more generic so that it becomes fully controlled by the specific object source that wants to create it. As part of these refactorings we'll fully move the structure into the object database subsystem. Prepare for this change by renaming the structure from `git_istream` to `odb_read_stream`. This mirrors the `odb_write_stream` structure that we already have. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 2 +- archive-zip.c | 2 +- builtin/index-pack.c | 2 +- builtin/pack-objects.c | 4 +-- object-file.c | 2 +- streaming.c | 62 +++++++++++++++++++++--------------------- streaming.h | 12 ++++---- 7 files changed, 43 insertions(+), 43 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 73b63ddc41..dc1eda09e0 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -129,7 +129,7 @@ static void write_trailer(void) */ static int stream_blocked(struct repository *r, const struct object_id *oid) { - struct git_istream *st; + struct odb_read_stream *st; enum object_type type; unsigned long sz; char buf[BLOCKSIZE]; diff --git a/archive-zip.c b/archive-zip.c index bea5bdd43d..40a9c93ff9 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -309,7 +309,7 @@ static int write_zip_entry(struct archiver_args *args, enum zip_method method; unsigned char *out; void *deflated = NULL; - struct git_istream *stream = NULL; + struct odb_read_stream *stream = NULL; unsigned long flags = 0; int is_binary = -1; const char *path_without_prefix = path + args->baselen; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 2b78ba7fe4..5f90f12f92 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -762,7 +762,7 @@ static void find_ref_delta_children(const struct object_id *oid, struct compare_data { struct object_entry *entry; - struct git_istream *st; + struct odb_read_stream *st; unsigned char *buf; unsigned long buf_size; }; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 69e80b1443..c693d948e1 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -404,7 +404,7 @@ static unsigned long do_compress(void **pptr, unsigned long size) return stream.total_out; } -static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f, +static unsigned long write_large_blob_data(struct odb_read_stream *st, struct hashfile *f, const struct object_id *oid) { git_zstream stream; @@ -513,7 +513,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent unsigned hdrlen; enum object_type type; void *buf; - struct git_istream *st = NULL; + struct odb_read_stream *st = NULL; const unsigned hashsz = the_hash_algo->rawsz; if (!usable_delta) { diff --git a/object-file.c b/object-file.c index 811c569ed3..b62b21a452 100644 --- a/object-file.c +++ b/object-file.c @@ -134,7 +134,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) struct object_id real_oid; unsigned long size; enum object_type obj_type; - struct git_istream *st; + struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; diff --git a/streaming.c b/streaming.c index 00ad649ae3..1fb4b7c1c0 100644 --- a/streaming.c +++ b/streaming.c @@ -14,17 +14,17 @@ #include "replace-object.h" #include "packfile.h" -typedef int (*open_istream_fn)(struct git_istream *, +typedef int (*open_istream_fn)(struct odb_read_stream *, struct repository *, const struct object_id *, enum object_type *); -typedef int (*close_istream_fn)(struct git_istream *); -typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t); +typedef int (*close_istream_fn)(struct odb_read_stream *); +typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); #define FILTER_BUFFER (1024*16) struct filtered_istream { - struct git_istream *upstream; + struct odb_read_stream *upstream; struct stream_filter *filter; char ibuf[FILTER_BUFFER]; char obuf[FILTER_BUFFER]; @@ -33,7 +33,7 @@ struct filtered_istream { int input_finished; }; -struct git_istream { +struct odb_read_stream { open_istream_fn open; close_istream_fn close; read_istream_fn read; @@ -71,7 +71,7 @@ struct git_istream { * *****************************************************************/ -static void close_deflated_stream(struct git_istream *st) +static void close_deflated_stream(struct odb_read_stream *st) { if (st->z_state == z_used) git_inflate_end(&st->z); @@ -84,13 +84,13 @@ static void close_deflated_stream(struct git_istream *st) * *****************************************************************/ -static int close_istream_filtered(struct git_istream *st) +static int close_istream_filtered(struct odb_read_stream *st) { free_stream_filter(st->u.filtered.filter); return close_istream(st->u.filtered.upstream); } -static ssize_t read_istream_filtered(struct git_istream *st, char *buf, +static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, size_t sz) { struct filtered_istream *fs = &(st->u.filtered); @@ -150,10 +150,10 @@ static ssize_t read_istream_filtered(struct git_istream *st, char *buf, return filled; } -static struct git_istream *attach_stream_filter(struct git_istream *st, - struct stream_filter *filter) +static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, + struct stream_filter *filter) { - struct git_istream *ifs = xmalloc(sizeof(*ifs)); + struct odb_read_stream *ifs = xmalloc(sizeof(*ifs)); struct filtered_istream *fs = &(ifs->u.filtered); ifs->close = close_istream_filtered; @@ -173,7 +173,7 @@ static struct git_istream *attach_stream_filter(struct git_istream *st, * *****************************************************************/ -static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) +static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t sz) { size_t total_read = 0; @@ -218,14 +218,14 @@ static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) return total_read; } -static int close_istream_loose(struct git_istream *st) +static int close_istream_loose(struct odb_read_stream *st) { close_deflated_stream(st); munmap(st->u.loose.mapped, st->u.loose.mapsize); return 0; } -static int open_istream_loose(struct git_istream *st, struct repository *r, +static int open_istream_loose(struct odb_read_stream *st, struct repository *r, const struct object_id *oid, enum object_type *type) { @@ -277,7 +277,7 @@ error: * *****************************************************************/ -static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf, size_t sz) { size_t total_read = 0; @@ -336,13 +336,13 @@ static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, return total_read; } -static int close_istream_pack_non_delta(struct git_istream *st) +static int close_istream_pack_non_delta(struct odb_read_stream *st) { close_deflated_stream(st); return 0; } -static int open_istream_pack_non_delta(struct git_istream *st, +static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, const struct object_id *oid UNUSED, enum object_type *type UNUSED) @@ -380,13 +380,13 @@ static int open_istream_pack_non_delta(struct git_istream *st, * *****************************************************************/ -static int close_istream_incore(struct git_istream *st) +static int close_istream_incore(struct odb_read_stream *st) { free(st->u.incore.buf); return 0; } -static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) +static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t sz) { size_t read_size = sz; size_t remainder = st->size - st->u.incore.read_ptr; @@ -400,7 +400,7 @@ static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) return read_size; } -static int open_istream_incore(struct git_istream *st, struct repository *r, +static int open_istream_incore(struct odb_read_stream *st, struct repository *r, const struct object_id *oid, enum object_type *type) { struct object_info oi = OBJECT_INFO_INIT; @@ -420,7 +420,7 @@ static int open_istream_incore(struct git_istream *st, struct repository *r, * static helpers variables and functions for users of streaming interface *****************************************************************************/ -static int istream_source(struct git_istream *st, +static int istream_source(struct odb_read_stream *st, struct repository *r, const struct object_id *oid, enum object_type *type) @@ -458,25 +458,25 @@ static int istream_source(struct git_istream *st, * Users of streaming interface ****************************************************************/ -int close_istream(struct git_istream *st) +int close_istream(struct odb_read_stream *st) { int r = st->close(st); free(st); return r; } -ssize_t read_istream(struct git_istream *st, void *buf, size_t sz) +ssize_t read_istream(struct odb_read_stream *st, void *buf, size_t sz) { return st->read(st, buf, sz); } -struct git_istream *open_istream(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size, - struct stream_filter *filter) +struct odb_read_stream *open_istream(struct repository *r, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter) { - struct git_istream *st = xmalloc(sizeof(*st)); + struct odb_read_stream *st = xmalloc(sizeof(*st)); const struct object_id *real = lookup_replace_object(r, oid); int ret = istream_source(st, r, real, type); @@ -493,7 +493,7 @@ struct git_istream *open_istream(struct repository *r, } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ - struct git_istream *nst = attach_stream_filter(st, filter); + struct odb_read_stream *nst = attach_stream_filter(st, filter); if (!nst) { close_istream(st); return NULL; @@ -508,7 +508,7 @@ struct git_istream *open_istream(struct repository *r, int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, int can_seek) { - struct git_istream *st; + struct odb_read_stream *st; enum object_type type; unsigned long sz; ssize_t kept = 0; diff --git a/streaming.h b/streaming.h index bd27f59e57..f5ff5d7ac9 100644 --- a/streaming.h +++ b/streaming.h @@ -7,14 +7,14 @@ #include "object.h" /* opaque */ -struct git_istream; +struct odb_read_stream; struct stream_filter; -struct git_istream *open_istream(struct repository *, const struct object_id *, - enum object_type *, unsigned long *, - struct stream_filter *); -int close_istream(struct git_istream *); -ssize_t read_istream(struct git_istream *, void *, size_t); +struct odb_read_stream *open_istream(struct repository *, const struct object_id *, + enum object_type *, unsigned long *, + struct stream_filter *); +int close_istream(struct odb_read_stream *); +ssize_t read_istream(struct odb_read_stream *, void *, size_t); int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek); From 70c8b5f5453b9f128a72fad4398acfb9e7d869c4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:27 +0100 Subject: [PATCH 02/19] streaming: drop the `open()` callback function When creating a read stream we first populate the structure with the open callback function and then subsequently call the function. This layout is somewhat weird though: - The structure needs to be allocated and partially populated with the open function before we can properly initialize it. - We only ever call the `open()` callback function right after having populated the `struct odb_read_stream::open` member, and it's never called thereafter again. So it is somewhat pointless to store the callback in the first place. Especially the first point creates a problem for us. In subsequent commits we'll want to fully move construction of the read source into the respective object sources. E.g., the loose object source will be the one that is responsible for creating the structure. But this creates a problem: if we first need to create the structure so that we can call the source-specific callback we cannot fully handle creation of the structure in the source itself. We could of course work around that and have the loose object source create the structure and populate its `open()` callback, only. But this doesn't really buy us anything due to the second bullet point above. Instead, drop the callback entirely and refactor `istream_source()` so that we open the streams immediately. This unblocks a subsequent step, where we'll also start to allocate the structure in the source-specific logic. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/streaming.c b/streaming.c index 1fb4b7c1c0..1bb3f393b8 100644 --- a/streaming.c +++ b/streaming.c @@ -14,10 +14,6 @@ #include "replace-object.h" #include "packfile.h" -typedef int (*open_istream_fn)(struct odb_read_stream *, - struct repository *, - const struct object_id *, - enum object_type *); typedef int (*close_istream_fn)(struct odb_read_stream *); typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); @@ -34,7 +30,6 @@ struct filtered_istream { }; struct odb_read_stream { - open_istream_fn open; close_istream_fn close; read_istream_fn read; @@ -437,21 +432,25 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - st->open = open_istream_loose; + if (open_istream_loose(st, r, oid, type) < 0) + break; return 0; case OI_PACKED: - if (!oi.u.packed.is_delta && - repo_settings_get_big_file_threshold(the_repository) < size) { - st->u.in_pack.pack = oi.u.packed.pack; - st->u.in_pack.pos = oi.u.packed.offset; - st->open = open_istream_pack_non_delta; - return 0; - } - /* fallthru */ - default: - st->open = open_istream_incore; + if (oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(the_repository) >= size) + break; + + st->u.in_pack.pack = oi.u.packed.pack; + st->u.in_pack.pos = oi.u.packed.offset; + if (open_istream_pack_non_delta(st, r, oid, type) < 0) + break; + return 0; + default: + break; } + + return open_istream_incore(st, r, oid, type); } /**************************************************************** @@ -485,12 +484,6 @@ struct odb_read_stream *open_istream(struct repository *r, return NULL; } - if (st->open(st, r, real, type)) { - if (open_istream_incore(st, r, real, type)) { - free(st); - return NULL; - } - } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ struct odb_read_stream *nst = attach_stream_filter(st, filter); From 3f64deabdf0a2a9664acec61698affc449e07496 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:28 +0100 Subject: [PATCH 03/19] streaming: propagate final object type via the stream When opening the read stream for a specific object the caller is also expected to pass in a pointer to the object type. This type is passed down via multiple levels and will eventually be populated with the type of the looked-up object. The way we propagate down the pointer though is somewhat non-obvious. While `istream_source()` still expects the pointer and looks it up via `odb_read_object_info_extended()`, we also pass it down even further into the format-specific callbacks that perform another lookup. This is quite confusing overall. Refactor the code so that the responsibility to populate the object type rests solely with the format-specific callbacks. This will allow us to drop the call to `odb_read_object_info_extended()` in `istream_source()` entirely in a subsequent patch. Furthermore, instead of propagating the type via an in-pointer, we now propagate the type via a new field in the object stream. It already has a `size` field, so it's only natural to have a second field that contains the object type. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/streaming.c b/streaming.c index 1bb3f393b8..665624ddc0 100644 --- a/streaming.c +++ b/streaming.c @@ -33,6 +33,7 @@ struct odb_read_stream { close_istream_fn close; read_istream_fn read; + enum object_type type; unsigned long size; /* inflated size of full object */ git_zstream z; enum { z_unused, z_used, z_done, z_error } z_state; @@ -159,6 +160,7 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, fs->o_end = fs->o_ptr = 0; fs->input_finished = 0; ifs->size = -1; /* unknown */ + ifs->type = st->type; return ifs; } @@ -221,14 +223,13 @@ static int close_istream_loose(struct odb_read_stream *st) } static int open_istream_loose(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, - enum object_type *type) + const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; struct odb_source *source; oi.sizep = &st->size; - oi.typep = type; + oi.typep = &st->type; odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { @@ -249,7 +250,7 @@ static int open_istream_loose(struct odb_read_stream *st, struct repository *r, case ULHR_TOO_LONG: goto error; } - if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0) + if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) goto error; st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; @@ -339,8 +340,7 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, - const struct object_id *oid UNUSED, - enum object_type *type UNUSED) + const struct object_id *oid UNUSED) { struct pack_window *window; enum object_type in_pack_type; @@ -361,6 +361,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, case OBJ_TAG: break; } + st->type = in_pack_type; st->z_state = z_unused; st->close = close_istream_pack_non_delta; st->read = read_istream_pack_non_delta; @@ -396,7 +397,7 @@ static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t } static int open_istream_incore(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, enum object_type *type) + const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; @@ -404,7 +405,7 @@ static int open_istream_incore(struct odb_read_stream *st, struct repository *r, st->close = close_istream_incore; st->read = read_istream_incore; - oi.typep = type; + oi.typep = &st->type; oi.sizep = &st->size; oi.contentp = (void **)&st->u.incore.buf; return odb_read_object_info_extended(r->objects, oid, &oi, @@ -417,14 +418,12 @@ static int open_istream_incore(struct odb_read_stream *st, struct repository *r, static int istream_source(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, - enum object_type *type) + const struct object_id *oid) { unsigned long size; int status; struct object_info oi = OBJECT_INFO_INIT; - oi.typep = type; oi.sizep = &size; status = odb_read_object_info_extended(r->objects, oid, &oi, 0); if (status < 0) @@ -432,7 +431,7 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - if (open_istream_loose(st, r, oid, type) < 0) + if (open_istream_loose(st, r, oid) < 0) break; return 0; case OI_PACKED: @@ -442,7 +441,7 @@ static int istream_source(struct odb_read_stream *st, st->u.in_pack.pack = oi.u.packed.pack; st->u.in_pack.pos = oi.u.packed.offset; - if (open_istream_pack_non_delta(st, r, oid, type) < 0) + if (open_istream_pack_non_delta(st, r, oid) < 0) break; return 0; @@ -450,7 +449,7 @@ static int istream_source(struct odb_read_stream *st, break; } - return open_istream_incore(st, r, oid, type); + return open_istream_incore(st, r, oid); } /**************************************************************** @@ -477,7 +476,7 @@ struct odb_read_stream *open_istream(struct repository *r, { struct odb_read_stream *st = xmalloc(sizeof(*st)); const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real, type); + int ret = istream_source(st, r, real); if (ret) { free(st); @@ -495,6 +494,7 @@ struct odb_read_stream *open_istream(struct repository *r, } *size = st->size; + *type = st->type; return st; } From 3c7722dd4d376e0fce4c48f723fe8b69af785998 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:29 +0100 Subject: [PATCH 04/19] streaming: explicitly pass packfile info when streaming a packed object When streaming a packed object we first populate the stream with information about the pack that contains the object before calling `open_istream_pack_non_delta()`. This is done because we have already looked up both the pack and the object's offset, so it would be a waste of time to look up this information again. But the way this is done makes for a somewhat awkward calling interface, as the caller now needs to be aware of how exactly the function itself behaves. Refactor the code so that we instead explicitly pass the packfile info into `open_istream_pack_non_delta()`. This makes the calling convention explicit, but more importantly this allows us to refactor the function so that it becomes its responsibility to allocate the stream itself in a subsequent patch. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/streaming.c b/streaming.c index 665624ddc0..bf277daadd 100644 --- a/streaming.c +++ b/streaming.c @@ -340,16 +340,18 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, - const struct object_id *oid UNUSED) + const struct object_id *oid UNUSED, + struct packed_git *pack, + off_t offset) { struct pack_window *window; enum object_type in_pack_type; window = NULL; - in_pack_type = unpack_object_header(st->u.in_pack.pack, + in_pack_type = unpack_object_header(pack, &window, - &st->u.in_pack.pos, + &offset, &st->size); unuse_pack(&window); switch (in_pack_type) { @@ -365,6 +367,8 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, st->z_state = z_unused; st->close = close_istream_pack_non_delta; st->read = read_istream_pack_non_delta; + st->u.in_pack.pack = pack; + st->u.in_pack.pos = offset; return 0; } @@ -436,14 +440,10 @@ static int istream_source(struct odb_read_stream *st, return 0; case OI_PACKED: if (oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size) + repo_settings_get_big_file_threshold(the_repository) >= size || + open_istream_pack_non_delta(st, r, oid, oi.u.packed.pack, + oi.u.packed.offset) < 0) break; - - st->u.in_pack.pack = oi.u.packed.pack; - st->u.in_pack.pos = oi.u.packed.offset; - if (open_istream_pack_non_delta(st, r, oid) < 0) - break; - return 0; default: break; From 595296e124f5e8a67c4669fcaeb1b28e71c2d751 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:30 +0100 Subject: [PATCH 05/19] streaming: allocate stream inside the backend-specific logic When creating a new stream we first allocate it and then call into backend-specific logic to populate the stream. This design requires that the stream itself contains a `union` with backend-specific members that then ultimately get populated by the backend-specific logic. This works, but it's awkward in the context of pluggable object databases. Each backend will need its own member in that union, and as the structure itself is completely opaque (it's only defined in "streaming.c") it also has the consequence that we must have the logic that is specific to backends in "streaming.c". Ideally though, the infrastructure would be reversed: we have a generic `struct odb_read_stream` and some helper functions in "streaming.c", whereas the backend-specific logic sits in the backend's subsystem itself. This can be realized by using a design that is similar to how we handle reference databases: instead of having a union of members, we instead have backend-specific structures with a `struct odb_read_stream base` as its first member. The backends would thus hand out the pointer to the base, but internally they know to cast back to the backend-specific type. This means though that we need to allocate different structures depending on the backend. To prepare for this, move allocation of the structure into the backend-specific functions that open a new stream. Subsequent commits will then create those new backend-specific structs. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 99 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 63 insertions(+), 36 deletions(-) diff --git a/streaming.c b/streaming.c index bf277daadd..a2c2d88738 100644 --- a/streaming.c +++ b/streaming.c @@ -222,27 +222,34 @@ static int close_istream_loose(struct odb_read_stream *st) return 0; } -static int open_istream_loose(struct odb_read_stream *st, struct repository *r, +static int open_istream_loose(struct odb_read_stream **out, + struct repository *r, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; + struct odb_read_stream *st; struct odb_source *source; - - oi.sizep = &st->size; - oi.typep = &st->type; + unsigned long mapsize; + void *mapped; odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { - st->u.loose.mapped = odb_source_loose_map_object(source, oid, - &st->u.loose.mapsize); - if (st->u.loose.mapped) + mapped = odb_source_loose_map_object(source, oid, &mapsize); + if (mapped) break; } - if (!st->u.loose.mapped) + if (!mapped) return -1; - switch (unpack_loose_header(&st->z, st->u.loose.mapped, - st->u.loose.mapsize, st->u.loose.hdr, + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. + */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->u.loose.hdr, sizeof(st->u.loose.hdr))) { case ULHR_OK: break; @@ -250,19 +257,28 @@ static int open_istream_loose(struct odb_read_stream *st, struct repository *r, case ULHR_TOO_LONG: goto error; } + + oi.sizep = &st->size; + oi.typep = &st->type; + if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) goto error; + st->u.loose.mapped = mapped; + st->u.loose.mapsize = mapsize; st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; st->u.loose.hdr_avail = st->z.total_out; st->z_state = z_used; st->close = close_istream_loose; st->read = read_istream_loose; + *out = st; + return 0; error: git_inflate_end(&st->z); munmap(st->u.loose.mapped, st->u.loose.mapsize); + free(st); return -1; } @@ -338,12 +354,16 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) return 0; } -static int open_istream_pack_non_delta(struct odb_read_stream *st, +static int open_istream_pack_non_delta(struct odb_read_stream **out, struct repository *r UNUSED, const struct object_id *oid UNUSED, struct packed_git *pack, off_t offset) { + struct odb_read_stream stream = { + .close = close_istream_pack_non_delta, + .read = read_istream_pack_non_delta, + }; struct pack_window *window; enum object_type in_pack_type; @@ -352,7 +372,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, in_pack_type = unpack_object_header(pack, &window, &offset, - &st->size); + &stream.size); unuse_pack(&window); switch (in_pack_type) { default: @@ -363,12 +383,13 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, case OBJ_TAG: break; } - st->type = in_pack_type; - st->z_state = z_unused; - st->close = close_istream_pack_non_delta; - st->read = read_istream_pack_non_delta; - st->u.in_pack.pack = pack; - st->u.in_pack.pos = offset; + stream.type = in_pack_type; + stream.z_state = z_unused; + stream.u.in_pack.pack = pack; + stream.u.in_pack.pos = offset; + + CALLOC_ARRAY(*out, 1); + **out = stream; return 0; } @@ -400,27 +421,35 @@ static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t return read_size; } -static int open_istream_incore(struct odb_read_stream *st, struct repository *r, +static int open_istream_incore(struct odb_read_stream **out, + struct repository *r, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; + struct odb_read_stream stream = { + .close = close_istream_incore, + .read = read_istream_incore, + }; + int ret; - st->u.incore.read_ptr = 0; - st->close = close_istream_incore; - st->read = read_istream_incore; + oi.typep = &stream.type; + oi.sizep = &stream.size; + oi.contentp = (void **)&stream.u.incore.buf; + ret = odb_read_object_info_extended(r->objects, oid, &oi, + OBJECT_INFO_DIE_IF_CORRUPT); + if (ret) + return ret; - oi.typep = &st->type; - oi.sizep = &st->size; - oi.contentp = (void **)&st->u.incore.buf; - return odb_read_object_info_extended(r->objects, oid, &oi, - OBJECT_INFO_DIE_IF_CORRUPT); + CALLOC_ARRAY(*out, 1); + **out = stream; + return 0; } /***************************************************************************** * static helpers variables and functions for users of streaming interface *****************************************************************************/ -static int istream_source(struct odb_read_stream *st, +static int istream_source(struct odb_read_stream **out, struct repository *r, const struct object_id *oid) { @@ -435,13 +464,13 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - if (open_istream_loose(st, r, oid) < 0) + if (open_istream_loose(out, r, oid) < 0) break; return 0; case OI_PACKED: if (oi.u.packed.is_delta || repo_settings_get_big_file_threshold(the_repository) >= size || - open_istream_pack_non_delta(st, r, oid, oi.u.packed.pack, + open_istream_pack_non_delta(out, r, oid, oi.u.packed.pack, oi.u.packed.offset) < 0) break; return 0; @@ -449,7 +478,7 @@ static int istream_source(struct odb_read_stream *st, break; } - return open_istream_incore(st, r, oid); + return open_istream_incore(out, r, oid); } /**************************************************************** @@ -474,14 +503,12 @@ struct odb_read_stream *open_istream(struct repository *r, unsigned long *size, struct stream_filter *filter) { - struct odb_read_stream *st = xmalloc(sizeof(*st)); + struct odb_read_stream *st; const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real); + int ret = istream_source(&st, r, real); - if (ret) { - free(st); + if (ret) return NULL; - } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ From e030d0aeb5ebf79cdc4910e79d59e33998de78cd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:31 +0100 Subject: [PATCH 06/19] streaming: create structure for in-core object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for in-core object streams to move towards this design. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/streaming.c b/streaming.c index a2c2d88738..35307d7229 100644 --- a/streaming.c +++ b/streaming.c @@ -39,11 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - char *buf; /* from odb_read_object_info_extended() */ - unsigned long read_ptr; - } incore; - struct { void *mapped; unsigned long mapsize; @@ -401,22 +396,30 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, * *****************************************************************/ -static int close_istream_incore(struct odb_read_stream *st) +struct odb_incore_read_stream { + struct odb_read_stream base; + char *buf; /* from odb_read_object_info_extended() */ + unsigned long read_ptr; +}; + +static int close_istream_incore(struct odb_read_stream *_st) { - free(st->u.incore.buf); + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; + free(st->buf); return 0; } -static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t sz) +static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; size_t read_size = sz; - size_t remainder = st->size - st->u.incore.read_ptr; + size_t remainder = st->base.size - st->read_ptr; if (remainder <= read_size) read_size = remainder; if (read_size) { - memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size); - st->u.incore.read_ptr += read_size; + memcpy(buf, st->buf + st->read_ptr, read_size); + st->read_ptr += read_size; } return read_size; } @@ -426,22 +429,25 @@ static int open_istream_incore(struct odb_read_stream **out, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; - struct odb_read_stream stream = { - .close = close_istream_incore, - .read = read_istream_incore, + struct odb_incore_read_stream stream = { + .base.close = close_istream_incore, + .base.read = read_istream_incore, }; + struct odb_incore_read_stream *st; int ret; - oi.typep = &stream.type; - oi.sizep = &stream.size; - oi.contentp = (void **)&stream.u.incore.buf; + oi.typep = &stream.base.type; + oi.sizep = &stream.base.size; + oi.contentp = (void **)&stream.buf; ret = odb_read_object_info_extended(r->objects, oid, &oi, OBJECT_INFO_DIE_IF_CORRUPT); if (ret) return ret; - CALLOC_ARRAY(*out, 1); - **out = stream; + CALLOC_ARRAY(st, 1); + *st = stream; + *out = &st->base; + return 0; } From b7774c0f0de43379c40984b4ede265a512c1a4f0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:32 +0100 Subject: [PATCH 07/19] streaming: create structure for loose object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for loose object streams to move towards this design. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 85 +++++++++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/streaming.c b/streaming.c index 35307d7229..ac7b3026f5 100644 --- a/streaming.c +++ b/streaming.c @@ -39,14 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; - } loose; - struct { struct packed_git *pack; off_t pos; @@ -165,11 +157,21 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, * *****************************************************************/ -static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t sz) +struct odb_loose_read_stream { + struct odb_read_stream base; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; size_t total_read = 0; - switch (st->z_state) { + switch (st->base.z_state) { case z_done: return 0; case z_error: @@ -178,42 +180,43 @@ static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t break; } - if (st->u.loose.hdr_used < st->u.loose.hdr_avail) { - size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used; + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; if (sz < to_copy) to_copy = sz; - memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy); - st->u.loose.hdr_used += to_copy; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; total_read += to_copy; } while (total_read < sz) { int status; - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); + st->base.z.next_out = (unsigned char *)buf + total_read; + st->base.z.avail_out = sz - total_read; + status = git_inflate(&st->base.z, Z_FINISH); - total_read = st->z.next_out - (unsigned char *)buf; + total_read = st->base.z.next_out - (unsigned char *)buf; if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; + git_inflate_end(&st->base.z); + st->base.z_state = z_done; break; } if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = z_error; + git_inflate_end(&st->base.z); + st->base.z_state = z_error; return -1; } } return total_read; } -static int close_istream_loose(struct odb_read_stream *st) +static int close_istream_loose(struct odb_read_stream *_st) { - close_deflated_stream(st); - munmap(st->u.loose.mapped, st->u.loose.mapsize); + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + close_deflated_stream(&st->base); + munmap(st->mapped, st->mapsize); return 0; } @@ -222,7 +225,7 @@ static int open_istream_loose(struct odb_read_stream **out, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; - struct odb_read_stream *st; + struct odb_loose_read_stream *st; struct odb_source *source; unsigned long mapsize; void *mapped; @@ -244,8 +247,8 @@ static int open_istream_loose(struct odb_read_stream **out, */ CALLOC_ARRAY(st, 1); - switch (unpack_loose_header(&st->z, mapped, mapsize, st->u.loose.hdr, - sizeof(st->u.loose.hdr))) { + switch (unpack_loose_header(&st->base.z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { case ULHR_OK: break; case ULHR_BAD: @@ -253,26 +256,26 @@ static int open_istream_loose(struct odb_read_stream **out, goto error; } - oi.sizep = &st->size; - oi.typep = &st->type; + oi.sizep = &st->base.size; + oi.typep = &st->base.type; - if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) goto error; - st->u.loose.mapped = mapped; - st->u.loose.mapsize = mapsize; - st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; - st->u.loose.hdr_avail = st->z.total_out; - st->z_state = z_used; - st->close = close_istream_loose; - st->read = read_istream_loose; + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->base.z.total_out; + st->base.z_state = z_used; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; - *out = st; + *out = &st->base; return 0; error: - git_inflate_end(&st->z); - munmap(st->u.loose.mapped, st->u.loose.mapsize); + git_inflate_end(&st->base.z); + munmap(st->mapped, st->mapsize); free(st); return -1; } From 5f0d8d2e8d3f992f58af247b6d21509c3c7595ca Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:33 +0100 Subject: [PATCH 08/19] streaming: create structure for packed object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for packed object streams to move towards this design. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 75 ++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/streaming.c b/streaming.c index ac7b3026f5..788f04e83e 100644 --- a/streaming.c +++ b/streaming.c @@ -39,11 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - struct packed_git *pack; - off_t pos; - } in_pack; - struct filtered_istream filtered; } u; }; @@ -287,16 +282,23 @@ error: * *****************************************************************/ -static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf, +struct odb_packed_read_stream { + struct odb_read_stream base; + struct packed_git *pack; + off_t pos; +}; + +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; size_t total_read = 0; - switch (st->z_state) { + switch (st->base.z_state) { case z_unused: - memset(&st->z, 0, sizeof(st->z)); - git_inflate_init(&st->z); - st->z_state = z_used; + memset(&st->base.z, 0, sizeof(st->base.z)); + git_inflate_init(&st->base.z); + st->base.z_state = z_used; break; case z_done: return 0; @@ -311,21 +313,21 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf struct pack_window *window = NULL; unsigned char *mapped; - mapped = use_pack(st->u.in_pack.pack, &window, - st->u.in_pack.pos, &st->z.avail_in); + mapped = use_pack(st->pack, &window, + st->pos, &st->base.z.avail_in); - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - st->z.next_in = mapped; - status = git_inflate(&st->z, Z_FINISH); + st->base.z.next_out = (unsigned char *)buf + total_read; + st->base.z.avail_out = sz - total_read; + st->base.z.next_in = mapped; + status = git_inflate(&st->base.z, Z_FINISH); - st->u.in_pack.pos += st->z.next_in - mapped; - total_read = st->z.next_out - (unsigned char *)buf; + st->pos += st->base.z.next_in - mapped; + total_read = st->base.z.next_out - (unsigned char *)buf; unuse_pack(&window); if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; + git_inflate_end(&st->base.z); + st->base.z_state = z_done; break; } @@ -338,17 +340,18 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf * or truncated), then use_pack() catches that and will die(). */ if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->z); - st->z_state = z_error; + git_inflate_end(&st->base.z); + st->base.z_state = z_error; return -1; } } return total_read; } -static int close_istream_pack_non_delta(struct odb_read_stream *st) +static int close_istream_pack_non_delta(struct odb_read_stream *_st) { - close_deflated_stream(st); + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + close_deflated_stream(&st->base); return 0; } @@ -358,19 +361,17 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, struct packed_git *pack, off_t offset) { - struct odb_read_stream stream = { - .close = close_istream_pack_non_delta, - .read = read_istream_pack_non_delta, - }; + struct odb_packed_read_stream *stream; struct pack_window *window; enum object_type in_pack_type; + size_t size; window = NULL; in_pack_type = unpack_object_header(pack, &window, &offset, - &stream.size); + &size); unuse_pack(&window); switch (in_pack_type) { default: @@ -381,13 +382,17 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, case OBJ_TAG: break; } - stream.type = in_pack_type; - stream.z_state = z_unused; - stream.u.in_pack.pack = pack; - stream.u.in_pack.pos = offset; - CALLOC_ARRAY(*out, 1); - **out = stream; + CALLOC_ARRAY(stream, 1); + stream->base.close = close_istream_pack_non_delta; + stream->base.read = read_istream_pack_non_delta; + stream->base.type = in_pack_type; + stream->base.size = size; + stream->base.z_state = z_unused; + stream->pack = pack; + stream->pos = offset; + + *out = &stream->base; return 0; } From 1154b2d2e511113e9b7d567788b72acb05713915 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:34 +0100 Subject: [PATCH 09/19] streaming: create structure for filtered object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for filtered object streams to move towards this design. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 54 +++++++++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/streaming.c b/streaming.c index 788f04e83e..199cca5abb 100644 --- a/streaming.c +++ b/streaming.c @@ -19,16 +19,6 @@ typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); #define FILTER_BUFFER (1024*16) -struct filtered_istream { - struct odb_read_stream *upstream; - struct stream_filter *filter; - char ibuf[FILTER_BUFFER]; - char obuf[FILTER_BUFFER]; - int i_end, i_ptr; - int o_end, o_ptr; - int input_finished; -}; - struct odb_read_stream { close_istream_fn close; read_istream_fn read; @@ -37,10 +27,6 @@ struct odb_read_stream { unsigned long size; /* inflated size of full object */ git_zstream z; enum { z_unused, z_used, z_done, z_error } z_state; - - union { - struct filtered_istream filtered; - } u; }; /***************************************************************** @@ -62,16 +48,28 @@ static void close_deflated_stream(struct odb_read_stream *st) * *****************************************************************/ -static int close_istream_filtered(struct odb_read_stream *st) +struct odb_filtered_read_stream { + struct odb_read_stream base; + struct odb_read_stream *upstream; + struct stream_filter *filter; + char ibuf[FILTER_BUFFER]; + char obuf[FILTER_BUFFER]; + int i_end, i_ptr; + int o_end, o_ptr; + int input_finished; +}; + +static int close_istream_filtered(struct odb_read_stream *_fs) { - free_stream_filter(st->u.filtered.filter); - return close_istream(st->u.filtered.upstream); + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; + free_stream_filter(fs->filter); + return close_istream(fs->upstream); } -static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, +static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, size_t sz) { - struct filtered_istream *fs = &(st->u.filtered); + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; size_t filled = 0; while (sz) { @@ -131,19 +129,17 @@ static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, struct stream_filter *filter) { - struct odb_read_stream *ifs = xmalloc(sizeof(*ifs)); - struct filtered_istream *fs = &(ifs->u.filtered); + struct odb_filtered_read_stream *fs; - ifs->close = close_istream_filtered; - ifs->read = read_istream_filtered; + CALLOC_ARRAY(fs, 1); + fs->base.close = close_istream_filtered; + fs->base.read = read_istream_filtered; fs->upstream = st; fs->filter = filter; - fs->i_end = fs->i_ptr = 0; - fs->o_end = fs->o_ptr = 0; - fs->input_finished = 0; - ifs->size = -1; /* unknown */ - ifs->type = st->type; - return ifs; + fs->base.size = -1; /* unknown */ + fs->base.type = st->type; + + return &fs->base; } /***************************************************************** From eb5abbb4e6a8c06f5c6275bbb541bf7d736171c5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:35 +0100 Subject: [PATCH 10/19] streaming: move zlib stream into backends While all backend-specific data is now contained in a backend-specific structure, we still share the zlib stream across the loose and packed objects. Refactor the code and move it into the specific structures so that we fully detangle the different backends from one another. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 104 ++++++++++++++++++++++++++-------------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/streaming.c b/streaming.c index 199cca5abb..46fddaf2ca 100644 --- a/streaming.c +++ b/streaming.c @@ -25,23 +25,8 @@ struct odb_read_stream { enum object_type type; unsigned long size; /* inflated size of full object */ - git_zstream z; - enum { z_unused, z_used, z_done, z_error } z_state; }; -/***************************************************************** - * - * Common helpers - * - *****************************************************************/ - -static void close_deflated_stream(struct odb_read_stream *st) -{ - if (st->z_state == z_used) - git_inflate_end(&st->z); -} - - /***************************************************************** * * Filtered stream @@ -150,6 +135,12 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, struct odb_loose_read_stream { struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; void *mapped; unsigned long mapsize; char hdr[32]; @@ -162,10 +153,10 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; size_t total_read = 0; - switch (st->base.z_state) { - case z_done: + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: return 0; - case z_error: + case ODB_LOOSE_READ_STREAM_ERROR: return -1; default: break; @@ -183,20 +174,20 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t while (total_read < sz) { int status; - st->base.z.next_out = (unsigned char *)buf + total_read; - st->base.z.avail_out = sz - total_read; - status = git_inflate(&st->base.z, Z_FINISH); + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); - total_read = st->base.z.next_out - (unsigned char *)buf; + total_read = st->z.next_out - (unsigned char *)buf; if (status == Z_STREAM_END) { - git_inflate_end(&st->base.z); - st->base.z_state = z_done; + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; break; } if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->base.z); - st->base.z_state = z_error; + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; return -1; } } @@ -206,7 +197,8 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t static int close_istream_loose(struct odb_read_stream *_st) { struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - close_deflated_stream(&st->base); + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); munmap(st->mapped, st->mapsize); return 0; } @@ -238,7 +230,7 @@ static int open_istream_loose(struct odb_read_stream **out, */ CALLOC_ARRAY(st, 1); - switch (unpack_loose_header(&st->base.z, mapped, mapsize, st->hdr, + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, sizeof(st->hdr))) { case ULHR_OK: break; @@ -256,8 +248,8 @@ static int open_istream_loose(struct odb_read_stream **out, st->mapped = mapped; st->mapsize = mapsize; st->hdr_used = strlen(st->hdr) + 1; - st->hdr_avail = st->base.z.total_out; - st->base.z_state = z_used; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; st->base.close = close_istream_loose; st->base.read = read_istream_loose; @@ -265,7 +257,7 @@ static int open_istream_loose(struct odb_read_stream **out, return 0; error: - git_inflate_end(&st->base.z); + git_inflate_end(&st->z); munmap(st->mapped, st->mapsize); free(st); return -1; @@ -281,6 +273,13 @@ error: struct odb_packed_read_stream { struct odb_read_stream base; struct packed_git *pack; + git_zstream z; + enum { + ODB_PACKED_READ_STREAM_UNINITIALIZED, + ODB_PACKED_READ_STREAM_INUSE, + ODB_PACKED_READ_STREAM_DONE, + ODB_PACKED_READ_STREAM_ERROR, + } z_state; off_t pos; }; @@ -290,17 +289,17 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; size_t total_read = 0; - switch (st->base.z_state) { - case z_unused: - memset(&st->base.z, 0, sizeof(st->base.z)); - git_inflate_init(&st->base.z); - st->base.z_state = z_used; + switch (st->z_state) { + case ODB_PACKED_READ_STREAM_UNINITIALIZED: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_INUSE; break; - case z_done: + case ODB_PACKED_READ_STREAM_DONE: return 0; - case z_error: + case ODB_PACKED_READ_STREAM_ERROR: return -1; - case z_used: + case ODB_PACKED_READ_STREAM_INUSE: break; } @@ -310,20 +309,20 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu unsigned char *mapped; mapped = use_pack(st->pack, &window, - st->pos, &st->base.z.avail_in); + st->pos, &st->z.avail_in); - st->base.z.next_out = (unsigned char *)buf + total_read; - st->base.z.avail_out = sz - total_read; - st->base.z.next_in = mapped; - status = git_inflate(&st->base.z, Z_FINISH); + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); - st->pos += st->base.z.next_in - mapped; - total_read = st->base.z.next_out - (unsigned char *)buf; + st->pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; unuse_pack(&window); if (status == Z_STREAM_END) { - git_inflate_end(&st->base.z); - st->base.z_state = z_done; + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_DONE; break; } @@ -336,8 +335,8 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu * or truncated), then use_pack() catches that and will die(). */ if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->base.z); - st->base.z_state = z_error; + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_ERROR; return -1; } } @@ -347,7 +346,8 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu static int close_istream_pack_non_delta(struct odb_read_stream *_st) { struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - close_deflated_stream(&st->base); + if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) + git_inflate_end(&st->z); return 0; } @@ -384,7 +384,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, stream->base.read = read_istream_pack_non_delta; stream->base.type = in_pack_type; stream->base.size = size; - stream->base.z_state = z_unused; + stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; stream->pack = pack; stream->pos = offset; From 385e18810f10ec0ce0a266d25da4e1878c8ce15a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:36 +0100 Subject: [PATCH 11/19] packfile: introduce function to read object info from a store Extract the logic to read object info for a packed object from `do_oid_object_into_extended()` into a standalone function that operates on the packfile store. This function will be used in a subsequent commit. Note that this change allows us to make `find_pack_entry()` an internal implementation detail. As a consequence though we have to move around `packfile_store_freshen_object()` so that it is defined after that function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 29 +++------------------- packfile.c | 71 +++++++++++++++++++++++++++++++++++++++++------------- packfile.h | 12 ++++++++- 3 files changed, 69 insertions(+), 43 deletions(-) diff --git a/odb.c b/odb.c index 3ec21ef24e..f4cbee4b04 100644 --- a/odb.c +++ b/odb.c @@ -666,8 +666,6 @@ static int do_oid_object_info_extended(struct object_database *odb, { static struct object_info blank_oi = OBJECT_INFO_INIT; const struct cached_object *co; - struct pack_entry e; - int rtype; const struct object_id *real = oid; int already_retried = 0; @@ -702,8 +700,8 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - if (find_pack_entry(odb->repo, real, &e)) - break; + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) + return 0; /* Most likely it's a loose object. */ for (source = odb->sources; source; source = source->next) @@ -713,8 +711,8 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (find_pack_entry(odb->repo, real, &e)) - break; + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) + return 0; } /* @@ -747,25 +745,6 @@ static int do_oid_object_info_extended(struct object_database *odb, } return -1; } - - if (oi == &blank_oi) - /* - * We know that the caller doesn't actually need the - * information below, so return early. - */ - return 0; - rtype = packed_object_info(odb->repo, e.p, e.offset, oi); - if (rtype < 0) { - mark_bad_packed_object(e.p, real); - return do_oid_object_info_extended(odb, real, oi, 0); - } else if (oi->whence == OI_PACKED) { - oi->u.packed.offset = e.offset; - oi->u.packed.pack = e.p; - oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || - rtype == OBJ_OFS_DELTA); - } - - return 0; } static int oid_object_info_convert(struct repository *r, diff --git a/packfile.c b/packfile.c index 40f733dd23..b4bc40d895 100644 --- a/packfile.c +++ b/packfile.c @@ -819,22 +819,6 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, return p; } -int packfile_store_freshen_object(struct packfile_store *store, - const struct object_id *oid) -{ - struct pack_entry e; - if (!find_pack_entry(store->odb->repo, oid, &e)) - return 0; - if (e.p->is_cruft) - return 0; - if (e.p->freshened) - return 1; - if (utime(e.p->pack_name, NULL)) - return 0; - e.p->freshened = 1; - return 1; -} - void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, @@ -2064,7 +2048,9 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) +static int find_pack_entry(struct repository *r, + const struct object_id *oid, + struct pack_entry *e) { struct list_head *pos; @@ -2087,6 +2073,57 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa return 0; } +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid) +{ + struct pack_entry e; + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 0; + if (e.p->is_cruft) + return 0; + if (e.p->freshened) + return 1; + if (utime(e.p->pack_name, NULL)) + return 0; + e.p->freshened = 1; + return 1; +} + +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags UNUSED) +{ + static struct object_info blank_oi = OBJECT_INFO_INIT; + struct pack_entry e; + int rtype; + + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 1; + + /* + * We know that the caller doesn't actually need the + * information below, so return early. + */ + if (oi == &blank_oi) + return 0; + + rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); + if (rtype < 0) { + mark_bad_packed_object(e.p, oid); + return -1; + } + + if (oi->whence == OI_PACKED) { + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || + rtype == OBJ_OFS_DELTA); + } + + return 0; +} + static void maybe_invalidate_kept_pack_cache(struct repository *r, unsigned flags) { diff --git a/packfile.h b/packfile.h index 58fcc88e20..0a98bddd81 100644 --- a/packfile.h +++ b/packfile.h @@ -144,6 +144,17 @@ void packfile_store_add_pack(struct packfile_store *store, #define repo_for_each_pack(repo, p) \ for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) +/* + * Try to read the object identified by its ID from the object store and + * populate the object info with its data. Returns 1 in case the object was + * not found, 0 if it was and read successfully, and a negative error code in + * case the object was corrupted. + */ +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags); + /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. @@ -357,7 +368,6 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. */ -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e); int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); int has_object_pack(struct repository *r, const struct object_id *oid); From 4c89d31494bff4bde6079a0e0821f1437e37d07b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:37 +0100 Subject: [PATCH 12/19] streaming: rely on object sources to create object stream When creating an object stream we first look up the object info and, if it's present, we call into the respective backend that contains the object to create a new stream for it. This has the consequence that, for loose object source, we basically iterate through the object sources twice: we first discover that the file exists as a loose object in the first place by iterating through all sources. And, once we have discovered it, we again walk through all sources to try and map the object. The same issue will eventually also surface once the packfile store becomes per-object-source. Furthermore, it feels rather pointless to first look up the object only to then try and read it. Refactor the logic to be centered around sources instead. Instead of first reading the object, we immediately ask the source to create the object stream for us. If the object exists we get stream, otherwise we'll try the next source. Like this we only have to iterate through sources once. But even more importantly, this change also helps us to make the whole logic pluggable. The object read stream subsystem does not need to be aware of the different source backends anymore, but eventually it'll only have to call the source's callback function. Note that at the current point in time we aren't fully there yet: - The packfile store still sits on the object database level and is thus agnostic of the sources. - We still have to call into both the packfile store and the loose object source. But both of these issues will soon be addressed. This refactoring results in a slight change to semantics: previously, it was `odb_read_object_info_extended()` that picked the source for us, and it would have favored packed (non-deltified) objects over loose objects. And while we still favor packed over loose objects for a single source with the new logic, we'll now favor a loose object from an earlier source over a packed object from a later source. Ultimately this shouldn't matter though: the stream doesn't indicate to the caller which source it is from and whether it was created from a packed or loose object, so such details are opaque to the caller. And other than that we should be able to assume that two objects with the same object ID should refer to the same content, so the streamed data would be the same, too. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 65 ++++++++++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/streaming.c b/streaming.c index 46fddaf2ca..f0f7d31956 100644 --- a/streaming.c +++ b/streaming.c @@ -204,21 +204,15 @@ static int close_istream_loose(struct odb_read_stream *_st) } static int open_istream_loose(struct odb_read_stream **out, - struct repository *r, + struct odb_source *source, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; struct odb_loose_read_stream *st; - struct odb_source *source; unsigned long mapsize; void *mapped; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - mapped = odb_source_loose_map_object(source, oid, &mapsize); - if (mapped) - break; - } + mapped = odb_source_loose_map_object(source, oid, &mapsize); if (!mapped) return -1; @@ -352,21 +346,25 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st) } static int open_istream_pack_non_delta(struct odb_read_stream **out, - struct repository *r UNUSED, - const struct object_id *oid UNUSED, - struct packed_git *pack, - off_t offset) + struct object_database *odb, + const struct object_id *oid) { struct odb_packed_read_stream *stream; - struct pack_window *window; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; enum object_type in_pack_type; - size_t size; + unsigned long size; - window = NULL; + oi.sizep = &size; - in_pack_type = unpack_object_header(pack, + if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || + oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(the_repository) >= size) + return -1; + + in_pack_type = unpack_object_header(oi.u.packed.pack, &window, - &offset, + &oi.u.packed.offset, &size); unuse_pack(&window); switch (in_pack_type) { @@ -385,8 +383,8 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, stream->base.type = in_pack_type; stream->base.size = size; stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; - stream->pack = pack; - stream->pos = offset; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; *out = &stream->base; @@ -463,30 +461,15 @@ static int istream_source(struct odb_read_stream **out, struct repository *r, const struct object_id *oid) { - unsigned long size; - int status; - struct object_info oi = OBJECT_INFO_INIT; + struct odb_source *source; - oi.sizep = &size; - status = odb_read_object_info_extended(r->objects, oid, &oi, 0); - if (status < 0) - return status; + if (!open_istream_pack_non_delta(out, r->objects, oid)) + return 0; - switch (oi.whence) { - case OI_LOOSE: - if (open_istream_loose(out, r, oid) < 0) - break; - return 0; - case OI_PACKED: - if (oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size || - open_istream_pack_non_delta(out, r, oid, oi.u.packed.pack, - oi.u.packed.offset) < 0) - break; - return 0; - default: - break; - } + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) + if (!open_istream_loose(out, source, oid)) + return 0; return open_istream_incore(out, r, oid); } From c26da3446e98ad4aa98ec9154c70c6fd35cb9ad6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:38 +0100 Subject: [PATCH 13/19] streaming: get rid of `the_repository` Subsequent commits will move the backend-specific logic of object streaming into their respective subsystems. These subsystems have gotten rid of `the_repository` already, but we still use it in two locations in the streaming subsystem. Prepare for the move by fixing those two cases. Converting the logic in `open_istream_pack_non_delta()` is trivial as we already got the object database as input. But for `stream_blob_to_fd()` we have to add a new parameter to make it accessible. So, as we already have to adjust all callers anyway, rename the function to `odb_stream_blob_to_fd()` to indicate it's part of the object subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- builtin/fsck.c | 3 ++- builtin/log.c | 4 ++-- entry.c | 2 +- parallel-checkout.c | 3 ++- streaming.c | 13 +++++++------ streaming.h | 18 +++++++++++++++++- 7 files changed, 32 insertions(+), 13 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 983ecec837..120d626d66 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -95,7 +95,7 @@ static int filter_object(const char *path, unsigned mode, static int stream_blob(const struct object_id *oid) { - if (stream_blob_to_fd(1, oid, NULL, 0)) + if (odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0)) die("unable to stream %s to stdout", oid_to_hex(oid)); return 0; } diff --git a/builtin/fsck.c b/builtin/fsck.c index b1a650c673..1a348d43c2 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -340,7 +340,8 @@ static void check_unreachable_object(struct object *obj) } f = xfopen(filename, "w"); if (obj->type == OBJ_BLOB) { - if (stream_blob_to_fd(fileno(f), &obj->oid, NULL, 1)) + if (odb_stream_blob_to_fd(the_repository->objects, fileno(f), + &obj->oid, NULL, 1)) die_errno(_("could not write '%s'"), filename); } else fprintf(f, "%s\n", describe_object(&obj->oid)); diff --git a/builtin/log.c b/builtin/log.c index c8319b8af3..e7b83a6e00 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c fflush(rev->diffopt.file); if (!rev->diffopt.flags.textconv_set_via_cmdline || !rev->diffopt.flags.allow_textconv) - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); if (get_oid_with_context(the_repository, obj_name, GET_OID_RECORD_PATH, @@ -594,7 +594,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c !textconv_object(the_repository, obj_context.path, obj_context.mode, &oidc, 1, &buf, &size)) { object_context_release(&obj_context); - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); } if (!buf) diff --git a/entry.c b/entry.c index cae02eb503..38dfe670f7 100644 --- a/entry.c +++ b/entry.c @@ -139,7 +139,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path, if (fd < 0) return -1; - result |= stream_blob_to_fd(fd, &ce->oid, filter, 1); + result |= odb_stream_blob_to_fd(the_repository->objects, fd, &ce->oid, filter, 1); *fstat_done = fstat_checkout_output(fd, state, statbuf); result |= close(fd); diff --git a/parallel-checkout.c b/parallel-checkout.c index fba6aa65a6..1cb6701b92 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -281,7 +281,8 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd, filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid); if (filter) { - if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) { + if (odb_stream_blob_to_fd(the_repository->objects, fd, + &pc_item->ce->oid, filter, 1)) { /* On error, reset fd to try writing without streaming */ if (reset_fd(fd, path)) return -1; diff --git a/streaming.c b/streaming.c index f0f7d31956..807a6e03a8 100644 --- a/streaming.c +++ b/streaming.c @@ -2,8 +2,6 @@ * Copyright (c) 2011, Google Inc. */ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "convert.h" #include "environment.h" @@ -359,7 +357,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size) + repo_settings_get_big_file_threshold(odb->repo) >= size) return -1; in_pack_type = unpack_object_header(oi.u.packed.pack, @@ -518,8 +516,11 @@ struct odb_read_stream *open_istream(struct repository *r, return st; } -int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, - int can_seek) +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek) { struct odb_read_stream *st; enum object_type type; @@ -527,7 +528,7 @@ int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter ssize_t kept = 0; int result = -1; - st = open_istream(the_repository, oid, &type, &sz, filter); + st = open_istream(odb->repo, oid, &type, &sz, filter); if (!st) { if (filter) free_stream_filter(filter); diff --git a/streaming.h b/streaming.h index f5ff5d7ac9..148f6b3069 100644 --- a/streaming.h +++ b/streaming.h @@ -6,6 +6,7 @@ #include "object.h" +struct object_database; /* opaque */ struct odb_read_stream; struct stream_filter; @@ -16,6 +17,21 @@ struct odb_read_stream *open_istream(struct repository *, const struct object_id int close_istream(struct odb_read_stream *); ssize_t read_istream(struct odb_read_stream *, void *, size_t); -int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek); +/* + * Look up the object by its ID and write the full contents to the file + * descriptor. The object must be a blob, or the function will fail. When + * provided, the filter is used to transform the blob contents. + * + * `can_seek` should be set to 1 in case the given file descriptor can be + * seek(3p)'d on. This is used to support files with holes in case a + * significant portion of the blob contains NUL bytes. + * + * Returns a negative error code on failure, 0 on success. + */ +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek); #endif /* STREAMING_H */ From ffc9a3448500caa50766876ef2169e0f26ad3b3c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:39 +0100 Subject: [PATCH 14/19] streaming: make the `odb_read_stream` definition public Subsequent commits will move the backend-specific logic of setting up an object read stream into the specific subsystems. As the backends are now the ones that are responsible for allocating the stream they'll need to have the stream definition available to them. Make the stream definition public to prepare for this. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 11 ----------- streaming.h | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/streaming.c b/streaming.c index 807a6e03a8..0635b7c12e 100644 --- a/streaming.c +++ b/streaming.c @@ -12,19 +12,8 @@ #include "replace-object.h" #include "packfile.h" -typedef int (*close_istream_fn)(struct odb_read_stream *); -typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); - #define FILTER_BUFFER (1024*16) -struct odb_read_stream { - close_istream_fn close; - read_istream_fn read; - - enum object_type type; - unsigned long size; /* inflated size of full object */ -}; - /***************************************************************** * * Filtered stream diff --git a/streaming.h b/streaming.h index 148f6b3069..acfdef1598 100644 --- a/streaming.h +++ b/streaming.h @@ -7,10 +7,23 @@ #include "object.h" struct object_database; -/* opaque */ struct odb_read_stream; struct stream_filter; +typedef int (*odb_read_stream_close_fn)(struct odb_read_stream *); +typedef ssize_t (*odb_read_stream_read_fn)(struct odb_read_stream *, char *, size_t); + +/* + * A stream that can be used to read an object from the object database without + * loading all of it into memory. + */ +struct odb_read_stream { + odb_read_stream_close_fn close; + odb_read_stream_read_fn read; + enum object_type type; + unsigned long size; /* inflated size of full object */ +}; + struct odb_read_stream *open_istream(struct repository *, const struct object_id *, enum object_type *, unsigned long *, struct stream_filter *); From bc30a2f5dff6dd39966819ca3771ab5e9e072123 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:40 +0100 Subject: [PATCH 15/19] streaming: move logic to read loose objects streams into backend Move the logic to read loose object streams into the respective subsystem. This allows us to make a couple of function declarations private. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++--- object-file.h | 42 ++----------- streaming.c | 133 +--------------------------------------- 3 files changed, 164 insertions(+), 178 deletions(-) diff --git a/object-file.c b/object-file.c index b62b21a452..8c67847fea 100644 --- a/object-file.c +++ b/object-file.c @@ -234,9 +234,9 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size) +static void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size) { const char *p; int fd = open_loose_object(source->loose, oid, &p); @@ -246,11 +246,29 @@ void *odb_source_loose_map_object(struct odb_source *source, return map_fd(fd, p, size); } -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz) +enum unpack_loose_header_result { + ULHR_OK, + ULHR_BAD, + ULHR_TOO_LONG, +}; + +/** + * unpack_loose_header() initializes the data stream needed to unpack + * a loose object header. + * + * Returns: + * + * - ULHR_OK on success + * - ULHR_BAD on error + * - ULHR_TOO_LONG if the header was too long + * + * It will only parse up to MAX_HEADER_LEN bytes. + */ +static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, + unsigned char *map, + unsigned long mapsize, + void *buffer, + unsigned long bufsiz) { int status; @@ -329,11 +347,18 @@ static void *unpack_loose_rest(git_zstream *stream, } /* + * parse_loose_header() parses the starting " \0" of an + * object. If it doesn't follow that format -1 is returned. To check + * the validity of the populate the "typep" in the "struct + * object_info". It will be OBJ_BAD if the object type is unknown. The + * parsed can be retrieved via "oi->sizep", and from there + * passed to unpack_loose_rest(). + * * We used to just use "sscanf()", but that's actually way * too permissive for what we want to check. So do an anal * object header parse by hand. */ -int parse_loose_header(const char *hdr, struct object_info *oi) +static int parse_loose_header(const char *hdr, struct object_info *oi) { const char *type_buf = hdr; size_t size; @@ -1976,3 +2001,127 @@ void odb_source_loose_free(struct odb_source_loose *loose) loose_object_map_clear(&loose->map); free(loose); } + +struct odb_loose_read_stream { + struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: + return 0; + case ODB_LOOSE_READ_STREAM_ERROR: + return -1; + default: + break; + } + + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; + if (sz < to_copy) + to_copy = sz; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; + total_read += to_copy; + } + + while (total_read < sz) { + int status; + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); + + total_read = st->z.next_out - (unsigned char *)buf; + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; + break; + } + if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_loose(struct odb_read_stream *_st) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + return 0; +} + +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct object_info oi = OBJECT_INFO_INIT; + struct odb_loose_read_stream *st; + unsigned long mapsize; + void *mapped; + + mapped = odb_source_loose_map_object(source, oid, &mapsize); + if (!mapped) + return -1; + + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. + */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { + case ULHR_OK: + break; + case ULHR_BAD: + case ULHR_TOO_LONG: + goto error; + } + + oi.sizep = &st->base.size; + oi.typep = &st->base.type; + + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) + goto error; + + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; + + *out = &st->base; + + return 0; +error: + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + free(st); + return -1; +} diff --git a/object-file.h b/object-file.h index eeffa67bbd..1229d5f675 100644 --- a/object-file.h +++ b/object-file.h @@ -16,6 +16,8 @@ enum { int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags); +struct object_info; +struct odb_read_stream; struct odb_source; struct odb_source_loose { @@ -47,9 +49,9 @@ int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, struct object_info *oi, int flags); -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size); +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid); /* * Return true iff an object database source has a loose object @@ -143,40 +145,6 @@ int for_each_loose_object(struct object_database *odb, int format_object_header(char *str, size_t size, enum object_type type, size_t objsize); -/** - * unpack_loose_header() initializes the data stream needed to unpack - * a loose object header. - * - * Returns: - * - * - ULHR_OK on success - * - ULHR_BAD on error - * - ULHR_TOO_LONG if the header was too long - * - * It will only parse up to MAX_HEADER_LEN bytes. - */ -enum unpack_loose_header_result { - ULHR_OK, - ULHR_BAD, - ULHR_TOO_LONG, -}; -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz); - -/** - * parse_loose_header() parses the starting " \0" of an - * object. If it doesn't follow that format -1 is returned. To check - * the validity of the populate the "typep" in the "struct - * object_info". It will be OBJ_BAD if the object type is unknown. The - * parsed can be retrieved via "oi->sizep", and from there - * passed to unpack_loose_rest(). - */ -struct object_info; -int parse_loose_header(const char *hdr, struct object_info *oi); - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime); diff --git a/streaming.c b/streaming.c index 0635b7c12e..d5acc1c396 100644 --- a/streaming.c +++ b/streaming.c @@ -114,137 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, return &fs->base; } -/***************************************************************** - * - * Loose object stream - * - *****************************************************************/ - -struct odb_loose_read_stream { - struct odb_read_stream base; - git_zstream z; - enum { - ODB_LOOSE_READ_STREAM_INUSE, - ODB_LOOSE_READ_STREAM_DONE, - ODB_LOOSE_READ_STREAM_ERROR, - } z_state; - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; -}; - -static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) -{ - struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - size_t total_read = 0; - - switch (st->z_state) { - case ODB_LOOSE_READ_STREAM_DONE: - return 0; - case ODB_LOOSE_READ_STREAM_ERROR: - return -1; - default: - break; - } - - if (st->hdr_used < st->hdr_avail) { - size_t to_copy = st->hdr_avail - st->hdr_used; - if (sz < to_copy) - to_copy = sz; - memcpy(buf, st->hdr + st->hdr_used, to_copy); - st->hdr_used += to_copy; - total_read += to_copy; - } - - while (total_read < sz) { - int status; - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); - - total_read = st->z.next_out - (unsigned char *)buf; - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = ODB_LOOSE_READ_STREAM_DONE; - break; - } - if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = ODB_LOOSE_READ_STREAM_ERROR; - return -1; - } - } - return total_read; -} - -static int close_istream_loose(struct odb_read_stream *_st) -{ - struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) - git_inflate_end(&st->z); - munmap(st->mapped, st->mapsize); - return 0; -} - -static int open_istream_loose(struct odb_read_stream **out, - struct odb_source *source, - const struct object_id *oid) -{ - struct object_info oi = OBJECT_INFO_INIT; - struct odb_loose_read_stream *st; - unsigned long mapsize; - void *mapped; - - mapped = odb_source_loose_map_object(source, oid, &mapsize); - if (!mapped) - return -1; - - /* - * Note: we must allocate this structure early even though we may still - * fail. This is because we need to initialize the zlib stream, and it - * is not possible to copy the stream around after the fact because it - * has self-referencing pointers. - */ - CALLOC_ARRAY(st, 1); - - switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, - sizeof(st->hdr))) { - case ULHR_OK: - break; - case ULHR_BAD: - case ULHR_TOO_LONG: - goto error; - } - - oi.sizep = &st->base.size; - oi.typep = &st->base.type; - - if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) - goto error; - - st->mapped = mapped; - st->mapsize = mapsize; - st->hdr_used = strlen(st->hdr) + 1; - st->hdr_avail = st->z.total_out; - st->z_state = ODB_LOOSE_READ_STREAM_INUSE; - st->base.close = close_istream_loose; - st->base.read = read_istream_loose; - - *out = &st->base; - - return 0; -error: - git_inflate_end(&st->z); - munmap(st->mapped, st->mapsize); - free(st); - return -1; -} - - /***************************************************************** * * Non-delta packed object stream @@ -455,7 +324,7 @@ static int istream_source(struct odb_read_stream **out, odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) - if (!open_istream_loose(out, source, oid)) + if (!odb_source_loose_read_object_stream(out, source, oid)) return 0; return open_istream_incore(out, r, oid); From 8c1b84bc977bf1e4515efe0386de87257ec28689 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:41 +0100 Subject: [PATCH 16/19] streaming: move logic to read packed objects streams into backend Move the logic to read packed object streams into the respective subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++ packfile.h | 5 ++ streaming.c | 136 +--------------------------------------------------- 3 files changed, 134 insertions(+), 135 deletions(-) diff --git a/packfile.c b/packfile.c index b4bc40d895..ad56ce0b90 100644 --- a/packfile.c +++ b/packfile.c @@ -20,6 +20,7 @@ #include "tree.h" #include "object-file.h" #include "odb.h" +#include "streaming.h" #include "midx.h" #include "commit-graph.h" #include "pack-revindex.h" @@ -2406,3 +2407,130 @@ void packfile_store_close(struct packfile_store *store) close_pack(p); } } + +struct odb_packed_read_stream { + struct odb_read_stream base; + struct packed_git *pack; + git_zstream z; + enum { + ODB_PACKED_READ_STREAM_UNINITIALIZED, + ODB_PACKED_READ_STREAM_INUSE, + ODB_PACKED_READ_STREAM_DONE, + ODB_PACKED_READ_STREAM_ERROR, + } z_state; + off_t pos; +}; + +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, + size_t sz) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_PACKED_READ_STREAM_UNINITIALIZED: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_INUSE; + break; + case ODB_PACKED_READ_STREAM_DONE: + return 0; + case ODB_PACKED_READ_STREAM_ERROR: + return -1; + case ODB_PACKED_READ_STREAM_INUSE: + break; + } + + while (total_read < sz) { + int status; + struct pack_window *window = NULL; + unsigned char *mapped; + + mapped = use_pack(st->pack, &window, + st->pos, &st->z.avail_in); + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); + + st->pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; + unuse_pack(&window); + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_DONE; + break; + } + + /* + * Unlike the loose object case, we do not have to worry here + * about running out of input bytes and spinning infinitely. If + * we get Z_BUF_ERROR due to too few input bytes, then we'll + * replenish them in the next use_pack() call when we loop. If + * we truly hit the end of the pack (i.e., because it's corrupt + * or truncated), then use_pack() catches that and will die(). + */ + if (status != Z_OK && status != Z_BUF_ERROR) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_pack_non_delta(struct odb_read_stream *_st) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) + git_inflate_end(&st->z); + return 0; +} + +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid) +{ + struct odb_packed_read_stream *stream; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; + enum object_type in_pack_type; + unsigned long size; + + oi.sizep = &size; + + if (packfile_store_read_object_info(store, oid, &oi, 0) || + oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(store->odb->repo) >= size) + return -1; + + in_pack_type = unpack_object_header(oi.u.packed.pack, + &window, + &oi.u.packed.offset, + &size); + unuse_pack(&window); + switch (in_pack_type) { + default: + return -1; /* we do not do deltas for now */ + case OBJ_COMMIT: + case OBJ_TREE: + case OBJ_BLOB: + case OBJ_TAG: + break; + } + + CALLOC_ARRAY(stream, 1); + stream->base.close = close_istream_pack_non_delta; + stream->base.read = read_istream_pack_non_delta; + stream->base.type = in_pack_type; + stream->base.size = size; + stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; + + *out = &stream->base; + + return 0; +} diff --git a/packfile.h b/packfile.h index 0a98bddd81..3fcc5ae6e0 100644 --- a/packfile.h +++ b/packfile.h @@ -8,6 +8,7 @@ /* in odb.h */ struct object_info; +struct odb_read_stream; struct packed_git { struct hashmap_entry packmap_ent; @@ -144,6 +145,10 @@ void packfile_store_add_pack(struct packfile_store *store, #define repo_for_each_pack(repo, p) \ for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid); + /* * Try to read the object identified by its ID from the object store and * populate the object info with its data. Returns 1 in case the object was diff --git a/streaming.c b/streaming.c index d5acc1c396..3140728a70 100644 --- a/streaming.c +++ b/streaming.c @@ -114,140 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, return &fs->base; } -/***************************************************************** - * - * Non-delta packed object stream - * - *****************************************************************/ - -struct odb_packed_read_stream { - struct odb_read_stream base; - struct packed_git *pack; - git_zstream z; - enum { - ODB_PACKED_READ_STREAM_UNINITIALIZED, - ODB_PACKED_READ_STREAM_INUSE, - ODB_PACKED_READ_STREAM_DONE, - ODB_PACKED_READ_STREAM_ERROR, - } z_state; - off_t pos; -}; - -static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, - size_t sz) -{ - struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - size_t total_read = 0; - - switch (st->z_state) { - case ODB_PACKED_READ_STREAM_UNINITIALIZED: - memset(&st->z, 0, sizeof(st->z)); - git_inflate_init(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_INUSE; - break; - case ODB_PACKED_READ_STREAM_DONE: - return 0; - case ODB_PACKED_READ_STREAM_ERROR: - return -1; - case ODB_PACKED_READ_STREAM_INUSE: - break; - } - - while (total_read < sz) { - int status; - struct pack_window *window = NULL; - unsigned char *mapped; - - mapped = use_pack(st->pack, &window, - st->pos, &st->z.avail_in); - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - st->z.next_in = mapped; - status = git_inflate(&st->z, Z_FINISH); - - st->pos += st->z.next_in - mapped; - total_read = st->z.next_out - (unsigned char *)buf; - unuse_pack(&window); - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_DONE; - break; - } - - /* - * Unlike the loose object case, we do not have to worry here - * about running out of input bytes and spinning infinitely. If - * we get Z_BUF_ERROR due to too few input bytes, then we'll - * replenish them in the next use_pack() call when we loop. If - * we truly hit the end of the pack (i.e., because it's corrupt - * or truncated), then use_pack() catches that and will die(). - */ - if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_ERROR; - return -1; - } - } - return total_read; -} - -static int close_istream_pack_non_delta(struct odb_read_stream *_st) -{ - struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) - git_inflate_end(&st->z); - return 0; -} - -static int open_istream_pack_non_delta(struct odb_read_stream **out, - struct object_database *odb, - const struct object_id *oid) -{ - struct odb_packed_read_stream *stream; - struct pack_window *window = NULL; - struct object_info oi = OBJECT_INFO_INIT; - enum object_type in_pack_type; - unsigned long size; - - oi.sizep = &size; - - if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || - oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(odb->repo) >= size) - return -1; - - in_pack_type = unpack_object_header(oi.u.packed.pack, - &window, - &oi.u.packed.offset, - &size); - unuse_pack(&window); - switch (in_pack_type) { - default: - return -1; /* we do not do deltas for now */ - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - break; - } - - CALLOC_ARRAY(stream, 1); - stream->base.close = close_istream_pack_non_delta; - stream->base.read = read_istream_pack_non_delta; - stream->base.type = in_pack_type; - stream->base.size = size; - stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; - stream->pack = oi.u.packed.pack; - stream->pos = oi.u.packed.offset; - - *out = &stream->base; - - return 0; -} - - /***************************************************************** * * In-core stream @@ -319,7 +185,7 @@ static int istream_source(struct odb_read_stream **out, { struct odb_source *source; - if (!open_istream_pack_non_delta(out, r->objects, oid)) + if (!packfile_store_read_object_stream(out, r->objects->packfiles, oid)) return 0; odb_prepare_alternates(r->objects); From 378ec56beba161abbef6e2c87d9bc2ac43c355f3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:42 +0100 Subject: [PATCH 17/19] streaming: refactor interface to be object-database-centric Refactor the streaming interface to be centered around object databases instead of centered around the repository. Rename the functions accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 6 +++--- archive-zip.c | 12 ++++++------ builtin/index-pack.c | 8 ++++---- builtin/pack-objects.c | 14 +++++++------- object-file.c | 8 ++++---- streaming.c | 44 +++++++++++++++++++++--------------------- streaming.h | 30 +++++++++++++++++++++++----- 7 files changed, 71 insertions(+), 51 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index dc1eda09e0..4d87b28504 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -135,16 +135,16 @@ static int stream_blocked(struct repository *r, const struct object_id *oid) char buf[BLOCKSIZE]; ssize_t readlen; - st = open_istream(r, oid, &type, &sz, NULL); + st = odb_read_stream_open(r->objects, oid, &type, &sz, NULL); if (!st) return error(_("cannot stream blob %s"), oid_to_hex(oid)); for (;;) { - readlen = read_istream(st, buf, sizeof(buf)); + readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen <= 0) break; do_write_blocked(buf, readlen); } - close_istream(st); + odb_read_stream_close(st); if (!readlen) finish_record(); return readlen; diff --git a/archive-zip.c b/archive-zip.c index 40a9c93ff9..c44684aebc 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -348,8 +348,8 @@ static int write_zip_entry(struct archiver_args *args, if (!buffer) { enum object_type type; - stream = open_istream(args->repo, oid, &type, &size, - NULL); + stream = odb_read_stream_open(args->repo->objects, oid, + &type, &size, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); @@ -429,7 +429,7 @@ static int write_zip_entry(struct archiver_args *args, ssize_t readlen; for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -439,7 +439,7 @@ static int write_zip_entry(struct archiver_args *args, buf, readlen); write_or_die(1, buf, readlen); } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; @@ -462,7 +462,7 @@ static int write_zip_entry(struct archiver_args *args, zstream.avail_out = sizeof(compressed); for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -486,7 +486,7 @@ static int write_zip_entry(struct archiver_args *args, } } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 5f90f12f92..fb76ef0f4c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -779,7 +779,7 @@ static int compare_objects(const unsigned char *buf, unsigned long size, } while (size) { - ssize_t len = read_istream(data->st, data->buf, size); + ssize_t len = odb_read_stream_read(data->st, data->buf, size); if (len == 0) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&data->entry->idx.oid)); @@ -807,15 +807,15 @@ static int check_collison(struct object_entry *entry) memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = open_istream(the_repository, &entry->idx.oid, &type, &size, - NULL); + data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, + &type, &size, NULL); if (!data.st) return -1; if (size != entry->size || type != entry->type) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&entry->idx.oid)); unpack_data(entry, compare_objects, &data); - close_istream(data.st); + odb_read_stream_close(data.st); free(data.buf); return 0; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index c693d948e1..1353c2384c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -417,7 +417,7 @@ static unsigned long write_large_blob_data(struct odb_read_stream *st, struct ha for (;;) { ssize_t readlen; int zret = Z_OK; - readlen = read_istream(st, ibuf, sizeof(ibuf)); + readlen = odb_read_stream_read(st, ibuf, sizeof(ibuf)); if (readlen == -1) die(_("unable to read %s"), oid_to_hex(oid)); @@ -520,8 +520,8 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent if (oe_type(entry) == OBJ_BLOB && oe_size_greater_than(&to_pack, entry, repo_settings_get_big_file_threshold(the_repository)) && - (st = open_istream(the_repository, &entry->idx.oid, &type, - &size, NULL)) != NULL) + (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, + &type, &size, NULL)) != NULL) buf = NULL; else { buf = odb_read_object(the_repository->objects, @@ -577,7 +577,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent dheader[--pos] = 128 | (--ofs & 127); if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -591,7 +591,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent */ if (limit && hdrlen + hashsz + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -601,7 +601,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } else { if (limit && hdrlen + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -609,7 +609,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } if (st) { datalen = write_large_blob_data(st, f, &entry->idx.oid); - close_istream(st); + odb_read_stream_close(st); } else { hashwrite(f, buf, datalen); free(buf); diff --git a/object-file.c b/object-file.c index 8c67847fea..9ba40a848c 100644 --- a/object-file.c +++ b/object-file.c @@ -139,7 +139,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) char hdr[MAX_HEADER_LEN]; int hdrlen; - st = open_istream(r, oid, &obj_type, &size, NULL); + st = odb_read_stream_open(r->objects, oid, &obj_type, &size, NULL); if (!st) return -1; @@ -151,10 +151,10 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, hdr, hdrlen); for (;;) { char buf[1024 * 16]; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen < 0) { - close_istream(st); + odb_read_stream_close(st); return -1; } if (!readlen) @@ -162,7 +162,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, buf, readlen); } git_hash_final_oid(&real_oid, &c); - close_istream(st); + odb_read_stream_close(st); return !oideq(oid, &real_oid) ? -1 : 0; } diff --git a/streaming.c b/streaming.c index 3140728a70..06993a751c 100644 --- a/streaming.c +++ b/streaming.c @@ -35,7 +35,7 @@ static int close_istream_filtered(struct odb_read_stream *_fs) { struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; free_stream_filter(fs->filter); - return close_istream(fs->upstream); + return odb_read_stream_close(fs->upstream); } static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, @@ -87,7 +87,7 @@ static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, /* refill the input from the upstream */ if (!fs->input_finished) { - fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); + fs->i_end = odb_read_stream_read(fs->upstream, fs->ibuf, FILTER_BUFFER); if (fs->i_end < 0) return -1; if (fs->i_end) @@ -149,7 +149,7 @@ static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_ } static int open_istream_incore(struct odb_read_stream **out, - struct repository *r, + struct object_database *odb, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; @@ -163,7 +163,7 @@ static int open_istream_incore(struct odb_read_stream **out, oi.typep = &stream.base.type; oi.sizep = &stream.base.size; oi.contentp = (void **)&stream.buf; - ret = odb_read_object_info_extended(r->objects, oid, &oi, + ret = odb_read_object_info_extended(odb, oid, &oi, OBJECT_INFO_DIE_IF_CORRUPT); if (ret) return ret; @@ -180,47 +180,47 @@ static int open_istream_incore(struct odb_read_stream **out, *****************************************************************************/ static int istream_source(struct odb_read_stream **out, - struct repository *r, + struct object_database *odb, const struct object_id *oid) { struct odb_source *source; - if (!packfile_store_read_object_stream(out, r->objects->packfiles, oid)) + if (!packfile_store_read_object_stream(out, odb->packfiles, oid)) return 0; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) if (!odb_source_loose_read_object_stream(out, source, oid)) return 0; - return open_istream_incore(out, r, oid); + return open_istream_incore(out, odb, oid); } /**************************************************************** * Users of streaming interface ****************************************************************/ -int close_istream(struct odb_read_stream *st) +int odb_read_stream_close(struct odb_read_stream *st) { int r = st->close(st); free(st); return r; } -ssize_t read_istream(struct odb_read_stream *st, void *buf, size_t sz) +ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz) { return st->read(st, buf, sz); } -struct odb_read_stream *open_istream(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size, - struct stream_filter *filter) +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter) { struct odb_read_stream *st; - const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(&st, r, real); + const struct object_id *real = lookup_replace_object(odb->repo, oid); + int ret = istream_source(&st, odb, real); if (ret) return NULL; @@ -229,7 +229,7 @@ struct odb_read_stream *open_istream(struct repository *r, /* Add "&& !is_null_stream_filter(filter)" for performance */ struct odb_read_stream *nst = attach_stream_filter(st, filter); if (!nst) { - close_istream(st); + odb_read_stream_close(st); return NULL; } st = nst; @@ -252,7 +252,7 @@ int odb_stream_blob_to_fd(struct object_database *odb, ssize_t kept = 0; int result = -1; - st = open_istream(odb->repo, oid, &type, &sz, filter); + st = odb_read_stream_open(odb, oid, &type, &sz, filter); if (!st) { if (filter) free_stream_filter(filter); @@ -263,7 +263,7 @@ int odb_stream_blob_to_fd(struct object_database *odb, for (;;) { char buf[1024 * 16]; ssize_t wrote, holeto; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen < 0) goto close_and_exit; @@ -294,6 +294,6 @@ int odb_stream_blob_to_fd(struct object_database *odb, result = 0; close_and_exit: - close_istream(st); + odb_read_stream_close(st); return result; } diff --git a/streaming.h b/streaming.h index acfdef1598..7cb55213b7 100644 --- a/streaming.h +++ b/streaming.h @@ -24,11 +24,31 @@ struct odb_read_stream { unsigned long size; /* inflated size of full object */ }; -struct odb_read_stream *open_istream(struct repository *, const struct object_id *, - enum object_type *, unsigned long *, - struct stream_filter *); -int close_istream(struct odb_read_stream *); -ssize_t read_istream(struct odb_read_stream *, void *, size_t); +/* + * Create a new object stream for the given object database. Populates the type + * and size pointers with the object's info. An optional filter can be used to + * transform the object's content. + * + * Returns the stream on success, a `NULL` pointer otherwise. + */ +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter); + +/* + * Close the given read stream and release all resources associated with it. + * Returns 0 on success, a negative error code otherwise. + */ +int odb_read_stream_close(struct odb_read_stream *stream); + +/* + * Read data from the stream into the buffer. Returns 0 on EOF and the number + * of bytes read on success. Returns a negative error code in case reading from + * the stream fails. + */ +ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len); /* * Look up the object by its ID and write the full contents to the file From 1599b68d5e960a12f5ac624f81c70ece317db5a6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:43 +0100 Subject: [PATCH 18/19] streaming: move into object database subsystem The "streaming" terminology is somewhat generic, so it may not be immediately obvious that "streaming.{c,h}" is specific to the object database. Rectify this by moving it into the "odb/" directory so that it can be immediately attributed to the object subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 2 +- archive-tar.c | 2 +- archive-zip.c | 2 +- builtin/cat-file.c | 2 +- builtin/fsck.c | 2 +- builtin/index-pack.c | 2 +- builtin/log.c | 2 +- builtin/pack-objects.c | 2 +- entry.c | 2 +- meson.build | 2 +- object-file.c | 2 +- streaming.c => odb/streaming.c | 2 +- streaming.h => odb/streaming.h | 0 packfile.c | 2 +- parallel-checkout.c | 2 +- 15 files changed, 14 insertions(+), 14 deletions(-) rename streaming.c => odb/streaming.c (99%) rename streaming.h => odb/streaming.h (100%) diff --git a/Makefile b/Makefile index 7e0f77e298..6d8dcc4622 100644 --- a/Makefile +++ b/Makefile @@ -1201,6 +1201,7 @@ LIB_OBJS += object-file.o LIB_OBJS += object-name.o LIB_OBJS += object.o LIB_OBJS += odb.o +LIB_OBJS += odb/streaming.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o LIB_OBJS += oidset.o @@ -1294,7 +1295,6 @@ LIB_OBJS += split-index.o LIB_OBJS += stable-qsort.o LIB_OBJS += statinfo.o LIB_OBJS += strbuf.o -LIB_OBJS += streaming.o LIB_OBJS += string-list.o LIB_OBJS += strmap.o LIB_OBJS += strvec.o diff --git a/archive-tar.c b/archive-tar.c index 4d87b28504..494b9f0667 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -12,8 +12,8 @@ #include "tar.h" #include "archive.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" -#include "streaming.h" #include "run-command.h" #include "write-or-die.h" diff --git a/archive-zip.c b/archive-zip.c index c44684aebc..a0bdc2fe3b 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -10,9 +10,9 @@ #include "gettext.h" #include "git-zlib.h" #include "hex.h" -#include "streaming.h" #include "utf8.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" #include "userdiff.h" #include "write-or-die.h" diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 120d626d66..505ddaa12f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -18,13 +18,13 @@ #include "list-objects-filter-options.h" #include "parse-options.h" #include "userdiff.h" -#include "streaming.h" #include "oid-array.h" #include "packfile.h" #include "pack-bitmap.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "promisor-remote.h" #include "mailmap.h" diff --git a/builtin/fsck.c b/builtin/fsck.c index 1a348d43c2..c7d2eea287 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -13,11 +13,11 @@ #include "fsck.h" #include "parse-options.h" #include "progress.h" -#include "streaming.h" #include "packfile.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "path.h" #include "read-cache-ll.h" #include "replace-object.h" diff --git a/builtin/index-pack.c b/builtin/index-pack.c index fb76ef0f4c..581023495f 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -16,12 +16,12 @@ #include "progress.h" #include "fsck.h" #include "strbuf.h" -#include "streaming.h" #include "thread-utils.h" #include "packfile.h" #include "pack-revindex.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oid-array.h" #include "oidset.h" #include "path.h" diff --git a/builtin/log.c b/builtin/log.c index e7b83a6e00..d4cf9c59c8 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -16,6 +16,7 @@ #include "refs.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "pager.h" #include "color.h" #include "commit.h" @@ -35,7 +36,6 @@ #include "parse-options.h" #include "line-log.h" #include "branch.h" -#include "streaming.h" #include "version.h" #include "mailmap.h" #include "progress.h" diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1353c2384c..f109e26786 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -22,7 +22,6 @@ #include "pack-objects.h" #include "progress.h" #include "refs.h" -#include "streaming.h" #include "thread-utils.h" #include "pack-bitmap.h" #include "delta-islands.h" @@ -33,6 +32,7 @@ #include "packfile.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "dir.h" #include "midx.h" diff --git a/entry.c b/entry.c index 38dfe670f7..7817aee362 100644 --- a/entry.c +++ b/entry.c @@ -2,13 +2,13 @@ #include "git-compat-util.h" #include "odb.h" +#include "odb/streaming.h" #include "dir.h" #include "environment.h" #include "gettext.h" #include "hex.h" #include "name-hash.h" #include "sparse-index.h" -#include "streaming.h" #include "submodule.h" #include "symlinks.h" #include "progress.h" diff --git a/meson.build b/meson.build index 1f95a06edb..fc82929b37 100644 --- a/meson.build +++ b/meson.build @@ -397,6 +397,7 @@ libgit_sources = [ 'object-name.c', 'object.c', 'odb.c', + 'odb/streaming.c', 'oid-array.c', 'oidmap.c', 'oidset.c', @@ -490,7 +491,6 @@ libgit_sources = [ 'stable-qsort.c', 'statinfo.c', 'strbuf.c', - 'streaming.c', 'string-list.c', 'strmap.c', 'strvec.c', diff --git a/object-file.c b/object-file.c index 9ba40a848c..9601fdb12d 100644 --- a/object-file.c +++ b/object-file.c @@ -20,13 +20,13 @@ #include "object-file-convert.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oidtree.h" #include "pack.h" #include "packfile.h" #include "path.h" #include "read-cache-ll.h" #include "setup.h" -#include "streaming.h" #include "tempfile.h" #include "tmp-objdir.h" diff --git a/streaming.c b/odb/streaming.c similarity index 99% rename from streaming.c rename to odb/streaming.c index 06993a751c..7ef58adaa2 100644 --- a/streaming.c +++ b/odb/streaming.c @@ -5,10 +5,10 @@ #include "git-compat-util.h" #include "convert.h" #include "environment.h" -#include "streaming.h" #include "repository.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "packfile.h" diff --git a/streaming.h b/odb/streaming.h similarity index 100% rename from streaming.h rename to odb/streaming.h diff --git a/packfile.c b/packfile.c index ad56ce0b90..7a16aaa90d 100644 --- a/packfile.c +++ b/packfile.c @@ -20,7 +20,7 @@ #include "tree.h" #include "object-file.h" #include "odb.h" -#include "streaming.h" +#include "odb/streaming.h" #include "midx.h" #include "commit-graph.h" #include "pack-revindex.h" diff --git a/parallel-checkout.c b/parallel-checkout.c index 1cb6701b92..0bf4bd6d4a 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -13,7 +13,7 @@ #include "read-cache-ll.h" #include "run-command.h" #include "sigchain.h" -#include "streaming.h" +#include "odb/streaming.h" #include "symlinks.h" #include "thread-utils.h" #include "trace2.h" From 7b940286527ec2175dffbb317f47e080bb37cf3e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:44 +0100 Subject: [PATCH 19/19] streaming: drop redundant type and size pointers In the preceding commits we have turned `struct odb_read_stream` into a publicly visible structure. Furthermore, this structure now contains the type and size of the object that we are about to stream. Consequently, the out-pointers that we used before to propagate the type and size of the streamed object are now somewhat redundant with the data contained in the structure itself. Drop these out-pointers and adapt callers accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 4 +--- archive-zip.c | 5 ++--- builtin/index-pack.c | 7 ++----- builtin/pack-objects.c | 6 ++++-- object-file.c | 6 ++---- odb/streaming.c | 10 ++-------- odb/streaming.h | 7 ++----- 7 files changed, 15 insertions(+), 30 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 494b9f0667..0fc70d13a8 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -130,12 +130,10 @@ static void write_trailer(void) static int stream_blocked(struct repository *r, const struct object_id *oid) { struct odb_read_stream *st; - enum object_type type; - unsigned long sz; char buf[BLOCKSIZE]; ssize_t readlen; - st = odb_read_stream_open(r->objects, oid, &type, &sz, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return error(_("cannot stream blob %s"), oid_to_hex(oid)); for (;;) { diff --git a/archive-zip.c b/archive-zip.c index a0bdc2fe3b..97ea8d60d6 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -347,12 +347,11 @@ static int write_zip_entry(struct archiver_args *args, method = ZIP_METHOD_DEFLATE; if (!buffer) { - enum object_type type; - stream = odb_read_stream_open(args->repo->objects, oid, - &type, &size, NULL); + stream = odb_read_stream_open(args->repo->objects, oid, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); + size = stream->size; flags |= ZIP_STREAM; out = NULL; } else { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 581023495f..b01cb77f4a 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -798,8 +798,6 @@ static int compare_objects(const unsigned char *buf, unsigned long size, static int check_collison(struct object_entry *entry) { struct compare_data data; - enum object_type type; - unsigned long size; if (entry->size <= repo_settings_get_big_file_threshold(the_repository) || entry->type != OBJ_BLOB) @@ -807,11 +805,10 @@ static int check_collison(struct object_entry *entry) memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, - &type, &size, NULL); + data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, NULL); if (!data.st) return -1; - if (size != entry->size || type != entry->type) + if (data.st->size != entry->size || data.st->type != entry->type) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&entry->idx.oid)); unpack_data(entry, compare_objects, &data); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index f109e26786..0d1d6995bf 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -521,9 +521,11 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent oe_size_greater_than(&to_pack, entry, repo_settings_get_big_file_threshold(the_repository)) && (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, - &type, &size, NULL)) != NULL) + NULL)) != NULL) { buf = NULL; - else { + type = st->type; + size = st->size; + } else { buf = odb_read_object(the_repository->objects, &entry->idx.oid, &type, &size); diff --git a/object-file.c b/object-file.c index 9601fdb12d..12177a7dd7 100644 --- a/object-file.c +++ b/object-file.c @@ -132,19 +132,17 @@ int check_object_signature(struct repository *r, const struct object_id *oid, int stream_object_signature(struct repository *r, const struct object_id *oid) { struct object_id real_oid; - unsigned long size; - enum object_type obj_type; struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; - st = odb_read_stream_open(r->objects, oid, &obj_type, &size, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return -1; /* Generate the header */ - hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size); + hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size); /* Sha1.. */ r->hash_algo->init_fn(&c); diff --git a/odb/streaming.c b/odb/streaming.c index 7ef58adaa2..745cd486fb 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -214,8 +214,6 @@ ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz) struct odb_read_stream *odb_read_stream_open(struct object_database *odb, const struct object_id *oid, - enum object_type *type, - unsigned long *size, struct stream_filter *filter) { struct odb_read_stream *st; @@ -235,8 +233,6 @@ struct odb_read_stream *odb_read_stream_open(struct object_database *odb, st = nst; } - *size = st->size; - *type = st->type; return st; } @@ -247,18 +243,16 @@ int odb_stream_blob_to_fd(struct object_database *odb, int can_seek) { struct odb_read_stream *st; - enum object_type type; - unsigned long sz; ssize_t kept = 0; int result = -1; - st = odb_read_stream_open(odb, oid, &type, &sz, filter); + st = odb_read_stream_open(odb, oid, filter); if (!st) { if (filter) free_stream_filter(filter); return result; } - if (type != OBJ_BLOB) + if (st->type != OBJ_BLOB) goto close_and_exit; for (;;) { char buf[1024 * 16]; diff --git a/odb/streaming.h b/odb/streaming.h index 7cb55213b7..c7861f7e13 100644 --- a/odb/streaming.h +++ b/odb/streaming.h @@ -25,16 +25,13 @@ struct odb_read_stream { }; /* - * Create a new object stream for the given object database. Populates the type - * and size pointers with the object's info. An optional filter can be used to - * transform the object's content. + * Create a new object stream for the given object database. An optional filter + * can be used to transform the object's content. * * Returns the stream on success, a `NULL` pointer otherwise. */ struct odb_read_stream *odb_read_stream_open(struct object_database *odb, const struct object_id *oid, - enum object_type *type, - unsigned long *size, struct stream_filter *filter); /*