streaming: move logic to read loose object streams into backend

Move the logic to read loose object streams into the respective
subsystem. This allows us to make a couple of function declarations
private.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Patrick Steinhardt
2025-11-23 19:59:40 +01:00
committed by Junio C Hamano
parent ffc9a34485
commit bc30a2f5df
3 changed files with 164 additions and 178 deletions

View File

@@ -234,9 +234,9 @@ static void *map_fd(int fd, const char *path, unsigned long *size)
return map;
}
void *odb_source_loose_map_object(struct odb_source *source,
const struct object_id *oid,
unsigned long *size)
static void *odb_source_loose_map_object(struct odb_source *source,
const struct object_id *oid,
unsigned long *size)
{
const char *p;
int fd = open_loose_object(source->loose, oid, &p);
@@ -246,11 +246,29 @@ void *odb_source_loose_map_object(struct odb_source *source,
return map_fd(fd, p, size);
}
enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
unsigned char *map,
unsigned long mapsize,
void *buffer,
unsigned long bufsiz)
/* Result codes returned by unpack_loose_header(). */
enum unpack_loose_header_result {
/* The loose object header was unpacked successfully. */
ULHR_OK,
/* An error occurred while unpacking the header. */
ULHR_BAD,
/* The header was too long. */
ULHR_TOO_LONG,
};
/**
* unpack_loose_header() initializes the data stream needed to unpack
* a loose object header.
*
* Returns:
*
* - ULHR_OK on success
* - ULHR_BAD on error
* - ULHR_TOO_LONG if the header was too long
*
* It will only parse up to MAX_HEADER_LEN bytes.
*/
static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
unsigned char *map,
unsigned long mapsize,
void *buffer,
unsigned long bufsiz)
{
int status;
@@ -329,11 +347,18 @@ static void *unpack_loose_rest(git_zstream *stream,
}
/*
* parse_loose_header() parses the starting "<type> <len>\0" of an
* object. If it doesn't follow that format -1 is returned. To check
* the validity of the <type> populate the "typep" in the "struct
* object_info". It will be OBJ_BAD if the object type is unknown. The
* parsed <len> can be retrieved via "oi->sizep", and from there
* passed to unpack_loose_rest().
*
* We used to just use "sscanf()", but that's actually way
* too permissive for what we want to check. So parse the
* object header strictly by hand.
*/
int parse_loose_header(const char *hdr, struct object_info *oi)
static int parse_loose_header(const char *hdr, struct object_info *oi)
{
const char *type_buf = hdr;
size_t size;
@@ -1976,3 +2001,127 @@ void odb_source_loose_free(struct odb_source_loose *loose)
loose_object_map_clear(&loose->map);
free(loose);
}
/*
 * State of a read stream that inflates a single memory-mapped loose
 * object on demand.
 */
struct odb_loose_read_stream {
/*
 * Generic stream part. The read/close callbacks cast a pointer to this
 * member back to the enclosing structure, so it needs to remain the
 * first member.
 */
struct odb_read_stream base;
/* zlib inflate state; set up while unpacking the object header. */
git_zstream z;
/*
 * Lifecycle of `z`:
 *  - INUSE: inflating is in progress, `z` still has to be ended.
 *  - DONE:  the end of the stream was reached and `z` was ended.
 *  - ERROR: inflating failed and `z` was ended.
 */
enum {
ODB_LOOSE_READ_STREAM_INUSE,
ODB_LOOSE_READ_STREAM_DONE,
ODB_LOOSE_READ_STREAM_ERROR,
} z_state;
/* Mapping of the loose object file and its size; unmapped on close. */
void *mapped;
unsigned long mapsize;
/*
 * Buffer the object header is inflated into. Bytes following the
 * header's terminating NUL are object contents that were inflated
 * along with it.
 */
char hdr[32];
/* Number of bytes that were inflated into `hdr`. */
int hdr_avail;
/* Offset of the first byte in `hdr` not yet handed to the reader. */
int hdr_used;
};
/*
 * Read callback for loose object streams: store up to `sz` bytes of
 * object contents in `buf`.
 *
 * Bytes that were already inflated into the header buffer past the
 * object header are handed out first; everything else is inflated
 * directly into `buf`.
 *
 * Returns the number of bytes stored in `buf`, 0 once the stream has
 * been marked as done, or -1 on an inflate error (and on every call
 * after such an error).
 */
static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
{
	struct odb_loose_read_stream *stream = (struct odb_loose_read_stream *)_st;
	unsigned char *dst = (unsigned char *)buf;
	size_t filled = 0;

	if (stream->z_state == ODB_LOOSE_READ_STREAM_DONE)
		return 0;
	if (stream->z_state == ODB_LOOSE_READ_STREAM_ERROR)
		return -1;

	/* Drain what is left in the header buffer before inflating more. */
	if (stream->hdr_used < stream->hdr_avail) {
		size_t chunk = stream->hdr_avail - stream->hdr_used;

		if (chunk > sz)
			chunk = sz;
		memcpy(buf, stream->hdr + stream->hdr_used, chunk);
		stream->hdr_used += chunk;
		filled += chunk;
	}

	while (filled < sz) {
		int status;

		stream->z.next_out = dst + filled;
		stream->z.avail_out = sz - filled;
		status = git_inflate(&stream->z, Z_FINISH);
		filled = stream->z.next_out - dst;

		if (status == Z_STREAM_END) {
			git_inflate_end(&stream->z);
			stream->z_state = ODB_LOOSE_READ_STREAM_DONE;
			break;
		}

		/*
		 * Z_OK means there is more to inflate. Z_BUF_ERROR is only
		 * tolerated when `buf` has been filled completely; the loop
		 * condition then ends the read.
		 */
		if (status == Z_OK)
			continue;
		if (status == Z_BUF_ERROR && filled >= sz)
			continue;

		git_inflate_end(&stream->z);
		stream->z_state = ODB_LOOSE_READ_STREAM_ERROR;
		return -1;
	}

	return filled;
}
/*
 * Close callback for loose object streams: end the inflate state if it
 * is still in use and unmap the object file. Always returns 0.
 */
static int close_istream_loose(struct odb_read_stream *_st)
{
	struct odb_loose_read_stream *stream = (struct odb_loose_read_stream *)_st;

	switch (stream->z_state) {
	case ODB_LOOSE_READ_STREAM_INUSE:
		git_inflate_end(&stream->z);
		break;
	default:
		/* The read callback already ended the inflate state. */
		break;
	}

	munmap(stream->mapped, stream->mapsize);
	return 0;
}
/*
 * Open a read stream for the loose object `oid` stored in `source`.
 *
 * The object file is mapped into memory and its header is unpacked and
 * parsed so that the stream's type and size are known up front.
 *
 * Returns 0 on success and stores the stream in `*out`. Returns -1 if
 * the object cannot be mapped or if its header cannot be unpacked or
 * parsed; `*out` is left untouched in that case and all intermediate
 * resources are released.
 */
int odb_source_loose_read_object_stream(struct odb_read_stream **out,
					struct odb_source *source,
					const struct object_id *oid)
{
	struct object_info oi = OBJECT_INFO_INIT;
	struct odb_loose_read_stream *st;
	unsigned long mapsize;
	void *mapped;

	mapped = odb_source_loose_map_object(source, oid, &mapsize);
	if (!mapped)
		return -1;

	/*
	 * Note: we must allocate this structure early even though we may still
	 * fail. This is because we need to initialize the zlib stream, and it
	 * is not possible to copy the stream around after the fact because it
	 * has self-referencing pointers.
	 */
	CALLOC_ARRAY(st, 1);

	switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
				    sizeof(st->hdr))) {
	case ULHR_OK:
		break;
	case ULHR_BAD:
	case ULHR_TOO_LONG:
		goto error;
	}

	oi.sizep = &st->base.size;
	oi.typep = &st->base.type;

	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
		goto error;

	st->mapped = mapped;
	st->mapsize = mapsize;
	/*
	 * Skip the header and its terminating NUL. Anything between
	 * `hdr_used` and `hdr_avail` is object data that was inflated along
	 * with the header and is served first by the read callback.
	 */
	st->hdr_used = strlen(st->hdr) + 1;
	st->hdr_avail = st->z.total_out;
	st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
	st->base.close = close_istream_loose;
	st->base.read = read_istream_loose;

	*out = &st->base;

	return 0;

error:
	git_inflate_end(&st->z);
	/*
	 * Ownership of the mapping has not been handed over to `st` yet, so
	 * `st->mapped` is still NULL here. Unmap via the local variables to
	 * avoid leaking the mapping.
	 */
	munmap(mapped, mapsize);
	free(st);
	return -1;
}

View File

@@ -16,6 +16,8 @@ enum {
int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags);
struct object_info;
struct odb_read_stream;
struct odb_source;
struct odb_source_loose {
@@ -47,9 +49,9 @@ int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
struct object_info *oi, int flags);
void *odb_source_loose_map_object(struct odb_source *source,
const struct object_id *oid,
unsigned long *size);
/*
 * Open a read stream for the loose object `oid` stored in `source`.
 *
 * Returns 0 on success, in which case `*out` points to the stream with
 * its type, size and read/close callbacks populated. Returns -1 if the
 * object cannot be mapped or if its header cannot be unpacked or parsed;
 * `*out` is not modified in that case.
 */
int odb_source_loose_read_object_stream(struct odb_read_stream **out,
struct odb_source *source,
const struct object_id *oid);
/*
* Return true iff an object database source has a loose object
@@ -143,40 +145,6 @@ int for_each_loose_object(struct object_database *odb,
int format_object_header(char *str, size_t size, enum object_type type,
size_t objsize);
/**
* unpack_loose_header() initializes the data stream needed to unpack
* a loose object header.
*
* Returns:
*
* - ULHR_OK on success
* - ULHR_BAD on error
* - ULHR_TOO_LONG if the header was too long
*
* It will only parse up to MAX_HEADER_LEN bytes.
*/
enum unpack_loose_header_result {
/* The loose object header was unpacked successfully. */
ULHR_OK,
/* An error occurred while unpacking the header. */
ULHR_BAD,
/* The header was too long. */
ULHR_TOO_LONG,
};
enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
unsigned char *map,
unsigned long mapsize,
void *buffer,
unsigned long bufsiz);
/**
* parse_loose_header() parses the starting "<type> <len>\0" of an
* object. If it doesn't follow that format -1 is returned. To check
* the validity of the <type> populate the "typep" in the "struct
* object_info". It will be OBJ_BAD if the object type is unknown. The
* parsed <len> can be retrieved via "oi->sizep", and from there
* passed to unpack_loose_rest().
*/
struct object_info;
int parse_loose_header(const char *hdr, struct object_info *oi);
int force_object_loose(struct odb_source *source,
const struct object_id *oid, time_t mtime);

View File

@@ -114,137 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
return &fs->base;
}
/*****************************************************************
*
* Loose object stream
*
*****************************************************************/
/*
 * State of a read stream that inflates a single memory-mapped loose
 * object on demand.
 */
struct odb_loose_read_stream {
/*
 * Generic stream part. The read/close callbacks cast a pointer to this
 * member back to the enclosing structure, so it needs to remain the
 * first member.
 */
struct odb_read_stream base;
/* zlib inflate state; set up while unpacking the object header. */
git_zstream z;
/*
 * Lifecycle of `z`:
 *  - INUSE: inflating is in progress, `z` still has to be ended.
 *  - DONE:  the end of the stream was reached and `z` was ended.
 *  - ERROR: inflating failed and `z` was ended.
 */
enum {
ODB_LOOSE_READ_STREAM_INUSE,
ODB_LOOSE_READ_STREAM_DONE,
ODB_LOOSE_READ_STREAM_ERROR,
} z_state;
/* Mapping of the loose object file and its size; unmapped on close. */
void *mapped;
unsigned long mapsize;
/*
 * Buffer the object header is inflated into. Bytes following the
 * header's terminating NUL are object contents that were inflated
 * along with it.
 */
char hdr[32];
/* Number of bytes that were inflated into `hdr`. */
int hdr_avail;
/* Offset of the first byte in `hdr` not yet handed to the reader. */
int hdr_used;
};
/*
 * Read callback for loose object streams: store up to `sz` bytes of
 * object contents in `buf`.
 *
 * Bytes that were already inflated into the header buffer past the
 * object header are handed out first; everything else is inflated
 * directly into `buf`.
 *
 * Returns the number of bytes stored in `buf`, 0 once the stream has
 * been marked as done, or -1 on an inflate error (and on every call
 * after such an error).
 */
static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
{
	struct odb_loose_read_stream *stream = (struct odb_loose_read_stream *)_st;
	unsigned char *dst = (unsigned char *)buf;
	size_t filled = 0;

	if (stream->z_state == ODB_LOOSE_READ_STREAM_DONE)
		return 0;
	if (stream->z_state == ODB_LOOSE_READ_STREAM_ERROR)
		return -1;

	/* Drain what is left in the header buffer before inflating more. */
	if (stream->hdr_used < stream->hdr_avail) {
		size_t chunk = stream->hdr_avail - stream->hdr_used;

		if (chunk > sz)
			chunk = sz;
		memcpy(buf, stream->hdr + stream->hdr_used, chunk);
		stream->hdr_used += chunk;
		filled += chunk;
	}

	while (filled < sz) {
		int status;

		stream->z.next_out = dst + filled;
		stream->z.avail_out = sz - filled;
		status = git_inflate(&stream->z, Z_FINISH);
		filled = stream->z.next_out - dst;

		if (status == Z_STREAM_END) {
			git_inflate_end(&stream->z);
			stream->z_state = ODB_LOOSE_READ_STREAM_DONE;
			break;
		}

		/*
		 * Z_OK means there is more to inflate. Z_BUF_ERROR is only
		 * tolerated when `buf` has been filled completely; the loop
		 * condition then ends the read.
		 */
		if (status == Z_OK)
			continue;
		if (status == Z_BUF_ERROR && filled >= sz)
			continue;

		git_inflate_end(&stream->z);
		stream->z_state = ODB_LOOSE_READ_STREAM_ERROR;
		return -1;
	}

	return filled;
}
/*
 * Close callback for loose object streams: end the inflate state if it
 * is still in use and unmap the object file. Always returns 0.
 */
static int close_istream_loose(struct odb_read_stream *_st)
{
	struct odb_loose_read_stream *stream = (struct odb_loose_read_stream *)_st;

	switch (stream->z_state) {
	case ODB_LOOSE_READ_STREAM_INUSE:
		git_inflate_end(&stream->z);
		break;
	default:
		/* The read callback already ended the inflate state. */
		break;
	}

	munmap(stream->mapped, stream->mapsize);
	return 0;
}
/*
 * Open a read stream for the loose object `oid` stored in `source`.
 *
 * The object file is mapped into memory and its header is unpacked and
 * parsed so that the stream's type and size are known up front.
 *
 * Returns 0 on success and stores the stream in `*out`. Returns -1 if
 * the object cannot be mapped or if its header cannot be unpacked or
 * parsed; `*out` is left untouched in that case and all intermediate
 * resources are released.
 */
static int open_istream_loose(struct odb_read_stream **out,
			      struct odb_source *source,
			      const struct object_id *oid)
{
	struct object_info oi = OBJECT_INFO_INIT;
	struct odb_loose_read_stream *st;
	unsigned long mapsize;
	void *mapped;

	mapped = odb_source_loose_map_object(source, oid, &mapsize);
	if (!mapped)
		return -1;

	/*
	 * Note: we must allocate this structure early even though we may still
	 * fail. This is because we need to initialize the zlib stream, and it
	 * is not possible to copy the stream around after the fact because it
	 * has self-referencing pointers.
	 */
	CALLOC_ARRAY(st, 1);

	switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
				    sizeof(st->hdr))) {
	case ULHR_OK:
		break;
	case ULHR_BAD:
	case ULHR_TOO_LONG:
		goto error;
	}

	oi.sizep = &st->base.size;
	oi.typep = &st->base.type;

	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
		goto error;

	st->mapped = mapped;
	st->mapsize = mapsize;
	/*
	 * Skip the header and its terminating NUL. Anything between
	 * `hdr_used` and `hdr_avail` is object data that was inflated along
	 * with the header and is served first by the read callback.
	 */
	st->hdr_used = strlen(st->hdr) + 1;
	st->hdr_avail = st->z.total_out;
	st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
	st->base.close = close_istream_loose;
	st->base.read = read_istream_loose;

	*out = &st->base;

	return 0;

error:
	git_inflate_end(&st->z);
	/*
	 * Ownership of the mapping has not been handed over to `st` yet, so
	 * `st->mapped` is still NULL here. Unmap via the local variables to
	 * avoid leaking the mapping.
	 */
	munmap(mapped, mapsize);
	free(st);
	return -1;
}
/*****************************************************************
*
* Non-delta packed object stream
@@ -455,7 +324,7 @@ static int istream_source(struct odb_read_stream **out,
odb_prepare_alternates(r->objects);
for (source = r->objects->sources; source; source = source->next)
if (!open_istream_loose(out, source, oid))
if (!odb_source_loose_read_object_stream(out, source, oid))
return 0;
return open_istream_incore(out, r, oid);