mirror of
https://github.com/git/git.git
synced 2025-12-12 20:36:24 +01:00
In fd888311fb (reftable/table: move reading block into block reader, 2025-04-07), we have refactored how reftable blocks are read so that most of the logic is contained in the "block.c" subsystem itself. Most importantly, the whole logic to read the data itself is now contained in that subsystem. This change caused a significant performance regression though when reading blocks that aren't of the specific type one is searching for: Benchmark 1: update-ref: create 100k refs (revision = fd888311fbc~) Time (mean ± σ): 2.171 s ± 0.028 s [User: 1.189 s, System: 0.977 s] Range (min … max): 2.117 s … 2.206 s 10 runs Benchmark 2: update-ref: create 100k refs (revision = fd888311fb) Time (mean ± σ): 3.418 s ± 0.030 s [User: 2.371 s, System: 1.037 s] Range (min … max): 3.377 s … 3.473 s 10 runs Summary update-ref: create 100k refs (revision = fd888311fbc~) ran 1.57 ± 0.02 times faster than update-ref: create 100k refs (revision = fd888311fb) The root cause of the performance regression is that we changed when exactly blocks of an uninteresting type are being discarded. Previous to the refactoring in the mentioned commit we'd load the block data, read its type, notice that it's not the wanted type and discard the block. After the commit though we don't discard the block immediately, but we fully decode it only to realize that it's not the desired type. We then discard the block again, but have already performed a bunch of pointless work. Fix the regression by making `reftable_block_init()` return early in case the block is not of the desired type.
This fixes the performance hit: Benchmark 1: update-ref: create 100k refs (revision = HEAD~) Time (mean ± σ): 2.712 s ± 0.018 s [User: 1.990 s, System: 0.716 s] Range (min … max): 2.682 s … 2.741 s 10 runs Benchmark 2: update-ref: create 100k refs (revision = HEAD) Time (mean ± σ): 1.670 s ± 0.012 s [User: 0.991 s, System: 0.676 s] Range (min … max): 1.652 s … 1.693 s 10 runs Summary update-ref: create 100k refs (revision = HEAD) ran 1.62 ± 0.02 times faster than update-ref: create 100k refs (revision = HEAD~) Note that the baseline performance is lower than in the original due to a couple of unrelated performance improvements that have landed since the original commit. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
76 lines
1.9 KiB
C
76 lines
1.9 KiB
C
/*
|
|
* Copyright 2020 Google LLC
|
|
*
|
|
* Use of this source code is governed by a BSD-style
|
|
* license that can be found in the LICENSE file or at
|
|
* https://developers.google.com/open-source/licenses/bsd
|
|
*/
|
|
|
|
#ifndef REFTABLE_BLOCK_H
|
|
#define REFTABLE_BLOCK_H
|
|
|
|
#include <stdint.h>
|
|
|
|
#include "reftable-basics.h"
|
|
#include "reftable-blocksource.h"
|
|
#include "reftable-iterator.h"
|
|
|
|
struct z_stream_s;
|
|
|
|
/*
|
|
* A block part of a reftable. Contains records as well as some metadata
|
|
* describing them.
|
|
*/
|
|
struct reftable_block {
|
|
/*
|
|
* Offset of the block header; nonzero for the first block in a
|
|
* reftable.
|
|
*/
|
|
uint32_t header_off;
|
|
|
|
/* The memory block. */
|
|
struct reftable_block_data block_data;
|
|
uint32_t hash_size;
|
|
|
|
/* Uncompressed data for log entries. */
|
|
struct z_stream_s *zstream;
|
|
unsigned char *uncompressed_data;
|
|
size_t uncompressed_cap;
|
|
|
|
/*
|
|
* Restart point data. Restart points are located after the block's
|
|
* record data.
|
|
*/
|
|
uint16_t restart_count;
|
|
uint32_t restart_off;
|
|
|
|
/*
|
|
* Size of the data in the file. For log blocks, this is the compressed
|
|
* size.
|
|
*/
|
|
uint32_t full_block_size;
|
|
uint8_t block_type;
|
|
};
|
|
|
|
/*
 * Initialize a reftable block from the given block source.
 *
 * `want_type` names the block type the caller is interested in. When the
 * block that was read is of a different type, initialization returns early
 * instead of fully decoding a block that would be discarded anyway.
 *
 * NOTE(review): presumably `offset` is the position of the block inside
 * `source`, while `header_size`, `table_block_size` and `hash_size` describe
 * the enclosing table -- confirm against the implementation. The exact
 * meaning of the return value (presumably 0 on success and non-zero on
 * error or type mismatch) should be confirmed there as well.
 */
int reftable_block_init(struct reftable_block *b,
			struct reftable_block_source *source,
			uint32_t offset, uint32_t header_size,
			uint32_t table_block_size, uint32_t hash_size,
			uint8_t want_type);
|
|
|
|
/*
 * Release resources allocated by the block.
 *
 * NOTE(review): whether the block may be reused or released a second time
 * afterwards is not visible from this header -- confirm before relying on it.
 */
void reftable_block_release(struct reftable_block *b);
|
|
|
|
/*
 * Initialize a generic record iterator from the given block.
 *
 * The block itself is only read (`b` is const); the iterator is set up in
 * `it`.
 *
 * NOTE(review): presumably returns 0 on success and a non-zero error code
 * otherwise, and `b` presumably has to outlive `it` -- confirm against the
 * implementation.
 */
int reftable_block_init_iterator(const struct reftable_block *b,
				 struct reftable_iterator *it);
|
|
|
|
/* Returns the block type (eg. 'r' for refs). */
uint8_t reftable_block_type(const struct reftable_block *b);
|
|
|
|
/*
 * Decodes the first key in the block.
 *
 * NOTE(review): the decoded key is presumably written to `key`, with 0
 * returned on success and a non-zero error code otherwise -- confirm
 * against the implementation.
 */
int reftable_block_first_key(const struct reftable_block *b,
			     struct reftable_buf *key);
|
|
|
|
#endif /* REFTABLE_BLOCK_H */
|