Merge bitcoin/bitcoin#33657: rest: allow reading partial block data from storage

07135290c1 rest: allow reading partial block data from storage (Roman Zeyde)
4e2af1c065 blockstorage: allow reading partial block data from storage (Roman Zeyde)
f2fd1aa21c blockstorage: return an error code from `ReadRawBlock()` (Roman Zeyde)

Pull request description:

  This PR allows fetching specific transactions using an external index, following https://github.com/bitcoin/bitcoin/pull/32541#issuecomment-3267485313.

  Currently, electrs and other indexers map each address/scripthash to the list of its relevant transactions.

  However, in order to fetch those transactions from bitcoind, electrs relies on reading the whole block and post-filtering for a specific transaction[^1]. Other indexers use a `txindex` to fetch a transaction using its txid [^2][^3][^4].

  The above approach has significant storage and CPU overhead, since the `txid` is a pseudo-random 32-byte value. Also, mainnet `txindex` takes ~60GB today.

  This PR adds support for fetching a transaction directly by its position within its block, via the [REST API](https://github.com/bitcoin/bitcoin/blob/master/doc/REST-interface.md), with the following HTTP request:

  ```
  GET /rest/blockpart/BLOCKHASH.bin?offset=OFFSET&size=SIZE
  ```

  - The offsets' index can be encoded much more efficiently ([~1.3GB today](https://github.com/romanz/bindex-rs/pull/66#issuecomment-3508476436)).

  - Address history query performance can be tested on mainnet using [1BitcoinEaterAddressDontSendf59kuE](https://mempool.space/address/1BitcoinEaterAddressDontSendf59kuE) - assuming warm OS block cache, [it takes <1s to fetch 5200 txs, i.e. <0.2ms per tx](https://github.com/romanz/bindex-rs/pull/66#issuecomment-3508476436) with [bindex](https://github.com/romanz/bindex-rs).

  - Only binary and hex response formats are supported.

  [^1]: https://github.com/romanz/electrs/blob/master/doc/schema.md
  [^2]: https://github.com/Blockstream/electrs/blob/new-index/doc/schema.md#txstore
  [^3]: https://github.com/spesmilo/electrumx/blob/master/docs/HOWTO.rst#prerequisites
  [^4]: https://github.com/cculianu/Fulcrum/blob/master/README.md#requirements

ACKs for top commit:
  maflcko:
    review ACK 07135290c1 🏪
  l0rinc:
    ACK 07135290c1
  hodlinator:
    re-ACK 07135290c1

Tree-SHA512: bcce7bf4b9a3e5e920ab5a83e656f50d5d7840cdde6b7147d329cf578f8a2db555fc1aa5334e8ee64d5630d25839ece77a2cf421c6c3ac1fa379bb453163bd4f
This commit is contained in:
merge-script
2025-12-12 13:22:00 +00:00
11 changed files with 216 additions and 48 deletions

View File

@@ -47,6 +47,11 @@ The HTTP request and response are both handled entirely in-memory.
With the /notxdetails/ option JSON response will only contain the transaction hash instead of the complete transaction details. The option only affects the JSON response.
- `GET /rest/blockpart/<BLOCK-HASH>.<bin|hex>?offset=<OFFSET>&size=<SIZE>`
Given a block hash: returns a block part, in binary or hex-encoded binary formats.
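Responds with 404 if the block doesn't exist, with 400 if the offset/size is missing, invalid, or doesn't fall within the block, and with 500 if the block data cannot be read from storage.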
#### Blockheaders
`GET /rest/headers/<BLOCK-HASH>.<bin|hex|json>?count=<COUNT=5>`

View File

@@ -0,0 +1,5 @@
New REST API
------------
- A new REST API endpoint (`/rest/blockpart/BLOCKHASH.bin?offset=X&size=Y`) has been introduced
for efficiently fetching a range of bytes from block `BLOCKHASH`.

View File

@@ -57,11 +57,9 @@ static void ReadRawBlockBench(benchmark::Bench& bench)
const auto testing_setup{MakeNoLogFileContext<const TestingSetup>(ChainType::MAIN)};
auto& blockman{testing_setup->m_node.chainman->m_blockman};
const auto pos{blockman.WriteBlock(CreateTestBlock(), 413'567)};
std::vector<std::byte> block_data;
blockman.ReadRawBlock(block_data, pos); // warmup
bench.run([&] {
const auto success{blockman.ReadRawBlock(block_data, pos)};
assert(success);
const auto res{blockman.ReadRawBlock(pos)};
assert(res);
});
}

View File

@@ -1759,7 +1759,11 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
g_zmq_notification_interface = CZMQNotificationInterface::Create(
[&chainman = node.chainman](std::vector<std::byte>& block, const CBlockIndex& index) {
assert(chainman);
return chainman->m_blockman.ReadRawBlock(block, WITH_LOCK(cs_main, return index.GetBlockPos()));
if (auto ret{chainman->m_blockman.ReadRawBlock(WITH_LOCK(cs_main, return index.GetBlockPos()))}) {
block = std::move(*ret);
return true;
}
return false;
});
if (g_zmq_notification_interface) {

View File

@@ -2276,8 +2276,9 @@ void PeerManagerImpl::ProcessGetBlockData(CNode& pfrom, Peer& peer, const CInv&
} else if (inv.IsMsgWitnessBlk()) {
// Fast-path: in this case it is possible to serve the block directly from disk,
// as the network format matches the format on disk
std::vector<std::byte> block_data;
if (!m_chainman.m_blockman.ReadRawBlock(block_data, block_pos)) {
if (const auto block_data{m_chainman.m_blockman.ReadRawBlock(block_pos)}) {
MakeAndPushMessage(pfrom, NetMsgType::BLOCK, std::span{*block_data});
} else {
if (WITH_LOCK(m_chainman.GetMutex(), return m_chainman.m_blockman.IsBlockPruned(*pindex))) {
LogDebug(BCLog::NET, "Block was pruned before it could be read, %s\n", pfrom.DisconnectMsg(fLogIPs));
} else {
@@ -2286,7 +2287,6 @@ void PeerManagerImpl::ProcessGetBlockData(CNode& pfrom, Peer& peer, const CInv&
pfrom.fDisconnect = true;
return;
}
MakeAndPushMessage(pfrom, NetMsgType::BLOCK, std::span{block_data});
// Don't set pblock as we've sent the block
} else {
// Send block from disk

View File

@@ -1006,14 +1006,14 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos, const std::o
block.SetNull();
// Open history file to read
std::vector<std::byte> block_data;
if (!ReadRawBlock(block_data, pos)) {
const auto block_data{ReadRawBlock(pos)};
if (!block_data) {
return false;
}
try {
// Read block
SpanReader{block_data} >> TX_WITH_WITNESS(block);
SpanReader{*block_data} >> TX_WITH_WITNESS(block);
} catch (const std::exception& e) {
LogError("Deserialize or I/O error - %s at %s while reading block", e.what(), pos.ToString());
return false;
@@ -1048,19 +1048,19 @@ bool BlockManager::ReadBlock(CBlock& block, const CBlockIndex& index) const
return ReadBlock(block, block_pos, index.GetBlockHash());
}
bool BlockManager::ReadRawBlock(std::vector<std::byte>& block, const FlatFilePos& pos) const
BlockManager::ReadRawBlockResult BlockManager::ReadRawBlock(const FlatFilePos& pos, std::optional<std::pair<size_t, size_t>> block_part) const
{
if (pos.nPos < STORAGE_HEADER_BYTES) {
// If nPos is less than STORAGE_HEADER_BYTES, we can't read the header that precedes the block data
// This would cause an unsigned integer underflow when trying to position the file cursor
// This can happen after pruning or default constructed positions
LogError("Failed for %s while reading raw block storage header", pos.ToString());
return false;
return util::Unexpected{ReadRawError::IO};
}
AutoFile filein{OpenBlockFile({pos.nFile, pos.nPos - STORAGE_HEADER_BYTES}, /*fReadOnly=*/true)};
if (filein.IsNull()) {
LogError("OpenBlockFile failed for %s while reading raw block", pos.ToString());
return false;
return util::Unexpected{ReadRawError::IO};
}
try {
@@ -1072,23 +1072,31 @@ bool BlockManager::ReadRawBlock(std::vector<std::byte>& block, const FlatFilePos
if (blk_start != GetParams().MessageStart()) {
LogError("Block magic mismatch for %s: %s versus expected %s while reading raw block",
pos.ToString(), HexStr(blk_start), HexStr(GetParams().MessageStart()));
return false;
return util::Unexpected{ReadRawError::IO};
}
if (blk_size > MAX_SIZE) {
LogError("Block data is larger than maximum deserialization size for %s: %s versus %s while reading raw block",
pos.ToString(), blk_size, MAX_SIZE);
return false;
return util::Unexpected{ReadRawError::IO};
}
block.resize(blk_size); // Zeroing of memory is intentional here
filein.read(block);
if (block_part) {
const auto [offset, size]{*block_part};
if (size == 0 || offset >= blk_size || size > blk_size - offset) {
return util::Unexpected{ReadRawError::BadPartRange}; // Avoid logging - offset/size come from untrusted REST input
}
filein.seek(offset, SEEK_CUR);
blk_size = size;
}
std::vector<std::byte> data(blk_size); // Zeroing of memory is intentional here
filein.read(data);
return data;
} catch (const std::exception& e) {
LogError("Read from block file failed: %s for %s while reading raw block", e.what(), pos.ToString());
return false;
return util::Unexpected{ReadRawError::IO};
}
return true;
}
FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight)

View File

@@ -17,6 +17,7 @@
#include <streams.h>
#include <sync.h>
#include <uint256.h>
#include <util/expected.h>
#include <util/fs.h>
#include <util/hasher.h>
@@ -169,6 +170,10 @@ struct BlockfileCursor {
std::ostream& operator<<(std::ostream& os, const BlockfileCursor& cursor);
/** Failure reasons reported by BlockManager::ReadRawBlock(). */
enum class ReadRawError {
//! The raw block could not be read from storage: the position is too small to
//! hold the storage header, the block file could not be opened, the magic bytes
//! did not match, the stored size exceeds MAX_SIZE, or the read itself threw.
IO,
//! The requested (offset, size) block part is empty or does not lie entirely
//! within the stored block.
BadPartRange,
};
/**
* Maintains a tree of blocks (stored in `m_block_index`) which is consulted
@@ -302,6 +307,7 @@ private:
public:
using Options = kernel::BlockManagerOpts;
using ReadRawBlockResult = util::Expected<std::vector<std::byte>, ReadRawError>;
explicit BlockManager(const util::SignalInterrupt& interrupt, Options opts);
@@ -455,7 +461,7 @@ public:
/** Functions for disk access for blocks */
bool ReadBlock(CBlock& block, const FlatFilePos& pos, const std::optional<uint256>& expected_hash) const;
bool ReadBlock(CBlock& block, const CBlockIndex& index) const;
bool ReadRawBlock(std::vector<std::byte>& block, const FlatFilePos& pos) const;
ReadRawBlockResult ReadRawBlock(const FlatFilePos& pos, std::optional<std::pair<size_t, size_t>> block_part = std::nullopt) const;
bool ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index) const;

View File

@@ -379,10 +379,17 @@ static bool rest_spent_txouts(const std::any& context, HTTPRequest* req, const s
}
}
/**
* This handler is used by multiple HTTP endpoints:
* - `/block/` via `rest_block_extended()`
* - `/block/notxdetails/` via `rest_block_notxdetails()`
* - `/blockpart/` via `rest_block_part()` (doesn't support JSON response, so `tx_verbosity` is unset)
*/
static bool rest_block(const std::any& context,
HTTPRequest* req,
const std::string& uri_part,
TxVerbosity tx_verbosity)
std::optional<TxVerbosity> tx_verbosity,
std::optional<std::pair<size_t, size_t>> block_part = std::nullopt)
{
if (!CheckWarmup(req))
return false;
@@ -416,34 +423,43 @@ static bool rest_block(const std::any& context,
pos = pblockindex->GetBlockPos();
}
std::vector<std::byte> block_data{};
if (!chainman.m_blockman.ReadRawBlock(block_data, pos)) {
return RESTERR(req, HTTP_NOT_FOUND, hashStr + " not found");
const auto block_data{chainman.m_blockman.ReadRawBlock(pos, block_part)};
if (!block_data) {
switch (block_data.error()) {
case node::ReadRawError::IO: return RESTERR(req, HTTP_INTERNAL_SERVER_ERROR, "I/O error reading " + hashStr);
case node::ReadRawError::BadPartRange:
assert(block_part);
return RESTERR(req, HTTP_BAD_REQUEST, strprintf("Bad block part offset/size %d/%d for %s", block_part->first, block_part->second, hashStr));
} // no default case, so the compiler can warn about missing cases
assert(false);
}
switch (rf) {
case RESTResponseFormat::BINARY: {
req->WriteHeader("Content-Type", "application/octet-stream");
req->WriteReply(HTTP_OK, block_data);
req->WriteReply(HTTP_OK, *block_data);
return true;
}
case RESTResponseFormat::HEX: {
const std::string strHex{HexStr(block_data) + "\n"};
const std::string strHex{HexStr(*block_data) + "\n"};
req->WriteHeader("Content-Type", "text/plain");
req->WriteReply(HTTP_OK, strHex);
return true;
}
case RESTResponseFormat::JSON: {
CBlock block{};
DataStream block_stream{block_data};
block_stream >> TX_WITH_WITNESS(block);
UniValue objBlock = blockToJSON(chainman.m_blockman, block, *tip, *pblockindex, tx_verbosity, chainman.GetConsensus().powLimit);
std::string strJSON = objBlock.write() + "\n";
req->WriteHeader("Content-Type", "application/json");
req->WriteReply(HTTP_OK, strJSON);
return true;
if (tx_verbosity) {
CBlock block{};
DataStream block_stream{*block_data};
block_stream >> TX_WITH_WITNESS(block);
UniValue objBlock = blockToJSON(chainman.m_blockman, block, *tip, *pblockindex, *tx_verbosity, chainman.GetConsensus().powLimit);
std::string strJSON = objBlock.write() + "\n";
req->WriteHeader("Content-Type", "application/json");
req->WriteReply(HTTP_OK, strJSON);
return true;
}
return RESTERR(req, HTTP_BAD_REQUEST, "JSON output is not supported for this request type");
}
default: {
@@ -462,6 +478,25 @@ static bool rest_block_notxdetails(const std::any& context, HTTPRequest* req, co
return rest_block(context, req, uri_part, TxVerbosity::SHOW_TXID);
}
static bool rest_block_part(const std::any& context, HTTPRequest* req, const std::string& uri_part)
{
try {
if (const auto opt_offset{ToIntegral<size_t>(req->GetQueryParameter("offset").value_or(""))}) {
if (const auto opt_size{ToIntegral<size_t>(req->GetQueryParameter("size").value_or(""))}) {
return rest_block(context, req, uri_part,
/*tx_verbosity=*/std::nullopt,
/*block_part=*/{{*opt_offset, *opt_size}});
} else {
return RESTERR(req, HTTP_BAD_REQUEST, "Block part size missing or invalid");
}
} else {
return RESTERR(req, HTTP_BAD_REQUEST, "Block part offset missing or invalid");
}
} catch (const std::runtime_error& e) {
return RESTERR(req, HTTP_BAD_REQUEST, e.what());
}
}
static bool rest_filter_header(const std::any& context, HTTPRequest* req, const std::string& uri_part)
{
if (!CheckWarmup(req)) return false;
@@ -1110,6 +1145,7 @@ static const struct {
{"/rest/tx/", rest_tx},
{"/rest/block/notxdetails/", rest_block_notxdetails},
{"/rest/block/", rest_block_extended},
{"/rest/blockpart/", rest_block_part},
{"/rest/blockfilter/", rest_block_filter},
{"/rest/blockfilterheaders/", rest_filter_header},
{"/rest/chaininfo", rest_chaininfo},

View File

@@ -680,7 +680,6 @@ static CBlock GetBlockChecked(BlockManager& blockman, const CBlockIndex& blockin
static std::vector<std::byte> GetRawBlockChecked(BlockManager& blockman, const CBlockIndex& blockindex)
{
std::vector<std::byte> data{};
FlatFilePos pos{};
{
LOCK(cs_main);
@@ -688,13 +687,10 @@ static std::vector<std::byte> GetRawBlockChecked(BlockManager& blockman, const C
pos = blockindex.GetBlockPos();
}
if (!blockman.ReadRawBlock(data, pos)) {
// Block not found on disk. This shouldn't normally happen unless the block was
// pruned right after we released the lock above.
throw JSONRPCError(RPC_MISC_ERROR, "Block not found on disk");
}
return data;
if (auto data{blockman.ReadRawBlock(pos)}) return std::move(*data);
// Block not found on disk. This shouldn't normally happen unless the block was
// pruned right after we released the lock above.
throw JSONRPCError(RPC_MISC_ERROR, "Block not found on disk");
}
static CBlockUndo GetUndoChecked(BlockManager& blockman, const CBlockIndex& blockindex)

View File

@@ -138,6 +138,68 @@ BOOST_FIXTURE_TEST_CASE(blockmanager_block_data_availability, TestChain100Setup)
BOOST_CHECK(!blockman.CheckBlockDataAvailability(tip, *last_pruned_block));
}
//! Reading a valid (offset, size) part of the tip block must yield exactly the
//! corresponding bytes of the full raw block.
BOOST_FIXTURE_TEST_CASE(blockmanager_block_data_part, TestChain100Setup)
{
    LOCK(::cs_main);
    auto& chainman{m_node.chainman};
    auto& blockman{chainman->m_blockman};
    const CBlockIndex& tip{*chainman->ActiveTip()};
    const FlatFilePos tip_block_pos{tip.GetBlockPos()};

    // Reference copy of the whole block; every part is compared against it.
    auto block{blockman.ReadRawBlock(tip_block_pos)};
    BOOST_REQUIRE(block);
    BOOST_REQUIRE_GE(block->size(), 200);
    const size_t full_size{block->size()};

    // (offset, size) ranges that lie fully inside the block, including the
    // whole block and the very last byte.
    const std::pair<size_t, size_t> valid_ranges[]{
        {0, 20},
        {0, full_size - 1},
        {0, full_size - 10},
        {0, full_size},
        {1, full_size - 1},
        {10, 20},
        {full_size - 1, 1},
    };

    for (const auto& [offset, size] : valid_ranges) {
        auto res{blockman.ReadRawBlock(tip_block_pos, std::pair{offset, size})};
        BOOST_CHECK(res);
        const auto& part{res.value()};
        const auto expected_begin{block->begin() + offset};
        BOOST_CHECK_EQUAL_COLLECTIONS(part.begin(), part.end(), expected_begin, expected_begin + size);
    }
}
//! Requesting an empty part, or a part that does not lie entirely within the
//! tip block, must fail with ReadRawError::BadPartRange.
BOOST_FIXTURE_TEST_CASE(blockmanager_block_data_part_error, TestChain100Setup)
{
    LOCK(::cs_main);
    auto& chainman{m_node.chainman};
    auto& blockman{chainman->m_blockman};
    const CBlockIndex& tip{*chainman->ActiveTip()};
    const FlatFilePos tip_block_pos{tip.GetBlockPos()};

    // The full block is read only to learn its size.
    auto block{blockman.ReadRawBlock(tip_block_pos)};
    BOOST_REQUIRE(block);
    BOOST_REQUIRE_GE(block->size(), 200);
    const size_t full_size{block->size()};
    constexpr size_t max_size{std::numeric_limits<size_t>::max()};

    // (offset, size) ranges that are empty, start at/after the end of the
    // block, or run past its end (including values that would overflow if
    // offset and size were added).
    const std::pair<size_t, size_t> bad_ranges[]{
        {0, 0},
        {0, full_size + 1},
        {0, max_size},
        {1, full_size},
        {2, full_size - 1},
        {full_size - 1, 2},
        {full_size - 2, 3},
        {full_size + 1, 0},
        {full_size + 1, 1},
        {full_size + 2, 2},
        {full_size, 0},
        {full_size, 1},
        {max_size, 1},
        {max_size, max_size},
    };

    for (const auto& [offset, size] : bad_ranges) {
        auto res{blockman.ReadRawBlock(tip_block_pos, std::pair{offset, size})};
        BOOST_CHECK(!res);
        BOOST_CHECK_EQUAL(res.error(), node::ReadRawError::BadPartRange);
    }
}
BOOST_FIXTURE_TEST_CASE(blockmanager_readblock_hash_mismatch, TestingSetup)
{
CBlockIndex index;

View File

@@ -28,7 +28,6 @@ from test_framework.wallet import (
MiniWallet,
getnewdestination,
)
from typing import Optional
INVALID_PARAM = "abc"
@@ -66,13 +65,16 @@ class RESTTest (BitcoinTestFramework):
body: str = '',
status: int = 200,
ret_type: RetType = RetType.JSON,
query_params: Optional[dict[str, typing.Any]] = None,
query_params: typing.Union[dict[str, typing.Any], str, None] = None,
) -> typing.Union[http.client.HTTPResponse, bytes, str, None]:
rest_uri = '/rest' + uri
if req_type in ReqType:
rest_uri += f'.{req_type.name.lower()}'
if query_params:
rest_uri += f'?{urllib.parse.urlencode(query_params)}'
if isinstance(query_params, str):
rest_uri += f'?{query_params}'
else:
rest_uri += f'?{urllib.parse.urlencode(query_params)}'
conn = http.client.HTTPConnection(self.url.hostname, self.url.port)
self.log.debug(f'{http_method} {rest_uri} {body}')
@@ -82,7 +84,7 @@ class RESTTest (BitcoinTestFramework):
conn.request('POST', rest_uri, body)
resp = conn.getresponse()
assert_equal(resp.status, status)
assert resp.status == status, f"Expected: {status}, Got: {resp.status} - Response: {str(resp.read())}"
if ret_type == RetType.OBJ:
return resp
@@ -455,6 +457,52 @@ class RESTTest (BitcoinTestFramework):
expected = [(p["scriptPubKey"], p["value"]) for p in prevouts]
assert_equal(expected, actual)
self.log.info("Test the /blockpart URI")
blockhash = self.nodes[0].getbestblockhash()
block_bin = self.test_rest_request(f"/block/{blockhash}", req_type=ReqType.BIN, ret_type=RetType.BYTES)
for req_type in (ReqType.BIN, ReqType.HEX):
def get_block_part(status: int = 200, **kwargs):
resp = self.test_rest_request(f"/blockpart/{blockhash}", status=status,
req_type=req_type, ret_type=RetType.BYTES, **kwargs)
assert isinstance(resp, bytes)
if req_type is ReqType.HEX and status == 200:
resp = bytes.fromhex(resp.decode().strip())
return resp
assert_equal(block_bin, get_block_part(query_params={"offset": 0, "size": len(block_bin)}))
assert len(block_bin) >= 500
assert_equal(block_bin[20:320], get_block_part(query_params={"offset": 20, "size": 300}))
assert_equal(block_bin[-5:], get_block_part(query_params={"offset": len(block_bin) - 5, "size": 5}))
get_block_part(status=400, query_params={"offset": 10})
get_block_part(status=400, query_params={"size": 100})
get_block_part(status=400, query_params={"offset": "x"})
get_block_part(status=400, query_params={"size": "y"})
get_block_part(status=400, query_params={"offset": "x", "size": "y"})
assert get_block_part(status=400, query_params="%XY").decode("utf-8").startswith("URI parsing failed")
get_block_part(status=400, query_params={"offset": 0, "size": 0})
get_block_part(status=400, query_params={"offset": len(block_bin), "size": 0})
get_block_part(status=400, query_params={"offset": len(block_bin) + 1, "size": 1})
get_block_part(status=400, query_params={"offset": len(block_bin), "size": 1})
get_block_part(status=400, query_params={"offset": len(block_bin) + 1, "size": 1})
get_block_part(status=400, query_params={"offset": 0, "size": len(block_bin) + 1})
self.test_rest_request(f"/blockpart/{blockhash}", status=400, req_type=ReqType.JSON, ret_type=RetType.OBJ)
self.log.info("Missing block data should cause REST API to fail")
self.test_rest_request(f"/block/{blockhash}", status=200, req_type=ReqType.BIN, ret_type=RetType.OBJ)
self.test_rest_request(f"/blockpart/{blockhash}", query_params={"offset": 0, "size": 1}, status=200, req_type=ReqType.BIN, ret_type=RetType.OBJ)
blk_files = list(self.nodes[0].blocks_path.glob("blk*.dat"))
for blk_file in blk_files:
blk_file.rename(blk_file.with_suffix('.bkp'))
self.test_rest_request(f"/block/{blockhash}", status=500, req_type=ReqType.BIN, ret_type=RetType.OBJ)
self.test_rest_request(f"/blockpart/{blockhash}", query_params={"offset": 0, "size": 1}, status=500, req_type=ReqType.BIN, ret_type=RetType.OBJ)
for blk_file in blk_files:
blk_file.with_suffix('.bkp').rename(blk_file)
self.log.info("Test the /deploymentinfo URI")