[byteTree] Write fixed sized data using an optimized form

If we know the size of a type at compile time (as we do for all the
integer types), it is cheaper to store the value directly into the data
buffer than to copy it with an arbitrarily sized memcpy.
This commit is contained in:
Alex Hoppen
2018-08-24 11:10:00 -07:00
parent f97d13d984
commit 33d86d5cb6
5 changed files with 123 additions and 38 deletions

View File

@@ -21,6 +21,7 @@
#include "llvm/Support/BinaryStreamError.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "swift/Basic/ExponentialGrowthAppendingBinaryByteStream.h"
#include <map>
namespace {
@@ -35,7 +36,7 @@ class ByteTreeWriter;
using UserInfoMap = std::map<void *, void *>;
/// Add a template specialization of \c ObjectTraits for any that type
/// Add a template specialization of \c ObjectTraits for any type that
/// serializes as an object consisting of multiple fields.
template <class T>
struct ObjectTraits {
@@ -55,7 +56,7 @@ struct ObjectTraits {
// UserInfoMap &UserInfo);
};
/// Add a template specialization of \c ScalarTraits for any that type
/// Add a template specialization of \c ScalarTraits for any type that
/// serializes into a raw set of bytes.
template <class T>
struct ScalarTraits {
@@ -70,7 +71,17 @@ struct ScalarTraits {
// static llvm::Error write(llvm::BinaryStreamWriter &Writer, const T &Value);
};
/// Add a template specialization of \c WrapperTypeTraits for any that type
/// Add a template specialization of \c DirectlyEncodable for any type whose
/// serialized form is equal to its binary representation on the serializing
/// machine.
///
/// The primary template reports \c false, so the trait can be queried for any
/// type (not only inside a SFINAE context) and types without a specialization
/// are never treated as directly encodable.
template <class T>
struct DirectlyEncodable {
  // Specializations for directly encodable types must provide:
  //   static bool const value = true;
  static bool const value = false;
};
/// Add a template specialization of \c WrapperTypeTraits for any type that
/// serializes as a type that already has a specialization of \c ScalarTraits.
/// This will typically be useful for types like enums that have a 1-to-1
/// mapping to e.g. an integer.
@@ -143,6 +154,12 @@ private:
/// The writer to which the binary data is written.
llvm::BinaryStreamWriter &StreamWriter;
/// The underlying stream of the StreamWriter. We need this reference so that
/// we can call \c ExponentialGrowthAppendingBinaryByteStream.writeRaw
/// which is more efficient than the generic \c writeBytes of
/// \c llvm::BinaryStreamWriter since it avoids the arbitrary size memcopy.
ExponentialGrowthAppendingBinaryByteStream &Stream;
/// The number of fields this object contains. \c UINT_MAX if it has not been
/// set yet. No member may be written to the object if expected number of
/// fields has not been set yet.
@@ -157,8 +174,21 @@ private:
/// The \c ByteTreeWriter can only be constructed internally. Use
/// \c ByteTreeWriter.write to serialize a new object.
ByteTreeWriter(llvm::BinaryStreamWriter &StreamWriter, UserInfoMap &UserInfo)
: StreamWriter(StreamWriter), UserInfo(UserInfo) {}
/// Construct a writer whose \c StreamWriter, \c Stream and \c UserInfo members
/// are initialized from the arguments of the same name.
/// \p Stream must be the underlying stream of \p StreamWriter.
ByteTreeWriter(ExponentialGrowthAppendingBinaryByteStream &Stream,
llvm::BinaryStreamWriter &StreamWriter, UserInfoMap &UserInfo)
: StreamWriter(StreamWriter), Stream(Stream), UserInfo(UserInfo) {}
/// Append \p Value to the ByteTree using exactly the byte representation it
/// has on the serializing machine, then move the writer's offset past it.
///
/// \returns The error reported by the underlying stream's \c writeRaw.
template <typename T>
llvm::Error writeRaw(T Value) {
  // FIXME: We implicitly inherit the endianness of the serializing machine.
  // Since we're currently only supporting macOS that's not a problem for now.
  const auto WriteOffset = StreamWriter.getOffset();
  llvm::Error Result = Stream.writeRaw(WriteOffset, Value);

  // The value bypassed StreamWriter, so its offset has to be advanced by
  // hand. This happens regardless of whether the write succeeded.
  StreamWriter.setOffset(WriteOffset + sizeof(T));
  return Result;
}
/// Set the expected number of fields the object written by this writer is
/// expected to have.
@@ -175,7 +205,7 @@ private:
// Set the most significant bit to indicate that the next construct is an
// object and not a scalar.
uint32_t ToWrite = NumFields | (1 << 31);
auto Error = StreamWriter.writeInteger(ToWrite);
auto Error = writeRaw(ToWrite);
(void)Error;
assert(!Error);
@@ -205,11 +235,13 @@ public:
/// the stream by the specified ProtocolVersion.
template <typename T>
typename std::enable_if<has_ObjectTraits<T>::value, void>::type
static write(uint32_t ProtocolVersion, llvm::BinaryStreamWriter &StreamWriter,
const T &Object, UserInfoMap &UserInfo) {
ByteTreeWriter Writer(StreamWriter, UserInfo);
static write(ExponentialGrowthAppendingBinaryByteStream &Stream,
uint32_t ProtocolVersion, const T &Object,
UserInfoMap &UserInfo) {
llvm::BinaryStreamWriter StreamWriter(Stream);
ByteTreeWriter Writer(Stream, StreamWriter, UserInfo);
auto Error = Writer.StreamWriter.writeInteger(ProtocolVersion);
auto Error = Writer.writeRaw(ProtocolVersion);
(void)Error;
assert(!Error);
@@ -224,7 +256,7 @@ public:
write(const T &Object, unsigned Index) {
validateAndIncreaseFieldIndex(Index);
auto ObjectWriter = ByteTreeWriter(StreamWriter, UserInfo);
auto ObjectWriter = ByteTreeWriter(Stream, StreamWriter, UserInfo);
ObjectWriter.setNumFields(ObjectTraits<T>::numFields(Object, UserInfo));
ObjectTraits<T>::write(ObjectWriter, Object, UserInfo);
@@ -240,7 +272,7 @@ public:
// bitflag that indicates if the next construct in the tree is an object
// or a scalar.
assert((ValueSize & ((uint32_t)1 << 31)) == 0 && "Value size too large");
auto SizeError = StreamWriter.writeInteger(ValueSize);
auto SizeError = writeRaw(ValueSize);
(void)SizeError;
assert(!SizeError);
@@ -254,6 +286,21 @@ public:
"ScalarTraits<T>::size");
}
/// Write a directly encodable \p Value as the field at position \p Index.
///
/// The field is emitted as the size of \c T in bytes (as a \c uint32_t)
/// followed by the value itself, both via \c writeRaw.
template <typename T>
typename std::enable_if<DirectlyEncodable<T>::value, void>::type
write(const T &Value, unsigned Index) {
  validateAndIncreaseFieldIndex(Index);

  // Size prefix first, so it precedes the payload in the stream.
  const uint32_t NumBytes = sizeof(T);
  llvm::Error LengthError = writeRaw(NumBytes);
  (void)LengthError;
  assert(!LengthError);

  // Then the payload, in its in-memory form.
  llvm::Error PayloadError = writeRaw(Value);
  (void)PayloadError;
  assert(!PayloadError);
}
template <typename T>
typename std::enable_if<has_WrapperTypeTraits<T>::value, void>::type
write(const T &Value, unsigned Index) {
@@ -268,30 +315,18 @@ public:
// Define serialization schemes for common types
template <>
struct ScalarTraits<uint8_t> {
static unsigned size(const uint8_t &Value) { return 1; }
static llvm::Error write(llvm::BinaryStreamWriter &Writer,
const uint8_t &Value) {
return Writer.writeInteger(Value);
}
struct DirectlyEncodable<uint8_t> {
static bool const value = true;
};
template <>
struct ScalarTraits<uint16_t> {
static unsigned size(const uint16_t &Value) { return 2; }
static llvm::Error write(llvm::BinaryStreamWriter &Writer,
const uint16_t &Value) {
return Writer.writeInteger(Value);
}
struct DirectlyEncodable<uint16_t> {
static bool const value = true;
};
template <>
struct ScalarTraits<uint32_t> {
static unsigned size(const uint32_t &Value) { return 4; }
static llvm::Error write(llvm::BinaryStreamWriter &Writer,
const uint32_t &Value) {
return Writer.writeInteger(Value);
}
struct DirectlyEncodable<uint32_t> {
static bool const value = true;
};
template <>

View File

@@ -57,6 +57,31 @@ public:
llvm::Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override;
/// This is an optimized version of \c writeBytes for values whose size is
/// known at compile time (which in particular holds for integers). Because
/// the number of bytes to copy is a compile-time constant, the copy into the
/// output buffer does not have to handle an arbitrarily sized buffer the way
/// \c writeBytes does.
/// This assumes that the endianness of this stream is the same as the native
/// endianness of the executing machine. No endianness transformations are
/// performed.
///
/// \param Offset The byte offset in the stream at which \p Value is written.
/// \param Value The value whose in-memory bytes are copied into the stream.
///        \c T is expected to be trivially copyable.
/// \returns The error produced by \c checkOffsetForWrite if it rejects the
///          write, otherwise success.
template<typename T>
llvm::Error writeRaw(uint32_t Offset, T Value) {
  if (auto Error = checkOffsetForWrite(Offset, sizeof(T))) {
    return Error;
  }

  // Resize the internal buffer if needed.
  uint32_t RequiredSize = Offset + sizeof(T);
  if (RequiredSize > Data.size()) {
    Data.resize(RequiredSize);
  }

  // Copy the bytes instead of storing through a casted pointer. Offset is an
  // arbitrary byte offset, so the destination is not guaranteed to be
  // suitably aligned for T, and a store through a T* into the byte buffer
  // would be undefined behavior. Since the size is a compile-time constant,
  // compilers can typically lower this copy to a plain store.
  std::memcpy(Data.data() + Offset, &Value, sizeof(T));

  return llvm::Error::success();
}
llvm::Error commit() override { return llvm::Error::success(); }
virtual llvm::BinaryStreamFlags getFlags() const override {

View File

@@ -2446,10 +2446,9 @@ void serializeSyntaxTreeAsByteTree(
swift::ExponentialGrowthAppendingBinaryByteStream Stream(
llvm::support::endianness::little);
Stream.reserve(32 * 1024);
llvm::BinaryStreamWriter Writer(Stream);
std::map<void *, void *> UserInfo;
UserInfo[swift::byteTree::UserInfoKeyReusedNodeIds] = &ReusedNodeIds;
swift::byteTree::ByteTreeWriter::write(/*ProtocolVersion=*/1, Writer,
swift::byteTree::ByteTreeWriter::write(Stream, /*ProtocolVersion=*/1,
*SyntaxTree.getRaw(), UserInfo);
std::unique_ptr<llvm::WritableMemoryBuffer> Buf =

View File

@@ -729,14 +729,15 @@ int doSerializeRawTree(const char *MainExecutablePath,
return EXIT_FAILURE;
}
llvm::AppendingBinaryByteStream Stream(llvm::support::endianness::little);
llvm::BinaryStreamWriter Writer(Stream);
swift::ExponentialGrowthAppendingBinaryByteStream Stream(
llvm::support::endianness::little);
Stream.reserve(32 * 1024);
std::map<void *, void *> UserInfo;
UserInfo[swift::byteTree::UserInfoKeyReusedNodeIds] = &ReusedNodeIds;
if (options::AddByteTreeFields) {
UserInfo[swift::byteTree::UserInfoKeyAddInvalidFields] = (void *)true;
}
swift::byteTree::ByteTreeWriter::write(/*ProtocolVersion=*/1, Writer,
swift::byteTree::ByteTreeWriter::write(Stream, /*ProtocolVersion=*/1,
*Root, UserInfo);
auto OutputBufferOrError = llvm::FileOutputBuffer::create(
options::OutputFilename, Stream.data().size());

View File

@@ -116,11 +116,9 @@ TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, GrowMultipleSteps) {
}
TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, WriteIntoMiddle) {
// Test that the stream resizes correctly if we write into its middle
ExponentialGrowthAppendingBinaryByteStream Stream(llvm::support::little);
// Test that the buffer can grow multiple steps at once, e.g. 1 -> 2 -> 4
// Test that the stream resizes correctly if we write into its middle
std::vector<uint8_t> InitialData = {'T', 'e', 's', 't'};
auto InitialDataRef = makeArrayRef(InitialData);
EXPECT_THAT_ERROR(Stream.writeBytes(0, InitialDataRef), Succeeded());
@@ -144,3 +142,30 @@ TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, WriteIntoMiddle) {
EXPECT_EQ(DataAfterInsertRef, Stream.data());
EXPECT_EQ(6u, Stream.getLength());
}
TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, WriteRaw) {
  // Exercise writeRaw with a single byte and with a four byte integer, each
  // appended at the current end of the stream.
  ExponentialGrowthAppendingBinaryByteStream Stream(llvm::support::little);

  // Seed the stream through the regular writeBytes path.
  std::vector<uint8_t> Hello = {'H', 'e', 'l', 'l', 'o'};
  auto HelloRef = makeArrayRef(Hello);
  EXPECT_THAT_ERROR(Stream.writeBytes(0, HelloRef), Succeeded());
  EXPECT_EQ(HelloRef, Stream.data());

  // A one byte raw write grows the stream by exactly one byte.
  EXPECT_THAT_ERROR(Stream.writeRaw(5, static_cast<uint8_t>(' ')),
                    Succeeded());
  std::vector<uint8_t> HelloSpace = {'H', 'e', 'l', 'l', 'o', ' '};
  auto HelloSpaceRef = makeArrayRef(HelloSpace);
  EXPECT_EQ(HelloSpaceRef, Stream.data());
  EXPECT_EQ(6u, Stream.getLength());

  // Pack "worl" with 'w' in the least significant byte. The expected byte
  // order below therefore assumes a little-endian host.
  uint32_t World = 'w' | ('o' << 8) | ('r' << 16) | ('l' << 24);
  EXPECT_THAT_ERROR(Stream.writeRaw(6, World), Succeeded());
  std::vector<uint8_t> HelloWorl = {'H', 'e', 'l', 'l', 'o', ' ',
                                    'w', 'o', 'r', 'l'};
  auto HelloWorlRef = makeArrayRef(HelloWorl);
  EXPECT_EQ(HelloWorlRef, Stream.data());
  EXPECT_EQ(10u, Stream.getLength());
}