[byteTree] Write fixed sized data using an optimized form

If we know the size of a type at compile time (like we do for all the integer types), it is cheaper to assign the data buffer directly instead of using a memcpy.
2025-12-21 12:14:44 +01:00 · 2018-08-24 11:10:00 -07:00
parent f97d13d984
commit 33d86d5cb6
5 changed files with 123 additions and 38 deletions
--- a/include/swift/Basic/ByteTreeSerialization.h
+++ b/include/swift/Basic/ByteTreeSerialization.h
@@ -21,6 +21,7 @@

 #include "llvm/Support/BinaryStreamError.h"
 #include "llvm/Support/BinaryStreamWriter.h"
+#include "swift/Basic/ExponentialGrowthAppendingBinaryByteStream.h"
 #include <map>

 namespace {
@@ -35,7 +36,7 @@ class ByteTreeWriter;

 using UserInfoMap = std::map<void *, void *>;

-/// Add a template specialization of \c ObjectTraits for any that type
+/// Add a template specialization of \c ObjectTraits for any type that
 /// serializes as an object consisting of multiple fields.
 template <class T>
 struct ObjectTraits {
@@ -55,7 +56,7 @@ struct ObjectTraits {
  //                   UserInfoMap &UserInfo);
 };

-/// Add a template specialization of \c ScalarTraits for any that type
+/// Add a template specialization of \c ScalarTraits for any type that
 /// serializes into a raw set of bytes.
 template <class T>
 struct ScalarTraits {
@@ -70,7 +71,17 @@ struct ScalarTraits {
  // static llvm::Error write(llvm::BinaryStreamWriter &Writer, const T &Value);
 };

-/// Add a template specialization of \c WrapperTypeTraits for any that type
+/// Add a template specialization of \c DirectlyEncodable for any type whose
+/// serialized form is equal to its binary representation on the serializing
+/// machine.
+template <class T>
+struct DirectlyEncodable {
+  // Specializations must provide the member below. The primary template has
+  // none, so \c DirectlyEncodable<T>::value is ill-formed for other types.
+  // static bool const value = true;
+};
+
+/// Add a template specialization of \c WrapperTypeTraits for any type that
 /// serializes as a type that already has a specialization of \c ScalarTypes.
 /// This will typically be useful for types like enums that have a 1-to-1
 /// mapping to e.g. an integer.
@@ -143,6 +154,12 @@ private:
  /// The writer to which the binary data is written.
  llvm::BinaryStreamWriter &StreamWriter;

+  /// The underlying stream of the StreamWriter. We need this reference so that
+  /// we can call \c ExponentialGrowthAppendingBinaryByteStream::writeRaw
+  /// which is more efficient than the generic \c writeBytes of
+  /// \c llvm::BinaryStreamWriter since it avoids the arbitrary size memcpy.
+  ExponentialGrowthAppendingBinaryByteStream &Stream;
+
  /// The number of fields this object contains. \c UINT_MAX if it has not been
  /// set yet. No member may be written to the object if expected number of
  /// fields has not been set yet.
@@ -157,8 +174,21 @@ private:

  /// The \c ByteTreeWriter can only be constructed internally. Use
  /// \c ByteTreeWriter.write to serialize a new object.
-  ByteTreeWriter(llvm::BinaryStreamWriter &StreamWriter, UserInfoMap &UserInfo)
-      : StreamWriter(StreamWriter), UserInfo(UserInfo) {}
+  /// \p Stream must be the underlying stream of \p StreamWriter.
+  ByteTreeWriter(ExponentialGrowthAppendingBinaryByteStream &Stream,
+                 llvm::BinaryStreamWriter &StreamWriter, UserInfoMap &UserInfo)
+      : StreamWriter(StreamWriter), Stream(Stream), UserInfo(UserInfo) {}
+
+  /// Write \p Value to the ByteTree in the serializing machine's native form.
+  template <typename T>
+  llvm::Error writeRaw(T Value) {
+    // FIXME: We implicitly inherit the endianness of the serializing machine.
+    // Since we're currently only supporting macOS that's not a problem for now.
+    const auto WriteOffset = StreamWriter.getOffset();
+    auto Error = Stream.writeRaw(WriteOffset, Value);
+    StreamWriter.setOffset(WriteOffset + sizeof(T));
+    return Error;
+  }

  /// Set the expected number of fields the object written by this writer is
  /// expected to have.
@@ -175,7 +205,7 @@ private:
    // Set the most significant bit to indicate that the next construct is an
    // object and not a scalar.
    uint32_t ToWrite = NumFields | (1 << 31);
-    auto Error = StreamWriter.writeInteger(ToWrite);
+    auto Error = writeRaw(ToWrite);
    (void)Error;
    assert(!Error);

@@ -205,11 +235,13 @@ public:
  /// the stream by the specified ProtocolVersion.
  template <typename T>
  typename std::enable_if<has_ObjectTraits<T>::value, void>::type
-  static write(uint32_t ProtocolVersion, llvm::BinaryStreamWriter &StreamWriter,
-               const T &Object, UserInfoMap &UserInfo) {
-    ByteTreeWriter Writer(StreamWriter, UserInfo);
+  static write(ExponentialGrowthAppendingBinaryByteStream &Stream,
+               uint32_t ProtocolVersion, const T &Object,
+               UserInfoMap &UserInfo) {
+    llvm::BinaryStreamWriter StreamWriter(Stream);
+    ByteTreeWriter Writer(Stream, StreamWriter, UserInfo);

-    auto Error = Writer.StreamWriter.writeInteger(ProtocolVersion);
+    auto Error = Writer.writeRaw(ProtocolVersion);
    (void)Error;
    assert(!Error);

@@ -224,7 +256,7 @@ public:
  write(const T &Object, unsigned Index) {
    validateAndIncreaseFieldIndex(Index);

-    auto ObjectWriter = ByteTreeWriter(StreamWriter, UserInfo);
+    auto ObjectWriter = ByteTreeWriter(Stream, StreamWriter, UserInfo);
    ObjectWriter.setNumFields(ObjectTraits<T>::numFields(Object, UserInfo));

    ObjectTraits<T>::write(ObjectWriter, Object, UserInfo);
@@ -240,7 +272,7 @@ public:
    // bitflag that indicates if the next construct in the tree is an object
    // or a scalar.
    assert((ValueSize & ((uint32_t)1 << 31)) == 0 && "Value size too large");
-    auto SizeError = StreamWriter.writeInteger(ValueSize);
+    auto SizeError = writeRaw(ValueSize);
    (void)SizeError;
    assert(!SizeError);

@@ -254,6 +286,21 @@ public:
           "ScalarTraits<T>::size");
  }

+  /// Write a directly encodable \p Value as field \p Index: size, then bytes.
+  template <typename T>
+  typename std::enable_if<DirectlyEncodable<T>::value, void>::type
+  write(const T &Value, unsigned Index) {
+    validateAndIncreaseFieldIndex(Index);
+
+    auto HeaderError = writeRaw(static_cast<uint32_t>(sizeof(T)));
+    (void)HeaderError;
+    assert(!HeaderError);
+
+    auto PayloadError = writeRaw(Value);
+    (void)PayloadError;
+    assert(!PayloadError);
+  }
+
  template <typename T>
  typename std::enable_if<has_WrapperTypeTraits<T>::value, void>::type
  write(const T &Value, unsigned Index) {
@@ -268,30 +315,18 @@ public:
 // Define serialization schemes for common types

 template <>
-struct ScalarTraits<uint8_t> {
-  static unsigned size(const uint8_t &Value) { return 1; }
-  static llvm::Error write(llvm::BinaryStreamWriter &Writer,
-                           const uint8_t &Value) {
-    return Writer.writeInteger(Value);
-  }
+struct DirectlyEncodable<uint8_t> {
+  static bool const value = true;
 };

 template <>
-struct ScalarTraits<uint16_t> {
-  static unsigned size(const uint16_t &Value) { return 2; }
-  static llvm::Error write(llvm::BinaryStreamWriter &Writer,
-                           const uint16_t &Value) {
-    return Writer.writeInteger(Value);
-  }
+struct DirectlyEncodable<uint16_t> {
+  static bool const value = true;
 };

 template <>
-struct ScalarTraits<uint32_t> {
-  static unsigned size(const uint32_t &Value) { return 4; }
-  static llvm::Error write(llvm::BinaryStreamWriter &Writer,
-                           const uint32_t &Value) {
-    return Writer.writeInteger(Value);
-  }
+struct DirectlyEncodable<uint32_t> {
+  static bool const value = true;
 };

 template <>
--- a/include/swift/Basic/ExponentialGrowthAppendingBinaryByteStream.h
+++ b/include/swift/Basic/ExponentialGrowthAppendingBinaryByteStream.h
@@ -57,6 +57,31 @@ public:

  llvm::Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override;

+  /// This is an optimized version of \c writeBytes for values whose size is
+  /// known at compile time (which in particular holds for integers): copying
+  /// a constant number of bytes can be lowered to a plain store instead of an
+  /// arbitrarily sized memcpy. The copy is bytewise so that \p Offset need not
+  /// be aligned for \p T. This assumes that the endianness of this stream is
+  /// the native one of the executing machine; no byte swapping is performed.
+  template<typename T>
+  llvm::Error writeRaw(uint32_t Offset, T Value) {
+    if (auto Error = checkOffsetForWrite(Offset, sizeof(T))) {
+      return Error;
+    }
+
+    // Resize the internal buffer if needed.
+    uint32_t RequiredSize = Offset + sizeof(T);
+    if (RequiredSize > Data.size()) {
+      Data.resize(RequiredSize);
+    }
+
+    auto *Src = reinterpret_cast<const unsigned char *>(&Value);
+    for (uint32_t I = 0; I != sizeof(T); ++I)
+      Data.data()[Offset + I] = Src[I];
+
+    return llvm::Error::success();
+  }
+
  llvm::Error commit() override { return llvm::Error::success(); }

  virtual llvm::BinaryStreamFlags getFlags() const override {
--- a/tools/SourceKit/tools/sourcekitd/lib/API/Requests.cpp
+++ b/tools/SourceKit/tools/sourcekitd/lib/API/Requests.cpp
@@ -2446,10 +2446,9 @@ void serializeSyntaxTreeAsByteTree(
  swift::ExponentialGrowthAppendingBinaryByteStream Stream(
      llvm::support::endianness::little);
  Stream.reserve(32 * 1024);
-  llvm::BinaryStreamWriter Writer(Stream);
  std::map<void *, void *> UserInfo;
  UserInfo[swift::byteTree::UserInfoKeyReusedNodeIds] = &ReusedNodeIds;
-  swift::byteTree::ByteTreeWriter::write(/*ProtocolVersion=*/1, Writer,
+  swift::byteTree::ByteTreeWriter::write(Stream, /*ProtocolVersion=*/1,
                                         *SyntaxTree.getRaw(), UserInfo);

  std::unique_ptr<llvm::WritableMemoryBuffer> Buf =
--- a/tools/swift-syntax-test/swift-syntax-test.cpp
+++ b/tools/swift-syntax-test/swift-syntax-test.cpp
@@ -729,14 +729,15 @@ int doSerializeRawTree(const char *MainExecutablePath,
        return EXIT_FAILURE;
      }

-      llvm::AppendingBinaryByteStream Stream(llvm::support::endianness::little);
-      llvm::BinaryStreamWriter Writer(Stream);
+      swift::ExponentialGrowthAppendingBinaryByteStream Stream(
+          llvm::support::endianness::little);
+      Stream.reserve(32 * 1024);
      std::map<void *, void *> UserInfo;
      UserInfo[swift::byteTree::UserInfoKeyReusedNodeIds] = &ReusedNodeIds;
      if (options::AddByteTreeFields) {
        UserInfo[swift::byteTree::UserInfoKeyAddInvalidFields] = (void *)true;
      }
-      swift::byteTree::ByteTreeWriter::write(/*ProtocolVersion=*/1, Writer,
+      swift::byteTree::ByteTreeWriter::write(Stream, /*ProtocolVersion=*/1,
                                             *Root, UserInfo);
      auto OutputBufferOrError = llvm::FileOutputBuffer::create(
          options::OutputFilename, Stream.data().size());
--- a/unittests/Basic/ExponentialGrowthAppendingBinaryByteStreamTests.cpp
+++ b/unittests/Basic/ExponentialGrowthAppendingBinaryByteStreamTests.cpp
@@ -116,11 +116,9 @@ TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, GrowMultipleSteps) {
 }

 TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, WriteIntoMiddle) {
-  // Test that the stream resizes correctly if we write into its middle
-
  ExponentialGrowthAppendingBinaryByteStream Stream(llvm::support::little);

-  // Test that the buffer can grow multiple steps at once, e.g. 1 -> 2 -> 4
+  // Test that the stream resizes correctly if we write into its middle
  std::vector<uint8_t> InitialData = {'T', 'e', 's', 't'};
  auto InitialDataRef = makeArrayRef(InitialData);
  EXPECT_THAT_ERROR(Stream.writeBytes(0, InitialDataRef), Succeeded());
@@ -144,3 +142,30 @@ TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, WriteIntoMiddle) {
  EXPECT_EQ(DataAfterInsertRef, Stream.data());
  EXPECT_EQ(6u, Stream.getLength());
 }
+
+TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, WriteRaw) {
+  ExponentialGrowthAppendingBinaryByteStream Stream(llvm::support::little);
+
+  // Seed the stream through the generic byte-oriented write path.
+  std::vector<uint8_t> Seed = {'H', 'e', 'l', 'l', 'o'};
+  auto SeedRef = makeArrayRef(Seed);
+  EXPECT_THAT_ERROR(Stream.writeBytes(0, SeedRef), Succeeded());
+  EXPECT_EQ(SeedRef, Stream.data());
+
+  // A raw single-byte write at the end grows the stream by one byte.
+  EXPECT_THAT_ERROR(Stream.writeRaw(5, (uint8_t)' '), Succeeded());
+  std::vector<uint8_t> WithSpace = {'H', 'e', 'l', 'l', 'o', ' '};
+  EXPECT_EQ(makeArrayRef(WithSpace), Stream.data());
+  EXPECT_EQ(6u, Stream.getLength());
+
+  // A raw 32-bit write stores the value in host byte order, so the expected
+  // bytes below only hold on a little-endian machine. Offset 6 is also not
+  // 4-byte aligned.
+  uint32_t Packed = 'w' | 'o' << 8 |
+                    'r' << 16 | 'l' << 24;
+  EXPECT_THAT_ERROR(Stream.writeRaw(6, Packed), Succeeded());
+  std::vector<uint8_t> WithWord = {'H', 'e', 'l', 'l', 'o', ' ',
+                                   'w', 'o', 'r', 'l'};
+  EXPECT_EQ(makeArrayRef(WithWord), Stream.data());
+  EXPECT_EQ(10u, Stream.getLength());
+}