//===--- RawSyntax.h - Swift Raw Syntax Interface ---------------*- C++ -*-===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// // // This file defines the RawSyntax type. // // These are the "backbone" or "skeleton" of the Syntax tree, providing // the recursive structure, child relationships, kind of node, etc. // // They are reference-counted and strictly immutable, so can be shared freely // among Syntax nodes and have no specific identity. They could even in theory // be shared for expressions like 1 + 1 + 1 + 1 - you don't need 7 syntax nodes // to express that at this layer. // // These are internal implementation ONLY - do not expose anything involving // RawSyntax publicly. Clients of lib/Syntax should not be aware that they // exist.
// //===----------------------------------------------------------------------===// #ifndef SWIFT_SYNTAX_RAWSYNTAX_H #define SWIFT_SYNTAX_RAWSYNTAX_H #include "swift/Basic/Debug.h" #include "swift/Basic/InlineBitfield.h" #include "swift/Syntax/References.h" #include "swift/Syntax/SyntaxArena.h" #include "swift/Syntax/SyntaxKind.h" #include "swift/Syntax/TokenKinds.h" #include "swift/Syntax/Trivia.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/Support/Casting.h" #include "llvm/Support/TrailingObjects.h" #include "llvm/Support/raw_ostream.h" #include using llvm::cast; using llvm::StringRef; #ifndef NDEBUG #define syntax_assert_child_kind(Raw, Cursor, ExpectedKind) \ do { \ if (auto &__Child = Raw->getChild(Cursor)) \ assert(__Child->getKind() == ExpectedKind); \ } while (false) #else #define syntax_assert_child_kind(Raw, Cursor, ExpectedKind) #endif #ifndef NDEBUG #define syntax_assert_child_token(Raw, CursorName, ...) \ do { \ bool __Found = false; \ if (auto &__Token = Raw->getChild(Cursor::CursorName)) { \ assert(__Token->isToken()); \ if (__Token->isPresent()) { \ for (auto Token : {__VA_ARGS__}) { \ if (__Token->getTokenKind() == Token) { \ __Found = true; \ break; \ } \ } \ assert(__Found && "invalid token supplied for " #CursorName \ ", expected one of {" #__VA_ARGS__ "}"); \ } \ } \ } while (false) #else #define syntax_assert_child_token(Raw, CursorName, ...) #endif #ifndef NDEBUG #define syntax_assert_child_token_text(Raw, CursorName, TokenKind, ...) 
\ do { \ bool __Found = false; \ if (auto &__Child = Raw->getChild(Cursor::CursorName)) { \ assert(__Child->isToken()); \ if (__Child->isPresent()) { \ assert(__Child->getTokenKind() == TokenKind); \ for (auto __Text : {__VA_ARGS__}) { \ if (__Child->getTokenText() == __Text) { \ __Found = true; \ break; \ } \ } \ assert(__Found && "invalid text supplied for " #CursorName \ ", expected one of {" #__VA_ARGS__ "}"); \ } \ } \ } while (false) #else #define syntax_assert_child_token_text(Raw, CursorName, TokenKind, ...) #endif #ifndef NDEBUG #define syntax_assert_token_is(Tok, Kind, Text) \ do { \ assert(Tok.getTokenKind() == Kind); \ assert(Tok.getText() == Text); \ } while (false) #else #define syntax_assert_token_is(Tok, Kind, Text) #endif namespace swift { namespace syntax { class SyntaxArena; using CursorIndex = size_t; /// Get a numeric index suitable for array/vector indexing /// from a syntax node's Cursor enum value. template constexpr CursorIndex cursorIndex(CursorType C) { return static_cast(C); } /// An indicator of whether a Syntax node was found or written in the source. /// /// This is not an 'implicit' bit. enum class SourcePresence { /// The syntax was authored by a human and found, or was generated. Present, /// The syntax was expected or optional, but not found in the source. Missing, }; /// The print option to specify when printing a raw syntax node. struct SyntaxPrintOptions { bool Visual = false; bool PrintSyntaxKind = false; bool PrintTrivialNodeKind = false; }; typedef unsigned SyntaxNodeId; /// RawSyntax - the strictly immutable, shared backing nodes for all syntax. /// /// This is implementation detail - do not expose it in public API. 
class RawSyntax final : private llvm::TrailingObjects> { friend TrailingObjects; /// The ID that shall be used for the next node that is created and does not /// have a manually specified id static SyntaxNodeId NextFreeNodeId; /// An ID of this node that is stable across incremental parses SyntaxNodeId NodeId; /// If this node was allocated using a \c SyntaxArena's bump allocator, a /// reference to the arena to keep the underlying memory buffer of this node /// alive. If this is a \c nullptr, the node owns its own memory buffer. RC Arena; union { struct { // FIXME: Reduce TextLength to 30 bits so that common fits in 4 bytes? /// Number of bytes this node takes up spelled out in the source code unsigned TextLength : 32; /// Whether this piece of syntax was actually present in the source. unsigned Presence : 1; unsigned IsToken : 1; } Common; enum { NumRawSyntaxBits = 32 + 1 + 1 }; // For "layout" nodes. struct { static_assert(NumRawSyntaxBits <= 64, "Only 64 bits reserved for standard syntax bits"); uint64_t : bitmax(NumRawSyntaxBits, 64); // align to 32 bits /// Number of children this "layout" node has. unsigned NumChildren : 32; /// Total number of sub nodes, i.e. number of transitive children of this /// node. This does not include the node itself. unsigned TotalSubNodeCount : 32; /// The kind of syntax this node represents. unsigned Kind : bitmax(NumSyntaxKindBits, 8); } Layout; // For "token" nodes. struct { static_assert(NumRawSyntaxBits <= 64, "Only 64 bits reserved for standard syntax bits"); uint64_t : bitmax(NumRawSyntaxBits, 64); // align to 16 bits /// The kind of token this "token" node represents. unsigned TokenKind : 16; StringRef LeadingTrivia; StringRef TokenText; StringRef TrailingTrivia; } Token; } Bits; size_t numTrailingObjects(OverloadToken>) const { return isToken() ? 0 : Bits.Layout.NumChildren; } /// Constructor for creating layout nodes. 
/// If the node has been allocated inside the bump allocator of a /// \c SyntaxArena, that arena must be passed as \p Arena to retain the node's /// underlying storage. /// If \p NodeId is \c None, the next free NodeId is used, if it is passed, /// the caller needs to assure that the node ID has not been used yet. RawSyntax(SyntaxKind Kind, ArrayRef> Layout, size_t TextLength, SourcePresence Presence, const RC &Arena, llvm::Optional NodeId); /// Constructor for creating token nodes /// \c SyntaxArena, that arena must be passed as \p Arena to retain the node's /// underlying storage. /// If \p NodeId is \c None, the next free NodeId is used, if it is passed, /// the caller needs to assure that the NodeId has not been used yet. RawSyntax(tok TokKind, StringRef Text, size_t TextLength, StringRef LeadingTrivia, StringRef TrailingTrivia, SourcePresence Presence, const RC &Arena, llvm::Optional NodeId); /// Compute the node's text length by summing up the length of its childern size_t computeTextLength() { size_t TextLength = 0; for (size_t I = 0, NumChildren = getNumChildren(); I < NumChildren; ++I) { auto &ChildNode = getChild(I); if (ChildNode && !ChildNode->isMissing()) { TextLength += ChildNode->getTextLength(); } } return TextLength; } mutable std::atomic RefCount; public: ~RawSyntax(); // This is a copy-pased implementation of llvm::ThreadSafeRefCountedBase with // the difference that we do not delete the RawSyntax node's memory if the // node was allocated within a SyntaxArena and thus doesn't own its memory. void Retain() const { RefCount.fetch_add(1, std::memory_order_relaxed); } void Release() const { int NewRefCount = RefCount.fetch_sub(1, std::memory_order_acq_rel) - 1; assert(NewRefCount >= 0 && "Reference count was already zero."); if (NewRefCount == 0) { // The node was allocated inside a SyntaxArena and thus doesn't own its // own memory region. Hence we cannot free it. 
It will be deleted once // the last RawSyntax node allocated with it will release its reference // to the arena. this->~RawSyntax(); } } /// \name Factory methods. /// @{ /// Make a raw "layout" syntax node. static RC make(SyntaxKind Kind, ArrayRef> Layout, size_t TextLength, SourcePresence Presence, const RC &Arena = SyntaxArena::make(), llvm::Optional NodeId = llvm::None); static RC makeAndCalcLength(SyntaxKind Kind, ArrayRef> Layout, SourcePresence Presence, const RC &Arena = SyntaxArena::make(), llvm::Optional NodeId = llvm::None) { size_t TextLength = 0; for (auto Child : Layout) { if (Child) { TextLength += Child->getTextLength(); } } return make(Kind, Layout, TextLength, Presence, Arena, NodeId); } /// Make a raw "token" syntax node. static RC make(tok TokKind, StringRef Text, size_t TextLength, StringRef LeadingTrivia, StringRef TrailingTrivia, SourcePresence Presence, const RC &Arena = SyntaxArena::make(), llvm::Optional NodeId = llvm::None); /// Make a raw "token" syntax node that was allocated in \p Arena. static RC makeAndCalcLength(tok TokKind, StringRef Text, StringRef LeadingTrivia, StringRef TrailingTrivia, SourcePresence Presence, const RC &Arena = SyntaxArena::make(), llvm::Optional NodeId = llvm::None) { size_t TextLength = 0; if (Presence != SourcePresence::Missing) { TextLength += LeadingTrivia.size(); TextLength += Text.size(); TextLength += TrailingTrivia.size(); } return make(TokKind, Text, TextLength, LeadingTrivia, TrailingTrivia, Presence, Arena, NodeId); } /// Make a missing raw "layout" syntax node. static RC missing(SyntaxKind Kind, const RC &Arena = SyntaxArena::make()) { return make(Kind, {}, /*TextLength=*/0, SourcePresence::Missing, Arena); } /// Make a missing raw "token" syntax node. 
static RC missing(tok TokKind, StringRef Text, const RC &Arena = SyntaxArena::make()) { return make(TokKind, Text, /*TextLength=*/0, {}, {}, SourcePresence::Missing, Arena); } /// @} SourcePresence getPresence() const { return static_cast(Bits.Common.Presence); } SyntaxKind getKind() const { if (Bits.Common.IsToken) { return SyntaxKind::Token; } else { return static_cast(Bits.Layout.Kind); } } /// Get the number of nodes included in the subtree spanned by this node. /// This includes all transitive children and this node itself. size_t getTotalNodes() { return getTotalSubNodeCount() + 1; } /// Get the number of transitive children of this node. This does not include /// the node itself. size_t getTotalSubNodeCount() { if (isToken()) { return 0; } else { return Bits.Layout.TotalSubNodeCount; } } /// Get an ID for this node that is stable across incremental parses SyntaxNodeId getId() const { return NodeId; } /// Returns true if the node is "missing" in the source (i.e. it was /// expected (or optional) but not written. bool isMissing() const { return getPresence() == SourcePresence::Missing; } /// Returns true if the node is "present" in the source. bool isPresent() const { return getPresence() == SourcePresence::Present; } /// Returns true if this raw syntax node is some kind of declaration. bool isDecl() const { return isDeclKind(getKind()); } /// Returns true if this raw syntax node is some kind of type syntax. bool isType() const { return isTypeKind(getKind()); } /// Returns true if this raw syntax node is some kind of statement. bool isStmt() const { return isStmtKind(getKind()); } /// Returns true if this raw syntax node is some kind of expression. bool isExpr() const { return isExprKind(getKind()); } /// Returns true if this raw syntax node is some kind of pattern. bool isPattern() const { return isPatternKind(getKind()); } /// Return true is this raw syntax node is a unknown node. 
bool isUnknown() const { return isUnknownKind(getKind()); } /// Return true if this raw syntax node is a token. bool isToken() const { return Bits.Common.IsToken; } /// \name Getter routines for SyntaxKind::Token. /// @{ /// Get the kind of the token. tok getTokenKind() const { assert(isToken()); return static_cast(Bits.Token.TokenKind); } /// Return the text of the token as a reference. The referenced buffer may /// disappear when the syntax node gets freed. StringRef getTokenText() const { assert(isToken()); return Bits.Token.TokenText; } /// Return the unparsed leading trivia of the token. StringRef getLeadingTrivia() const { assert(isToken()); return Bits.Token.LeadingTrivia; } /// Return the unparsed trailing trivia of the token. StringRef getTrailingTrivia() const { assert(isToken()); return Bits.Token.TrailingTrivia; } /// Return pieces that make up the leading trivia of the token. /// Note that this triggers trivia parsing which may be expensive. If the /// trivia pieces are required multiple times, consider caching them. Trivia getLeadingTriviaPieces() const; /// Return the trailing trivia list of the token. /// Note that this triggers trivia parsing which may be expensive. If the /// trivia pieces are required multiple times, consider caching them. Trivia getTrailingTriviaPieces() const; /// Return \c true if this is the given kind of token. bool isToken(tok K) const { return isToken() && getTokenKind() == K; } /// @} /// \name Transform routines for "token" nodes. /// @{ /// Return a new token like this one, but with the given leading /// trivia instead. RC withLeadingTrivia(StringRef NewLeadingTrivia) const { return makeAndCalcLength(getTokenKind(), getTokenText(), NewLeadingTrivia, getTrailingTrivia(), getPresence()); } /// Return a new token like this one, but with the given trailing /// trivia instead. 
RC withTrailingTrivia(StringRef NewTrailingTrivia) const { return makeAndCalcLength(getTokenKind(), getTokenText(), getLeadingTrivia(), NewTrailingTrivia, getPresence()); } /// @} /// \name Getter routines for "layout" nodes. /// @{ /// Get the child nodes. ArrayRef> getLayout() const { if (isToken()) return {}; return {getTrailingObjects>(), Bits.Layout.NumChildren}; } size_t getNumChildren() const { if (isToken()) return 0; return Bits.Layout.NumChildren; } /// Get a child based on a particular node's "Cursor", indicating /// the position of the terms in the production of the Swift grammar. const RC &getChild(CursorIndex Index) const { return getLayout()[Index]; } /// Return the number of bytes this node takes when spelled out in the source size_t getTextLength() { return Bits.Common.TextLength; } /// @} /// \name Transform routines for "layout" nodes. /// @{ /// Return a new raw syntax node with the given new layout element appended /// to the end of the node's layout. RC append(RC NewLayoutElement) const; /// Return a new raw syntax node with the given new layout element replacing /// another at some cursor position. RC replacingChild(CursorIndex Index, RC NewLayoutElement) const; /// @} size_t getLeadingTriviaLength() const { return getLeadingTrivia().size(); } size_t getTrailingTriviaLength() const { return getTrailingTrivia().size(); } /// Print this piece of syntax recursively. void print(llvm::raw_ostream &OS, SyntaxPrintOptions Opts) const; /// Dump this piece of syntax recursively for debugging or testing. SWIFT_DEBUG_DUMP; /// Dump this piece of syntax recursively. void dump(llvm::raw_ostream &OS, unsigned Indent = 0) const; static void Profile(llvm::FoldingSetNodeID &ID, tok TokKind, StringRef Text, StringRef LeadingTrivia, StringRef TrailingTrivia); }; } // end namespace syntax } // end namespace swift #endif // SWIFT_SYNTAX_RAWSYNTAX_H