//===--- Parser.cpp - ReST parser -----------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file contains:
//  * line classification (blank / field list / bullet / enumerator / unknown),
//  * a recursive, indentation-driven block parser that builds the ReST AST,
//  * a converter that prints the AST to a raw_ostream,
//  * helpers that split the first word off a LinePart / LineListRef.
//
// NOTE(review): several template argument lists appear to be missing in this
// file as found (e.g. bare `Optional`, `std::pair`, `SmallVector`,
// `SmallVectorImpl`, `cast(...)`, `static_cast(...)`), and every string
// literal in CommentToDocutilsXMLConverter is empty.  Presumably text written
// between angle brackets was stripped by a tool -- confirm against the
// upstream file before building.  The code tokens below are left untouched.
//
//===----------------------------------------------------------------------===//

#include "swift/ReST/Parser.h"
#include "Detail.h"
#include "swift/ReST/LineList.h"
#include "swift/ReST/XMLUtils.h"
#include "llvm/Support/ErrorHandling.h"
#include "clang/Basic/CharInfo.h"

using namespace llvm;
using namespace rest;
using namespace llvm::rest::detail;
using namespace clang;

namespace {
/// Result of recognizing a bare enumerator (without its "." / ")" / "(...)"
/// formatting) at the start of a piece of text.
struct ParsedEnumerator {
  /// Which enumerated-list kind the enumerator belongs to.
  LineKind Kind;
  /// Number of bytes occupied by the enumerator itself.
  unsigned EnumeratorBytes;
  /// Numeric value of the enumerator (0 for the auto-enumerator '#').
  unsigned Value;
};
} // unnamed namespace

/// Checks whether \p Text is empty or begins with ReST whitespace.
///
/// \param Text the text to inspect.
/// \param [out] WhitespaceBytes on a \c true return, the length in bytes of
/// the leading whitespace run: 0 for empty text, \c Text.size() when the text
/// is entirely whitespace.  Left unmodified when the function returns
/// \c false.
///
/// \returns true if \p Text is empty or its first byte is ReST whitespace.
static bool startsWithWhitespaceOrEOL(StringRef Text,
                                      unsigned &WhitespaceBytes) {
  if (Text.empty()) {
    WhitespaceBytes = 0;
    return true;
  }
  if (!isReSTWhitespace(Text.front()))
    return false;
  // The first byte is known to be whitespace; find where the run ends.
  for (unsigned i = 1, e = Text.size(); i != e; ++i) {
    if (!isReSTWhitespace(Text[i])) {
      WhitespaceBytes = i;
      return true;
    }
  }
  WhitespaceBytes = Text.size();
  return true;
}

/// Tries to recognize a bare enumerator at the very beginning of \p Text.
///
/// Two forms are handled: the auto-enumerator '#' (one byte, value 0) and a
/// run of decimal digits (Arabic numeral).  \p PE is written only on success.
///
/// Returns true on success.  Returns false for empty text, for any other
/// leading character, and when converting the digit run to an integer fails.
static bool tryParseEnumerator(StringRef Text, ParsedEnumerator &PE) {
  if (Text.empty()) {
    return false;
  }
  if (Text[0] == '#') {
    PE = { LineKind::EnumeratedListAuto, 1, 0 };
    return true;
  }
  if (isDigit(Text[0])) {
    // Measure the run of digits; the first one was checked above.
    unsigned EnumeratorBytes = 1;
    for (unsigned e = Text.size(); EnumeratorBytes != e; ++EnumeratorBytes) {
      if (!isDigit(Text[EnumeratorBytes]))
        break;
    }
    unsigned Value;
    // StringRef::getAsInteger() returns true on error.
    if (Text.substr(0, EnumeratorBytes).getAsInteger(10, Value)) {
      // FIXME: we should produce a diagnostic if there was an overflow.
      return false;
    }
    PE = { LineKind::EnumeratedListArabic, EnumeratorBytes, Value };
    return true;
  }
  // FIXME: implement other enumerator kinds.
  return false;
}

/// Tries to recognize an enumerator together with its formatting at the
/// beginning of \p Text: "(N)", "N." or "N)".  The formatted enumerator must
/// be followed by whitespace or the end of the text.
///
/// \returns an enumerated-list classification carrying the enumerator kind,
/// the formatting style, whether any text follows the enumerator and its
/// trailing whitespace, and the total number of bytes taken by the enumerator,
/// its formatting and the trailing whitespace; otherwise an Unknown
/// classification.
static LineClassification tryParseEnumeratorWithFormatting(StringRef Text) {
  // [ReST/Syntax Details/Body Elements/Enumerated Lists]
  // Quote:
  //    The following formatting types are recognized:
  //
  //    * suffixed with a period: "1.", "A.", "a.", "I.", "i.".
  //    * surrounded by parentheses: "(1)", "(A)", "(a)", "(I)", "(i)".
  //    * suffixed with a right-parenthesis: "1)", "A)", "a)", "I)", "i)".
  ParsedEnumerator PE;
  if (Text.startswith("(")) {
    // "(N)" form.
    if (!tryParseEnumerator(Text.drop_front(1), PE))
      return LineClassification::makeUnknown();
    StringRef WithoutEnumeratorValue = Text.drop_front(1 + PE.EnumeratorBytes);
    if (!WithoutEnumeratorValue.startswith(")"))
      return LineClassification::makeUnknown();
    StringRef WithoutEnumerator = WithoutEnumeratorValue.drop_front(1);
    unsigned WhitespaceBytes;
    if (startsWithWhitespaceOrEOL(WithoutEnumerator, WhitespaceBytes)) {
      // There is text after the enumerator unless the whitespace run reaches
      // the end of the line.
      bool HasTextAfterEnumerator = WhitespaceBytes != WithoutEnumerator.size();
      // Byte count: "(" + enumerator + ")" + whitespace.
      return LineClassification::makeEnumerated(
          PE.Kind, EnumeratorStyleKind::SurroundedByParens,
          HasTextAfterEnumerator,
          1 + PE.EnumeratorBytes + 1 + WhitespaceBytes);
    }
    return LineClassification::makeUnknown();
  }
  if (tryParseEnumerator(Text, PE)) {
    // "N." or "N)" form.
    StringRef WithoutEnumeratorValue = Text.drop_front(PE.EnumeratorBytes);
    bool IsDotAfter = WithoutEnumeratorValue.startswith(".");
    bool IsParenAfter = WithoutEnumeratorValue.startswith(")");
    if (!IsDotAfter && !IsParenAfter)
      return LineClassification::makeUnknown();
    StringRef WithoutEnumerator = WithoutEnumeratorValue.drop_front(1);
    unsigned WhitespaceBytes;
    if (startsWithWhitespaceOrEOL(WithoutEnumerator, WhitespaceBytes)) {
      bool HasTextAfterEnumerator = WhitespaceBytes != WithoutEnumerator.size();
      // Byte count: enumerator + suffix character + whitespace.
      return LineClassification::makeEnumerated(
          PE.Kind,
          IsDotAfter ? EnumeratorStyleKind::DotAfter
                     : EnumeratorStyleKind::ParenAfter,
          HasTextAfterEnumerator,
          PE.EnumeratorBytes + 1 + WhitespaceBytes);
    }
    return LineClassification::makeUnknown();
  }
  return LineClassification::makeUnknown();
}

/// Classifies a single line by looking at its text starting at
/// \c L.FirstTextByte.
///
/// The checks are performed in this order: blank line, field list marker,
/// bullet list marker, enumerated list marker.  A line matching none of them
/// is classified as Unknown.
llvm::rest::detail::LineClassification
llvm::rest::detail::classifyLine(const Line &L) {
  StringRef Text = L.Text.drop_front(L.FirstTextByte);
  if (Text.empty())
    return LineClassification::makeBlank();

  // [ReST/Syntax Details/Body Elements/Field Lists]
  // Quote:
  //    A field name may consist of any characters, but colons (":") inside of
  //    field names must be escaped with a backslash.  Inline markup is parsed
  //    in field names.
  //    [...]
  //    The field marker is followed by whitespace and the field body.
  //
  // The initial check is very lightweight here (just look if there is a
  // colon at the beginning), so handle this case first.
  if (Text.startswith(":") && Text.size() >= 3 && Text[1] != ':') {
    // This might be a field name.  This is a field list if the line contains a
    // colon that is not escaped, and the field name is not empty.
    // REST-FIXME: clarify that the field name can not be empty.
    unsigned i = 1;
    if (Text[i] == '\\') {
      // Skip the next character, it is escaped.
      // Text.size() >= 3 was checked above, so i stays <= Text.size().
      i += 2;
    }
    for (unsigned e = Text.size(); i != e; ++i) {
      if (Text[i] == ':') {
        // Bytes strictly between the two colons.
        unsigned FieldNameBytes = i - 1;
        // Check that the second colon is followed by end of line or
        // whitespace.
        StringRef WithoutFieldMarker = Text.drop_front(i + 1);
        unsigned WhitespaceBytes;
        if (startsWithWhitespaceOrEOL(WithoutFieldMarker, WhitespaceBytes))
          return LineClassification::makeFieldList(FieldNameBytes,
                                                   i + 1 + WhitespaceBytes);
        else
          break;
      }
      if (Text[i] == '\\') {
        // Skip the next character, it is escaped.
        ++i;
        // A trailing backslash has nothing to escape; stop before the loop
        // increment steps past the end.
        if (i == e)
          break;
      }
    }
  }

  // [ReST/Syntax Details/Body Elements/Bullet Lists]
  // ReST allows the following characters to start a bulleted list:
  //    U+002A ASTERISK
  //    U+002B PLUS SIGN
  //    U+002D HYPHEN-MINUS
  //    U+2022 BULLET
  //    U+2023 TRIANGULAR BULLET
  //    U+2043 HYPHEN BULLET
  //
  // Note: the following code tries to avoid using heavy machinery to decode
  // UTF-8.
  LineKind Kind = LineKind::Unknown;
  unsigned BulletBytes = 0;
  if (Text.startswith("*")) {
    Kind = LineKind::BulletListAsterisk;
    BulletBytes = 1;
  } else if (Text.startswith("+")) {
    Kind = LineKind::BulletListPlus;
    BulletBytes = 1;
  } else if (Text.startswith("-")) {
    Kind = LineKind::BulletListHyphenMinus;
    BulletBytes = 1;
  } else if (Text.startswith("\u2022")) {
    Kind = LineKind::BulletListBullet;
    BulletBytes = 3;
  } else if (Text.startswith("\u2023")) {
    Kind = LineKind::BulletListTriangularBullet;
    BulletBytes = 3;
  } else if (Text.startswith("\u2043")) {
    Kind = LineKind::BulletListHyphenBullet;
    BulletBytes = 3;
  }
  if (Kind != LineKind::Unknown) {
    // We have a bullet.  This is the initial line of a bullet list if the
    // bullet is at the end of the line or is followed by whitespace.
    StringRef WithoutBullet = Text.drop_front(BulletBytes);
    unsigned WhitespaceBytes;
    if (startsWithWhitespaceOrEOL(WithoutBullet, WhitespaceBytes))
      return LineClassification::makeBullet(Kind,
                                            BulletBytes + WhitespaceBytes);
    // Not a bullet after all; fall through to the remaining checks.
    Kind = LineKind::Unknown;
  }

  {
    LineClassification MaybeEnumerator =
        tryParseEnumeratorWithFormatting(Text);
    if (MaybeEnumerator.Kind != LineKind::Unknown)
      return MaybeEnumerator;
  }

  // [ReST/Syntax Details/Body Elements/Option Lists]
  // FIXME: implement later.
  return LineClassification::makeUnknown();
}

/// Returns true if the first two lines of \p LL look like the start of a
/// definition list item: the first line is classified as Unknown, the second
/// line is not blank, and the second line's text starts at a greater column
/// than the first line's.
///
/// \p LL must not be empty (asserted); a single-line list is never a
/// definition list.
static bool isDefinitionList(LineListRef LL) {
  assert(LL.size() != 0);
  if (LL.size() < 2)
    return false;
  if (LL[0].getClassification().Kind != LineKind::Unknown)
    return false;
  if (LL[1].getClassification().Kind == LineKind::Blank)
    return false;
  return LL[0].FirstTextCol < LL[1].FirstTextCol;
}

/// Decides whether the first line of \p LL, which looks like an enumerator,
/// actually starts an enumerated list item, based on the line that follows.
///
/// The rules are applied in order; the first one that reaches a decision
/// wins:
///  1. the next line is blank: it is a list item;
///  2. the next line starts at the same column: it is a list item only if the
///     next line is an enumerator with text after it, of the same kind and
///     style;
///  3. the next line starts at a smaller column: it is a list item;
///  4. the next line is indented less than the enumerator plus its trailing
///     whitespace: it is not a list item;
///  5. otherwise it is a list item.
///
/// NOTE(review): LL[1] is read whenever isNextLineBlank(0) is false --
/// presumably isNextLineBlank() returns true when there is no next line;
/// confirm against LineList.h.
static bool isEnumeratedListItem(LineListRef LL) {
  Optional IsListItem;
  if (LL.isNextLineBlank(0))
    IsListItem = true;
  if (!IsListItem.hasValue() && LL[1].FirstTextCol == LL[0].FirstTextCol) {
    // "Is the next line an enumerator with text after it?"
    bool IsNEELEL = isEnumerated(LL[1].getClassification().Kind) &&
                    LL[1].getClassification().hasTextAfterEnumerator();
    if (IsNEELEL) {
      if (LL[0].getClassification().Kind == LL[1].getClassification().Kind &&
          LL[0].getClassification().getEnumeratorStyle() ==
              LL[1].getClassification().getEnumeratorStyle())
        IsListItem = true;
      // FIXME: check numeric value of enumerator.
    }
    // Same column but not a compatible enumerator: the next line continues
    // a paragraph.
    if (!IsListItem.hasValue())
      IsListItem = false;
  }
  if (!IsListItem.hasValue() && LL[1].FirstTextCol < LL[0].FirstTextCol)
    IsListItem = true;
  if (!IsListItem.hasValue() &&
      (LL[1].FirstTextCol <
       LL[0].FirstTextCol +
           LL[0].getClassification().getEnumeratorAndWhitespaceCols())) {
    // Next line does not have enough indentation, so this line is not a
    // list item.
    IsListItem = false;
  }
  if (!IsListItem.hasValue())
    IsListItem = true;
  return IsListItem.getValue();
}

namespace {
/// Indentation-driven block parser.  AST nodes are allocated through
/// \c Context.
///
/// Every parseXxx() method takes a line list whose first line starts the
/// construct, and returns the created node together with the number of lines
/// it consumed from the front of that list.
class Parser {
  ReSTContext &Context;

  std::pair parseParagraph(LineListRef LL, ColumnNum BaseIndentation);

  std::pair parseBulletList(LineListRef LL);

  std::pair parseEnumeratedList(LineListRef LL);

  std::pair parseDefinitionList(LineListRef LL);

  std::pair parseFieldList(LineListRef LL);

  /// This might parse an indented literal block or a block quote.
  std::pair parseUnresolvedIndentedBlock(LineListRef LL);

  unsigned parseLevelImpl(LineListRef LL, SmallVectorImpl &Children,
                          ColumnNum BaseIndentation,
                          ColumnNum LeftMarginIndentation,
                          bool IgnoreIndentationOfTheFirstLine,
                          ColumnNum *MinIndentation);

  unsigned parseLevel(LineListRef LL, SmallVectorImpl &Children);

public:
  Parser(ReSTContext &Context) : Context(Context) {}

  Document *parseDocument(LineListRef LL);
};
} // unnamed namespace

/// Parses a paragraph that starts at the first line of \p LL.
///
/// The paragraph ends at the first blank line, at the first non-blank line
/// whose text column differs from \p BaseIndentation (the first line is
/// exempt when \c LL.isFirstLineTruncated()), or at the end of \p LL.  Lines
/// that look like bullets, enumerators or field markers inside the paragraph
/// are kept as paragraph text.
///
/// \returns the paragraph and the number of lines it covers; the terminating
/// line is not included.
std::pair Parser::parseParagraph(LineListRef LL, ColumnNum BaseIndentation) {
  assert(LL.size() != 0);
  assert(LL[0].getClassification().Kind == LineKind::Unknown ||
         isEnumerated(LL[0].getClassification().Kind));
  unsigned i = 0;
  for (unsigned e = LL.size(); i != e; ++i) {
    if (LL[i].getClassification().Kind != LineKind::Blank &&
        !(i == 0 && LL.isFirstLineTruncated())) {
      if (LL[i].FirstTextCol > BaseIndentation) {
        // Indent.
        assert(i != 1 && "can not be a definition list");
        // Unexpected indent.  Paragraph ends here, the next line starts a new
        // block.
        break;
      } else if (LL[i].FirstTextCol < BaseIndentation) {
        // Unexpected unindent.  Paragraph ends here, the next line should
        // match up with something else we parsed previously.
        break;
      }
    }
    switch (LL[i].getClassification().Kind) {
    case LineKind::Unknown:
      continue;

    case LineKind::Blank: {
      // Paragraph ends at a blank line.
      auto *P = new (Context)
          Paragraph(new (Context) TextAndInline(LL.subList(0, i)));
      return { P, i };
    }

    case LineKind::BulletListAsterisk:
    case LineKind::BulletListPlus:
    case LineKind::BulletListHyphenMinus:
    case LineKind::BulletListBullet:
    case LineKind::BulletListTriangularBullet:
    case LineKind::BulletListHyphenBullet:
      // A bullet-looking line directly after text is just more text.
      assert(!LL.isPreviousLineBlank(i));
      continue;

    case LineKind::EnumeratedListArabic:
    case LineKind::EnumeratedListUppercaseAlphabet:
    case LineKind::EnumeratedListLowercaseAlphabet:
    case LineKind::EnumeratedListUppercaseRoman:
    case LineKind::EnumeratedListLowercaseRoman:
    case LineKind::EnumeratedListUppercaseAmbiguous:
    case LineKind::EnumeratedListLowercaseAmbiguous:
    case LineKind::EnumeratedListAuto:
      // The first line may itself be an enumerator-looking line (see the
      // assertion at the top of the function).
      assert(i == 0 || !LL.isPreviousLineBlank(i));
      continue;

    case LineKind::FieldList:
      assert(!LL.isPreviousLineBlank(i));
      continue;
    }
  }
  auto *P =
      new (Context) Paragraph(new (Context) TextAndInline(LL.subList(0, i)));
  assert(i != 0);
  return { P, i };
}

/// Parses a bullet list whose first item starts at the first line of \p LL.
///
/// All items must use the same bullet kind as the first line and start at the
/// same column.  Blank lines between items are skipped.  The children of each
/// item are parsed with parseLevel() after dropping the bullet and its
/// trailing whitespace from the item's first line.
///
/// \returns the list and the number of lines consumed.
std::pair Parser::parseBulletList(LineListRef LL) {
  // ItemInfos[k] records the start index and count of item k's children
  // inside the flat ItemChildren array.
  SmallVector ItemInfos;
  SmallVector ItemChildren;
  auto Kind = LL[0].getClassification().Kind;
  ColumnNum BulletIndentation = LL[0].FirstTextCol;
  unsigned i = 0;
  for (unsigned e = LL.size(); i != e;) {
    // At the beginning of every iteration, we are either at the beginning of
    // the next list item or at the end of the list.
    if (LL[i].getClassification().Kind != LineKind::Blank) {
      if (LL[i].FirstTextCol > BulletIndentation) {
        // Indent.  Note that this indent is not large enough to line up with
        // the previous item's children.  The list ends here, the next line
        // will start a block quote, but at the same nesting level as this
        // list.
        break;
      } else if (LL[i].FirstTextCol < BulletIndentation) {
        // Unexpected unindent.  List ends here, the next line should match up
        // with something else we parsed previously.
        break;
      }
    }
    bool IsEndOfList = false;
    switch (LL[i].getClassification().Kind) {
    case LineKind::Unknown:
      IsEndOfList = true;
      break;

    case LineKind::Blank:
      // Skip blank lines?
      i++;
      continue;

    case LineKind::BulletListAsterisk:
    case LineKind::BulletListPlus:
    case LineKind::BulletListHyphenMinus:
    case LineKind::BulletListBullet:
    case LineKind::BulletListTriangularBullet:
    case LineKind::BulletListHyphenBullet:
      // A different bullet character starts a different list.
      if (LL[i].getClassification().Kind != Kind)
        IsEndOfList = true;
      break;

    case LineKind::EnumeratedListArabic:
    case LineKind::EnumeratedListUppercaseAlphabet:
    case LineKind::EnumeratedListLowercaseAlphabet:
    case LineKind::EnumeratedListUppercaseRoman:
    case LineKind::EnumeratedListLowercaseRoman:
    case LineKind::EnumeratedListUppercaseAmbiguous:
    case LineKind::EnumeratedListLowercaseAmbiguous:
    case LineKind::EnumeratedListAuto:
      IsEndOfList = true;
      break;

    case LineKind::FieldList:
      IsEndOfList = true;
      break;
    }
    if (IsEndOfList)
      break;

    // If we got here, this is the start of a list item.
    auto SubLL = LL.dropFrontLines(i);
    SubLL.fromFirstLineDropFront(
        LL[i].getClassification().getBulletAndWhitespaceBytes());
    SmallVector CurrItemChildren;
    unsigned NumLines = parseLevel(SubLL, CurrItemChildren);
    i += NumLines;
    ItemInfos.push_back({ static_cast(ItemChildren.size()),
                          static_cast(CurrItemChildren.size()) });
    ItemChildren.append(CurrItemChildren.begin(), CurrItemChildren.end());
  }
  auto *BL = BulletList::create(Context, ItemInfos, ItemChildren);
  return { BL, i };
}

/// Parses an enumerated list whose first item starts at the first line of
/// \p LL.
///
/// All items must have the same enumerator kind and formatting style as the
/// first line, start at the same column, and pass isEnumeratedListItem().
/// Blank lines between items are skipped.  The children of each item are
/// parsed with parseLevel() after dropping the enumerator and its trailing
/// whitespace from the item's first line.
///
/// \returns the list and the number of lines consumed.
std::pair Parser::parseEnumeratedList(LineListRef LL) {
  // ItemInfos[k] records the start index and count of item k's children
  // inside the flat ItemChildren array.
  SmallVector ItemInfos;
  SmallVector ItemChildren;
  auto Kind = LL[0].getClassification().Kind;
  auto EnumeratorStyle = LL[0].getClassification().getEnumeratorStyle();
  ColumnNum EnumeratorIndentation = LL[0].FirstTextCol;
  unsigned i = 0;
  for (unsigned e = LL.size(); i != e;) {
    // Invariant: at the beginning of every iteration, we are either at the
    // beginning of the next list item or at the end of the list.
    if (LL[i].getClassification().Kind != LineKind::Blank) {
      if (LL[i].FirstTextCol > EnumeratorIndentation) {
        // Indent.  Note that this indent is not large enough to line up with
        // the previous item's children.  The list ends here, the next line
        // will start a block quote, but at the same nesting level as this
        // list.
        break;
      } else if (LL[i].FirstTextCol < EnumeratorIndentation) {
        // Unexpected unindent.  List ends here, the next line should match up
        // with something else we parsed previously.
        break;
      }
    }
    bool IsEndOfList = false;
    switch (LL[i].getClassification().Kind) {
    case LineKind::Unknown:
      IsEndOfList = true;
      break;

    case LineKind::Blank:
      // Skip blank lines?
      i++;
      continue;

    case LineKind::BulletListAsterisk:
    case LineKind::BulletListPlus:
    case LineKind::BulletListHyphenMinus:
    case LineKind::BulletListBullet:
    case LineKind::BulletListTriangularBullet:
    case LineKind::BulletListHyphenBullet:
      IsEndOfList = true;
      break;

    case LineKind::EnumeratedListArabic:
    case LineKind::EnumeratedListUppercaseAlphabet:
    case LineKind::EnumeratedListLowercaseAlphabet:
    case LineKind::EnumeratedListUppercaseRoman:
    case LineKind::EnumeratedListLowercaseRoman:
    case LineKind::EnumeratedListUppercaseAmbiguous:
    case LineKind::EnumeratedListLowercaseAmbiguous:
    case LineKind::EnumeratedListAuto:
      // A different enumerator kind or formatting style starts another list.
      if (LL[i].getClassification().Kind != Kind ||
          LL[i].getClassification().getEnumeratorStyle() != EnumeratorStyle)
        IsEndOfList = true;
      // The line may look like an enumerator but really be paragraph text.
      if (!isEnumeratedListItem(LL.dropFrontLines(i)))
        IsEndOfList = true;
      break;

    case LineKind::FieldList:
      IsEndOfList = true;
      break;
    }
    if (IsEndOfList)
      break;

    // If we got here, this is the start of a list item.
    auto SubLL = LL.dropFrontLines(i);
    SubLL.fromFirstLineDropFront(
        LL[i].getClassification().getEnumeratorAndWhitespaceBytes());
    SmallVector CurrItemChildren;
    unsigned NumLines = parseLevel(SubLL, CurrItemChildren);
    i += NumLines;
    ItemInfos.push_back({ static_cast(ItemChildren.size()),
                          static_cast(CurrItemChildren.size()) });
    ItemChildren.append(CurrItemChildren.begin(), CurrItemChildren.end());
  }
  auto *EL = EnumeratedList::create(Context, ItemInfos, ItemChildren);
  return { EL, i };
}

/// Parses a definition list whose first term is the first line of \p LL.
/// \p LL must satisfy isDefinitionList() (asserted).
///
/// Each item consists of one term line followed by the definition body, which
/// is parsed with parseLevelImpl() using the column of the line after the
/// term as the base indentation and "term column + 1" as the left margin.
/// Every item is created with an empty classifier list.
///
/// \returns the list and the number of lines consumed.
std::pair Parser::parseDefinitionList(LineListRef LL) {
  assert(isDefinitionList(LL));
  ColumnNum TermIndentation = LL[0].FirstTextCol;
  SmallVector Children;
  unsigned i = 0;
  for (unsigned e = LL.size(); i != e;) {
    // Invariant: at the beginning of every iteration, we are either at the
    // beginning of the next list item or at the end of the list.
    if (LL[i].FirstTextCol < TermIndentation) {
      // Unindent.  Definition list ends here.
      break;
    }
    if (!isDefinitionList(LL.dropFrontLines(i)))
      break;

    // FIXME: parse the term line into term and classifiers.
    auto Term = new (Context) TextAndInline(LL.subList(i, 1));
    // isDefinitionList() succeeded above, so line i + 1 exists.
    ColumnNum ItemBaseIndentation = LL[i + 1].FirstTextCol;
    SmallVector ItemChildren;
    unsigned NumLines = parseLevelImpl(
        LL.dropFrontLines(i + 1), ItemChildren, ItemBaseIndentation,
        TermIndentation + ColumnNum::make(1),
        /*IgnoreIndentationOfTheFirstLine=*/false, nullptr);
    Children.push_back(
        DefinitionListItem::create(Context, Term, {}, ItemChildren));
    // One term line plus the definition body.
    i += 1 + NumLines;
  }
  auto *DL = DefinitionList::create(Context, Children);
  assert(i != 0);
  return { DL, i };
}

/// Parses a field list whose first field marker is on the first line of
/// \p LL.  The first line must be classified as FieldList (asserted).
///
/// For each field, the field name is taken from the text between the two
/// colons of the marker, and the field body is parsed with parseLevelImpl()
/// after dropping the field marker and its trailing whitespace from the first
/// line.  The list ends at a line that starts to the left of the first
/// marker, or at a line that is not classified as FieldList.
///
/// \returns the list and the number of lines consumed.
std::pair Parser::parseFieldList(LineListRef LL) {
  assert(LL[0].getClassification().Kind == LineKind::FieldList);
  ColumnNum FirstColonIndentation = LL[0].FirstTextCol;
  SmallVector Children;
  unsigned i = 0;
  for (unsigned e = LL.size(); i != e;) {
    // Invariant: at the beginning of every iteration, we are either at the
    // beginning of the next list item or at the end of the list.
    if (LL[i].FirstTextCol < FirstColonIndentation) {
      // Unindent.  Field list ends here.
      break;
    }
    if (LL[i].getClassification().Kind != LineKind::FieldList)
      break;

    // Skip the leading colon; the name is getFieldNameBytes() long.
    LinePart FieldNameText =
        LL.getLinePart(i, LL[i].FirstTextByte + 1,
                       LL[i].getClassification().getFieldNameBytes());
    auto FieldName = new (Context) TextAndInline(FieldNameText);

    // The base indentation of the body is the column of the first non-blank
    // line after the marker line.
    // NOTE(review): when no such line exists, ItemBaseIndentation keeps
    // whatever ColumnNum's default constructor produces -- confirm that this
    // is a well-defined value.
    ColumnNum ItemBaseIndentation;
    if (i + 1 != e) {
      for (unsigned j = i + 1; j != e; ++j) {
        if (LL[j].getClassification().Kind != LineKind::Blank) {
          ItemBaseIndentation = LL[j].FirstTextCol;
          break;
        }
      }
    }
    SmallVector BodyChildren;
    auto SubLL = LL.dropFrontLines(i);
    SubLL.fromFirstLineDropFront(
        SubLL[0].getClassification().getFieldMarkerAndWhitespaceBytes());
    // The first line carries the marker, so its indentation says nothing
    // about the body.
    unsigned NumLines = parseLevelImpl(
        SubLL, BodyChildren, ItemBaseIndentation,
        FirstColonIndentation + ColumnNum::make(1),
        /*IgnoreIndentationOfTheFirstLine=*/true, nullptr);
    Children.push_back(Field::create(Context, FieldName, BodyChildren));
    i += NumLines;
  }
  auto *FL = FieldList::create(Context, Children);
  assert(i != 0);
  return { FL, i };
}

/// Parses an indented block starting at the first line of \p LL and wraps the
/// parsed children into a BlockQuote.
///
/// \returns the block quote and the number of lines consumed (asserted to be
/// non-zero).
std::pair Parser::parseUnresolvedIndentedBlock(LineListRef LL) {
  SmallVector Children;
  unsigned NumLines = parseLevel(LL, Children);
  auto *BQ = BlockQuote::create(Context, Children);
  assert(NumLines != 0);
  return { BQ, NumLines };
}

/// Parses a sequence of sibling blocks from \p LL into \p Children.
///
/// \param LL the lines to parse.  Returns 0 immediately if empty.
/// \param [out] Children receives the parsed blocks; must be empty on entry
/// (asserted).
/// \param BaseIndentation the text column at which blocks of this level are
/// expected to start.  A non-blank line at a greater column starts a nested
/// indented block.
/// \param LeftMarginIndentation a non-blank line starting to the left of this
/// column ends the level.  A non-blank line between this column and
/// \p BaseIndentation wraps everything parsed so far into a BlockQuote and
/// lowers the base indentation to that line's column.
/// \param IgnoreIndentationOfTheFirstLine when true, the first line is exempt
/// from the indentation checks and is never treated as the start of a
/// definition list.
/// \param [out] MinIndentation if non-null, receives the base indentation in
/// effect when parsing stopped.
///
/// \returns the number of lines consumed from the front of \p LL.
unsigned Parser::parseLevelImpl(LineListRef LL, SmallVectorImpl &Children,
                                ColumnNum BaseIndentation,
                                ColumnNum LeftMarginIndentation,
                                bool IgnoreIndentationOfTheFirstLine,
                                ColumnNum *MinIndentation) {
  assert(Children.size() == 0);
  if (LL.empty())
    return 0;

  unsigned i = 0;
  for (unsigned e = LL.size(); i != e;) {
    if (LL[i].getClassification().Kind != LineKind::Blank &&
        !(i == 0 && IgnoreIndentationOfTheFirstLine)) {
      if (LL[i].FirstTextCol > BaseIndentation) {
        // Indent.
        //
        // FIXME: parse a definition list or a block quote.
        ReSTASTNode *N;
        unsigned NumLines;
        std::tie(N, NumLines) =
            parseUnresolvedIndentedBlock(LL.dropFrontLines(i));
        Children.push_back(N);
        i += NumLines;
        continue;
      } else if (LL[i].FirstTextCol < LeftMarginIndentation) {
        // Unexpected unindent.  Current indentation level ends here, the next
        // line should match up with something else we parsed previously.
        break;
      } else if (LL[i].FirstTextCol < BaseIndentation) {
        // Partial unindent: everything parsed so far turns out to have been
        // indented relative to this line, so wrap it into a block quote and
        // continue at the new, smaller base indentation.
        auto *BQ = BlockQuote::create(Context, Children);
        Children.clear();
        Children.push_back(BQ);
        BaseIndentation = LL[i].FirstTextCol;
      }
    }
    switch (LL[i].getClassification().Kind) {
    case LineKind::Unknown: {
      auto SubLL = LL.dropFrontLines(i);
      ReSTASTNode *N;
      unsigned NumLines;
      if (isDefinitionList(SubLL) &&
          !(i == 0 && IgnoreIndentationOfTheFirstLine))
        std::tie(N, NumLines) = parseDefinitionList(SubLL);
      else
        std::tie(N, NumLines) = parseParagraph(SubLL, BaseIndentation);
      Children.push_back(N);
      i += NumLines;
      continue;
    }

    case LineKind::Blank:
      // Skip blank lines?
      i++;
      continue;

    case LineKind::BulletListAsterisk:
    case LineKind::BulletListPlus:
    case LineKind::BulletListHyphenMinus:
    case LineKind::BulletListBullet:
    case LineKind::BulletListTriangularBullet:
    case LineKind::BulletListHyphenBullet: {
      // If the line looks like a bullet list item, it is always a bullet list
      // item, no further checks required.
      ReSTASTNode *N;
      unsigned NumLines;
      std::tie(N, NumLines) = parseBulletList(LL.dropFrontLines(i));
      Children.push_back(N);
      i += NumLines;
      continue;
    }

    case LineKind::EnumeratedListArabic:
    case LineKind::EnumeratedListUppercaseAlphabet:
    case LineKind::EnumeratedListLowercaseAlphabet:
    case LineKind::EnumeratedListUppercaseRoman:
    case LineKind::EnumeratedListLowercaseRoman:
    case LineKind::EnumeratedListUppercaseAmbiguous:
    case LineKind::EnumeratedListLowercaseAmbiguous:
    case LineKind::EnumeratedListAuto: {
      // A line that looks like an enumerator is only a list item if the
      // following line agrees; otherwise it starts a paragraph.
      auto SubLL = LL.dropFrontLines(i);
      bool IsListItem = isEnumeratedListItem(SubLL);
      // FIXME: more checks on indentation?
      ReSTASTNode *N;
      unsigned NumLines;
      if (IsListItem)
        std::tie(N, NumLines) = parseEnumeratedList(SubLL);
      else
        std::tie(N, NumLines) = parseParagraph(SubLL, BaseIndentation);
      Children.push_back(N);
      i += NumLines;
      continue;
    }

    case LineKind::FieldList: {
      ReSTASTNode *N;
      unsigned NumLines;
      std::tie(N, NumLines) = parseFieldList(LL.dropFrontLines(i));
      Children.push_back(N);
      i += NumLines;
      continue;
    }
    }
  }
  if (MinIndentation)
    *MinIndentation = BaseIndentation;
  assert(i != 0);
  return i;
}

/// Parses a sequence of sibling blocks from \p LL into \p Children, using the
/// text column of the first line as both the base indentation and the left
/// margin.
///
/// The first line of a non-empty \p LL must not be blank (asserted).
///
/// \returns the number of lines consumed; 0 if \p LL is empty.
unsigned Parser::parseLevel(LineListRef LL, SmallVectorImpl &Children) {
  if (LL.size() == 0)
    return 0;
  assert(LL[0].getClassification().Kind != LineKind::Blank);
  ColumnNum Indentation = LL[0].FirstTextCol;
  return parseLevelImpl(LL, Children, Indentation, Indentation,
                        /*IgnoreIndentationOfTheFirstLine=*/false, nullptr);
}

/// Parses all of \p LL into a Document.
///
/// Leading blank lines are skipped; if nothing remains, an empty document is
/// returned.  Otherwise the remaining lines are parsed with the first text
/// line's column as the base indentation and column 0 as the left margin; all
/// of them are expected to be consumed (asserted).  Unless
/// \c LangOpts.IgnoreUniformIndentation is set, content whose minimum
/// indentation is not column 0 is wrapped into a single BlockQuote.
Document *Parser::parseDocument(LineListRef LL) {
  unsigned i = 0;
  for (unsigned e = LL.size(); i != e; ++i) {
    if (LL[i].getClassification().Kind != LineKind::Blank)
      break;
  }
  auto SubLL = LL.dropFrontLines(i);
  if (SubLL.empty())
    return Document::create(Context, {});

  SmallVector Children;
  ColumnNum MinIndentation;
  unsigned NumLines = parseLevelImpl(
      SubLL, Children, SubLL[0].FirstTextCol, ColumnNum::make(0),
      /*IgnoreIndentationOfTheFirstLine=*/false, &MinIndentation);
  assert(NumLines == SubLL.size());
  if (!Context.LangOpts.IgnoreUniformIndentation &&
      MinIndentation != ColumnNum::make(0)) {
    auto *BQ = BlockQuote::create(Context, Children);
    Children.clear();
    Children.push_back(BQ);
  }
  return Document::create(Context, Children);
}

/// Public entry point: parses \p LL into a Document allocated through \p C.
Document *llvm::rest::parseDocument(ReSTContext &C, LineListRef LL) {
  Parser P(C);
  return P.parseDocument(LL);
}

/// Walks a ReST AST and prints it to \c OS.
///
/// NOTE(review): every markup literal written by this struct is the empty
/// string as found; given the struct's name they were presumably docutils XML
/// element tags that got stripped -- confirm against the upstream file.  As
/// written, only the escaped text content, the newlines between lines of
/// text, and nothing else reach the stream.
struct CommentToDocutilsXMLConverter {
  raw_ostream &OS;

  CommentToDocutilsXMLConverter(raw_ostream &OS) : OS(OS) {}

  /// Dispatches on the node kind to the matching printXxx() method.  Node
  /// kinds that are not implemented yet hit llvm_unreachable.
  void printASTNode(const ReSTASTNode *N) {
    switch (N->getKind()) {
    case ASTNodeKind::Document:
      printDocument(cast(N));
      break;

    case ASTNodeKind::Section:
    case ASTNodeKind::Topic:
    case ASTNodeKind::Sidebar:
    case ASTNodeKind::Title:
    case ASTNodeKind::Subtitle:
    case ASTNodeKind::Transition:
      llvm_unreachable("implement");

    case ASTNodeKind::Paragraph:
      printParagraph(cast(N));
      break;
    case ASTNodeKind::BulletList:
      printBulletList(cast(N));
      break;
    case ASTNodeKind::EnumeratedList:
      printEnumeratedList(cast(N));
      break;
    case ASTNodeKind::DefinitionListItem:
      printDefinitionListItem(cast(N));
      break;
    case ASTNodeKind::DefinitionList:
      printDefinitionList(cast(N));
      break;
    case ASTNodeKind::Field:
      printField(cast(N));
      break;
    case ASTNodeKind::FieldList:
      printFieldList(cast(N));
      break;
    case ASTNodeKind::BlockQuote:
      printBlockQuote(cast(N));
      break;
    case ASTNodeKind::TextAndInline:
      printTextAndInline(cast(N));
      break;
    case ASTNodeKind::PrivateExtension:
      printPrivateExtension(cast(N));
      break;
    }
  }

  /// Prints the document's children between an opening and a closing
  /// literal.
  void printDocument(const Document *D) {
    OS << "";
    for (const auto *N : D->getChildren()) {
      printASTNode(N);
    }
    OS << "";
  }

  /// Prints the paragraph's text content between an opening and a closing
  /// literal.
  void printParagraph(const Paragraph *P) {
    OS << "";
    printTextAndInline(P->getContent());
    OS << "";
  }

  /// Prints every item of the list; each item's children are surrounded by
  /// per-item literals, and the whole list by list literals.
  void printBulletList(const BulletList *BL) {
    OS << "";
    for (unsigned i = 0, e = BL->getNumItems(); i != e; ++i) {
      OS << "";
      for (const auto *N : BL->getItemChildren(i)) {
        printASTNode(N);
      }
      OS << "";
    }
    OS << "";
  }

  /// Prints every item of the list; each item's children are surrounded by
  /// per-item literals, and the whole list by list literals.
  void printEnumeratedList(const EnumeratedList *EL) {
    OS << "";
    for (unsigned i = 0, e = EL->getNumItems(); i != e; ++i) {
      OS << "";
      for (const auto *N : EL->getItemChildren(i)) {
        printASTNode(N);
      }
      OS << "";
    }
    OS << "";
  }

  /// Prints, in order: the term, each classifier, and the definition
  /// children.
  void printDefinitionListItem(const DefinitionListItem *DLI) {
    OS << "";

    OS << "";
    printASTNode(DLI->getTerm());
    OS << "";

    for (const auto *N : DLI->getClassifiers()) {
      OS << "";
      printASTNode(N);
      OS << "";
    }

    OS << "";
    for (const auto *N : DLI->getDefinitionChildren()) {
      printASTNode(N);
    }
    OS << "";

    OS << "";
  }

  /// Prints the list's children between an opening and a closing literal.
  void printDefinitionList(const DefinitionList *DL) {
    OS << "";
    for (const auto *N : DL->getChildren()) {
      printASTNode(N);
    }
    OS << "";
  }

  /// Prints the field name followed by the field body children.
  void printField(const Field *F) {
    OS << "";

    OS << "";
    printASTNode(F->getName());
    OS << "";

    OS << "";
    for (const auto *N : F->getBodyChildren()) {
      printASTNode(N);
    }
    OS << "";

    OS << "";
  }

  /// Prints the list's fields between an opening and a closing literal.
  void printFieldList(const FieldList *FL) {
    OS << "";
    for (const auto *F : FL->getChildren()) {
      printASTNode(F);
    }
    OS << "";
  }

  /// Prints the block quote's children between an opening and a closing
  /// literal.
  void printBlockQuote(const BlockQuote *BQ) {
    OS << "";
    for (const auto *N : BQ->getChildren()) {
      printASTNode(N);
    }
    OS << "";
  }

  /// Prints the text with XML escaping.  For multi-line text, each line is
  /// printed starting at its FirstTextByte, and lines are separated (not
  /// terminated) by '\n'.
  void printTextAndInline(const TextAndInline *T) {
    if (T->isLinePart()) {
      LinePart LP = T->getLinePart();
      appendWithXMLEscaping(OS, LP.Text);
    } else {
      LineListRef LL = T->getLines();
      for (unsigned i = 0, e = LL.size(); i != e; ++i) {
        appendWithXMLEscaping(OS, LL[i].Text.drop_front(LL[i].FirstTextByte));
        // No newline after the last line.
        if (i != e - 1)
          OS << '\n';
      }
    }
  }

  /// Prints a single literal; the node's content is not inspected.
  void printPrivateExtension(const PrivateExtension *PE) {
    OS << "";
  }
};

/// Prints the document \p D to \p OS using CommentToDocutilsXMLConverter.
void llvm::rest::convertToDocutilsXML(const Document *D, raw_ostream &OS) {
  CommentToDocutilsXMLConverter Converter(OS);
  Converter.printASTNode(D);
}

/// Debugging aid: prints this node to llvm::errs(), followed by a newline.
void ReSTASTNode::dump() const {
  CommentToDocutilsXMLConverter Converter(llvm::errs());
  Converter.printASTNode(this);
  llvm::errs() << '\n';
}

/// Returns the number of leading bytes of \p Text that are ReST whitespace
/// (\c Text.size() if the text is all whitespace, 0 if it is empty).
static unsigned measureReSTWhitespace(StringRef Text) {
  unsigned i = 0;
  for (unsigned e = Text.size(); i != e; ++i) {
    if (!isReSTWhitespace(Text[i]))
      break;
  }
  return i;
}

/// Returns the number of leading bytes of \p Text that are not ReST
/// whitespace (\c Text.size() if the text contains no whitespace, 0 if it is
/// empty or starts with whitespace).
static unsigned measureReSTWord(StringRef Text) {
  unsigned i = 0;
  for (unsigned e = Text.size(); i != e; ++i) {
    if (isReSTWhitespace(Text[i]))
      break;
  }
  return i;
}

/// Splits \p LP into its first word and the remainder.
///
/// The word is the leading run of non-whitespace bytes; the remainder starts
/// after the whitespace that follows the word.  If \p LP does not start with
/// a word (it is empty or starts with whitespace), the returned word is empty
/// and the remainder is all of \p LP.  The source ranges of both parts are
/// advanced from \c LP.Range.Start by the corresponding byte counts.
std::pair llvm::rest::extractWord(LinePart LP) {
  unsigned NumWordBytes = measureReSTWord(LP.Text);
  // Only skip whitespace if there actually was a word in front of it.
  unsigned NumWhitespaceBytes =
      (NumWordBytes == 0)
          ? 0
          : measureReSTWhitespace(LP.Text.drop_front(NumWordBytes));
  LinePart Word = {
      LP.Text.substr(0, NumWordBytes),
      SourceRange(LP.Range.Start,
                  LP.Range.Start.getAdvancedLoc(NumWordBytes))};
  LinePart Rest = {
      LP.Text.drop_front(NumWordBytes + NumWhitespaceBytes),
      SourceRange(
          LP.Range.Start.getAdvancedLoc(NumWordBytes + NumWhitespaceBytes),
          LP.Range.End)};
  return {Word, Rest};
}

/// Splits \p LL into its first word and the remainder.
///
/// Lines whose text (from FirstTextByte on) is empty are skipped.  On the
/// first line with text, the word is measured from FirstTextByte, and the
/// remainder is the sub-list starting at that line with the word and the
/// whitespace after it dropped from the first line.  If every line is empty,
/// returns a default-constructed LinePart and \p LL unchanged.
///
/// NOTE(review): the word's SourceRange starts at \c L.Range.Start while the
/// word's text starts at \c L.FirstTextByte -- confirm that Line::Range
/// already begins at the first text byte.
std::pair llvm::rest::extractWord(LineListRef LL) {
  for (unsigned i = 0, e = LL.size(); i != e; ++i) {
    const Line &L = LL[i];
    StringRef Text = L.Text.drop_front(L.FirstTextByte);
    if (Text.empty())
      continue;
    unsigned NumWordBytes = measureReSTWord(Text);
    // Only skip whitespace if there actually was a word in front of it.
    unsigned NumWhitespaceBytes =
        (NumWordBytes == 0)
            ? 0
            : measureReSTWhitespace(Text.drop_front(NumWordBytes));
    LinePart Word = {
        Text.substr(0, NumWordBytes),
        SourceRange(L.Range.Start,
                    L.Range.Start.getAdvancedLoc(NumWordBytes))};
    LineListRef Rest = LL.subList(i, LL.size() - i);
    Rest.fromFirstLineDropFront(NumWordBytes + NumWhitespaceBytes);
    return {Word, Rest};
  }
  return {LinePart(), LL};
}

/// Removes the first word from \p TAI and returns it.
///
/// Works on whichever representation \p TAI holds (a single line part or a
/// list of lines); \p TAI is updated in place to hold the remainder.
LinePart llvm::rest::extractWord(TextAndInline *TAI) {
  if (TAI->isLinePart()) {
    auto WordAndRest = ::extractWord(TAI->getLinePart());
    TAI->setLinePart(WordAndRest.second);
    return WordAndRest.first;
  } else {
    auto WordAndRest = ::extractWord(TAI->getLines());
    TAI->setLines(WordAndRest.second);
    return WordAndRest.first;
  }
}