Files
swift-mirror/lib/ReST/Parser.cpp
2014-04-24 09:13:42 +00:00

1053 lines
33 KiB
C++

//===--- Parser.cpp - ReST parser -----------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "swift/ReST/Parser.h"
#include "Detail.h"
#include "swift/ReST/LineList.h"
#include "swift/ReST/XMLUtils.h"
#include "llvm/Support/ErrorHandling.h"
#include "clang/Basic/CharInfo.h"
using namespace llvm;
using namespace rest;
using namespace llvm::rest::detail;
using namespace clang;
namespace {
/// Result of recognizing the value part of an enumerator (for example the
/// "12" in "12." or the "#" in "#.") at the start of a piece of text.
struct ParsedEnumerator {
/// Kind of enumerated list line this enumerator introduces.
LineKind Kind;
/// Number of bytes occupied by the enumerator value itself, not counting
/// surrounding formatting such as parentheses or a trailing period.
unsigned EnumeratorBytes;
/// Numeric value of the enumerator; 0 is stored for the auto-numbered "#"
/// form.
unsigned Value;
};
} // unnamed namespace
/// Checks whether \p Text is empty (end of line) or begins with ReST
/// whitespace.
///
/// \param [out] WhitespaceBytes on success, the length in bytes of the leading
/// whitespace run (0 for an empty \p Text).  Left untouched on failure.
///
/// \returns true if \p Text is empty or starts with whitespace.
static bool startsWithWhitespaceOrEOL(StringRef Text,
                                      unsigned &WhitespaceBytes) {
  // End of line counts as a match with no whitespace at all.
  if (Text.empty()) {
    WhitespaceBytes = 0;
    return true;
  }

  if (!isReSTWhitespace(Text.front()))
    return false;

  // The first byte is whitespace; extend the run as far as it goes.
  const unsigned Size = Text.size();
  unsigned RunLength = 1;
  while (RunLength != Size && isReSTWhitespace(Text[RunLength]))
    ++RunLength;

  WhitespaceBytes = RunLength;
  return true;
}
/// Tries to recognize an enumerator value at the very beginning of \p Text.
///
/// Two forms are understood: the auto-enumerator "#" and a run of decimal
/// digits.  Formatting around the value (parentheses, period) is not handled
/// here.
///
/// \param [out] PE filled in only when an enumerator was recognized.
///
/// \returns true on success.
static bool tryParseEnumerator(StringRef Text, ParsedEnumerator &PE) {
  if (Text.empty())
    return false;

  if (Text[0] == '#') {
    PE = { LineKind::EnumeratedListAuto, 1, 0 };
    return true;
  }

  // FIXME: implement other enumerator kinds.
  if (!isDigit(Text[0]))
    return false;

  // Measure the run of digits; the first one was checked above.
  const unsigned Size = Text.size();
  unsigned NumDigits = 1;
  while (NumDigits != Size && isDigit(Text[NumDigits]))
    ++NumDigits;

  unsigned Value;
  if (Text.substr(0, NumDigits).getAsInteger(10, Value)) {
    // FIXME: we should produce a diagnostic if there was an overflow.
    return false;
  }

  PE = { LineKind::EnumeratedListArabic, NumDigits, Value };
  return true;
}
/// Tries to classify \p Text as the first line of an enumerated list item,
/// i.e., an enumerator with its formatting, followed by whitespace or the end
/// of the line.
///
/// \returns an enumerated-line classification on success, or the "unknown"
/// classification if \p Text does not start with a formatted enumerator.
static LineClassification tryParseEnumeratorWithFormatting(StringRef Text) {
  // [ReST/Syntax Details/Body Elements/Enumerated Lists]
  // Quote:
  //    The following formatting types are recognized:
  //
  //    * suffixed with a period: "1.", "A.", "a.", "I.", "i.".
  //    * surrounded by parentheses: "(1)", "(A)", "(a)", "(I)", "(i)".
  //    * suffixed with a right-parenthesis: "1)", "A)", "a)", "I)", "i)".

  // An opening parenthesis selects the "surrounded by parentheses" style and
  // shifts the enumerator value by one byte.
  const bool HasOpeningParen = Text.startswith("(");
  const unsigned PrefixBytes = HasOpeningParen ? 1 : 0;

  ParsedEnumerator PE;
  if (!tryParseEnumerator(Text.drop_front(PrefixBytes), PE))
    return LineClassification::makeUnknown();

  StringRef AfterValue = Text.drop_front(PrefixBytes + PE.EnumeratorBytes);

  // Determine the style from the single byte that follows the value.
  EnumeratorStyleKind Style;
  if (HasOpeningParen) {
    if (!AfterValue.startswith(")"))
      return LineClassification::makeUnknown();
    Style = EnumeratorStyleKind::SurroundedByParens;
  } else if (AfterValue.startswith(".")) {
    Style = EnumeratorStyleKind::DotAfter;
  } else if (AfterValue.startswith(")")) {
    Style = EnumeratorStyleKind::ParenAfter;
  } else {
    return LineClassification::makeUnknown();
  }

  // The enumerator must be followed by whitespace or the end of the line.
  StringRef AfterEnumerator = AfterValue.drop_front(1);
  unsigned WhitespaceBytes;
  if (!startsWithWhitespaceOrEOL(AfterEnumerator, WhitespaceBytes))
    return LineClassification::makeUnknown();

  const bool HasTextAfterEnumerator =
      WhitespaceBytes != AfterEnumerator.size();
  return LineClassification::makeEnumerated(
      PE.Kind, Style, HasTextAfterEnumerator,
      PrefixBytes + PE.EnumeratorBytes + 1 + WhitespaceBytes);
}
/// Classifies a single line by looking only at the text that starts at
/// \c L.FirstTextByte.
///
/// Checks are performed in this order: blank line, field list marker, bullet
/// list marker, enumerated list marker.  Anything else is classified as
/// "unknown".
llvm::rest::detail::LineClassification
llvm::rest::detail::classifyLine(const Line &L) {
  StringRef Text = L.Text.drop_front(L.FirstTextByte);
  if (Text.empty())
    return LineClassification::makeBlank();

  // [ReST/Syntax Details/Body Elements/Field Lists]
  // Quote:
  //    A field name may consist of any characters, but colons (":") inside of
  //    field names must be escaped with a backslash.  Inline markup is parsed
  //    in field names.
  //    [...]
  //    The field marker is followed by whitespace and the field body.
  //
  // The initial check is very lightweight here (just look if there is a
  // colon at the beginning), so handle this case first.
  if (Text.startswith(":") && Text.size() >= 3 && Text[1] != ':') {
    // This might be a field name.  This is a field list if the line contains a
    // colon that is not escaped, and the field name is not empty.
    // REST-FIXME: clarify that the field name can not be empty.
    const unsigned Size = Text.size();

    // If the name starts with a backslash, step over it and the character it
    // escapes.
    unsigned Pos = (Text[1] == '\\') ? 3 : 1;
    while (Pos != Size) {
      const char C = Text[Pos];
      if (C == ':') {
        // Check that the second colon is followed by end of line or
        // whitespace.
        StringRef AfterMarker = Text.drop_front(Pos + 1);
        unsigned WhitespaceBytes;
        if (startsWithWhitespaceOrEOL(AfterMarker, WhitespaceBytes))
          return LineClassification::makeFieldList(
              /*FieldNameBytes=*/Pos - 1, Pos + 1 + WhitespaceBytes);
        // Only the first unescaped colon is considered.
        break;
      }
      if (C == '\\') {
        // Skip the next character, it is escaped.
        ++Pos;
        if (Pos == Size)
          break;
      }
      ++Pos;
    }
  }

  // [ReST/Syntax Details/Body Elements/Bullet Lists]
  // ReST allows the following characters to start a bulleted list:
  //   U+002A ASTERISK
  //   U+002B PLUS SIGN
  //   U+002D HYPHEN-MINUS
  //   U+2022 BULLET
  //   U+2023 TRIANGULAR BULLET
  //   U+2043 HYPHEN BULLET
  //
  // Note: the following code tries to avoid using heavy machinery to decode
  // UTF-8.
  struct BulletMarker {
    const char *Marker;
    unsigned Bytes;
    LineKind Kind;
  };
  static const BulletMarker BulletMarkers[] = {
      {"*", 1, LineKind::BulletListAsterisk},
      {"+", 1, LineKind::BulletListPlus},
      {"-", 1, LineKind::BulletListHyphenMinus},
      {"\u2022", 3, LineKind::BulletListBullet},
      {"\u2023", 3, LineKind::BulletListTriangularBullet},
      {"\u2043", 3, LineKind::BulletListHyphenBullet},
  };
  for (const BulletMarker &BM : BulletMarkers) {
    if (!Text.startswith(BM.Marker))
      continue;
    // We have a bullet.  This is the initial line of a bullet list if the
    // bullet is at the end of the line or is followed by whitespace.
    StringRef WithoutBullet = Text.drop_front(BM.Bytes);
    unsigned WhitespaceBytes;
    if (startsWithWhitespaceOrEOL(WithoutBullet, WhitespaceBytes))
      return LineClassification::makeBullet(BM.Kind,
                                            BM.Bytes + WhitespaceBytes);
    // Not a bullet after all; fall through to the remaining checks.
    break;
  }

  {
    LineClassification MaybeEnumerator = tryParseEnumeratorWithFormatting(Text);
    if (MaybeEnumerator.Kind != LineKind::Unknown)
      return MaybeEnumerator;
  }

  // [ReST/Syntax Details/Body Elements/Option Lists]
  // FIXME: implement later.

  return LineClassification::makeUnknown();
}
/// Checks whether \p LL starts with a definition list item: an unclassified
/// ("unknown") line immediately followed by a non-blank line that begins at a
/// larger column.
///
/// \p LL must not be empty.
static bool isDefinitionList(LineListRef LL) {
  assert(LL.size() != 0);
  return LL.size() >= 2 &&
         LL[0].getClassification().Kind == LineKind::Unknown &&
         LL[1].getClassification().Kind != LineKind::Blank &&
         LL[0].FirstTextCol < LL[1].FirstTextCol;
}
/// Decides whether the enumerator-looking line \c LL[0] really starts an
/// enumerated list item, judging by the line that follows it.
static bool isEnumeratedListItem(LineListRef LL) {
  // An enumerator followed by a blank line is always a list item.
  if (LL.isNextLineBlank(0))
    return true;

  const Line &Curr = LL[0];
  const Line &Next = LL[1];

  if (Next.FirstTextCol == Curr.FirstTextCol) {
    // The next line starts in the same column.  It has to be an enumerator
    // with text after it, of the same kind and style as the current line.
    const bool NextIsEnumeratorWithText =
        isEnumerated(Next.getClassification().Kind) &&
        Next.getClassification().hasTextAfterEnumerator();
    // FIXME: check numeric value of enumerator.
    return NextIsEnumeratorWithText &&
           Curr.getClassification().Kind == Next.getClassification().Kind &&
           Curr.getClassification().getEnumeratorStyle() ==
               Next.getClassification().getEnumeratorStyle();
  }

  // The next line is unindented relative to the enumerator.
  if (Next.FirstTextCol < Curr.FirstTextCol)
    return true;

  if (Next.FirstTextCol <
      Curr.FirstTextCol +
          Curr.getClassification().getEnumeratorAndWhitespaceCols()) {
    // Next line does not have enough indentation, so this line is not a
    // list item.
    return false;
  }

  return true;
}
namespace {
/// Builds a ReST AST from a list of classified lines.  Nodes are allocated
/// in the \c ReSTContext passed to the constructor.
///
/// Each \c parseXXX member that returns a pair yields the created node and
/// the number of lines of the input it consumed.
class Parser {
ReSTContext &Context;
/// Parses a paragraph whose lines are expected at column \p BaseIndentation.
std::pair<ReSTASTNode *, unsigned> parseParagraph(LineListRef LL,
ColumnNum BaseIndentation);
std::pair<ReSTASTNode *, unsigned> parseBulletList(LineListRef LL);
std::pair<ReSTASTNode *, unsigned> parseEnumeratedList(LineListRef LL);
std::pair<ReSTASTNode *, unsigned> parseDefinitionList(LineListRef LL);
std::pair<ReSTASTNode *, unsigned> parseFieldList(LineListRef LL);
/// This might parse an indented literal block or a block quote.
std::pair<ReSTASTNode *, unsigned>
parseUnresolvedIndentedBlock(LineListRef LL);
/// Parses consecutive body elements into \p Children and returns the number
/// of lines consumed.  If \p MinIndentation is not null, it receives the
/// base indentation in effect when parsing stopped.
unsigned parseLevelImpl(LineListRef LL,
SmallVectorImpl<ReSTASTNode *> &Children,
ColumnNum BaseIndentation,
ColumnNum LeftMarginIndentation,
bool IgnoreIndentationOfTheFirstLine,
ColumnNum *MinIndentation);
/// Same as \c parseLevelImpl, using the column of the first line of \p LL
/// both as the base indentation and as the left margin.
unsigned parseLevel(LineListRef LL, SmallVectorImpl<ReSTASTNode *> &Children);
public:
Parser(ReSTContext &Context) : Context(Context) {}
/// Parses \p LL into a \c Document node.
Document *parseDocument(LineListRef LL);
};
} // unnamed namespace
/// Parses a paragraph that starts at \c LL[0].
///
/// The paragraph extends until the first blank line, until the first
/// non-blank line whose column differs from \p BaseIndentation, or until the
/// end of \p LL.  The column of line 0 is not checked when the first line is
/// truncated.
///
/// \returns the \c Paragraph node and the number of lines that belong to it
/// (a terminating blank line is not counted).
std::pair<ReSTASTNode *, unsigned>
Parser::parseParagraph(LineListRef LL, ColumnNum BaseIndentation) {
assert(LL.size() != 0);
// The first line is either plain text or an enumerator-looking line that
// was rejected as a list item.
assert(LL[0].getClassification().Kind == LineKind::Unknown ||
isEnumerated(LL[0].getClassification().Kind));
unsigned i = 0;
for (unsigned e = LL.size(); i != e; ++i) {
if (LL[i].getClassification().Kind != LineKind::Blank &&
!(i == 0 && LL.isFirstLineTruncated())) {
if (LL[i].FirstTextCol > BaseIndentation) {
// Indent.
assert(i != 1 && "can not be a definition list");
// Unexpected indent.  Paragraph ends here, the next line starts a new
// block.
break;
} else if (LL[i].FirstTextCol < BaseIndentation) {
// Unexpected unindent.  Paragraph ends here, the next line should
// match up with something else we parsed previously.
break;
}
}
switch (LL[i].getClassification().Kind) {
case LineKind::Unknown:
continue;
case LineKind::Blank: {
// Paragraph ends at a blank line.
auto *P = new (Context)
Paragraph(new (Context) TextAndInline(LL.subList(0, i)));
return { P, i };
}
// Lines that look like list or field markers but directly follow paragraph
// text (no blank line in between) are kept as part of the paragraph.
case LineKind::BulletListAsterisk:
case LineKind::BulletListPlus:
case LineKind::BulletListHyphenMinus:
case LineKind::BulletListBullet:
case LineKind::BulletListTriangularBullet:
case LineKind::BulletListHyphenBullet:
assert(!LL.isPreviousLineBlank(i));
continue;
case LineKind::EnumeratedListArabic:
case LineKind::EnumeratedListUppercaseAlphabet:
case LineKind::EnumeratedListLowercaseAlphabet:
case LineKind::EnumeratedListUppercaseRoman:
case LineKind::EnumeratedListLowercaseRoman:
case LineKind::EnumeratedListUppercaseAmbiguous:
case LineKind::EnumeratedListLowercaseAmbiguous:
case LineKind::EnumeratedListAuto:
// An enumerator-looking line is allowed to be the first paragraph line.
assert(i == 0 || !LL.isPreviousLineBlank(i));
continue;
case LineKind::FieldList:
assert(!LL.isPreviousLineBlank(i));
continue;
}
}
// Reached the end of the input or an indentation change without seeing a
// blank line.
auto *P =
new (Context) Paragraph(new (Context) TextAndInline(LL.subList(0, i)));
assert(i != 0);
return { P, i };
}
/// Parses a bullet list whose first item starts at \c LL[0].
///
/// The list continues while non-blank lines start in the same column as the
/// first bullet and use the same bullet kind.  Blank lines between items are
/// skipped.
///
/// \returns the \c BulletList node and the number of lines consumed.
std::pair<ReSTASTNode *, unsigned> Parser::parseBulletList(LineListRef LL) {
  SmallVector<BulletList::ListItemInfo, 4> ItemInfos;
  SmallVector<ReSTASTNode *, 4> ItemChildren;

  const LineKind ListKind = LL[0].getClassification().Kind;
  ColumnNum BulletIndentation = LL[0].FirstTextCol;

  const unsigned NumLinesTotal = LL.size();
  unsigned LineIdx = 0;
  while (LineIdx != NumLinesTotal) {
    // At the beginning of every iteration, we are either at the beginning of
    // the next list item or at the end of the list.
    const LineKind CurrKind = LL[LineIdx].getClassification().Kind;

    // Skip blank lines?
    if (CurrKind == LineKind::Blank) {
      ++LineIdx;
      continue;
    }

    // Indent: the indent is not large enough to line up with the previous
    // item's children.  The list ends here, the next line will start a block
    // quote, but at the same nesting level as this list.
    //
    // Unindent: the list ends here, the next line should match up with
    // something else we parsed previously.
    if (LL[LineIdx].FirstTextCol > BulletIndentation ||
        LL[LineIdx].FirstTextCol < BulletIndentation)
      break;

    bool StartsNextItem = false;
    switch (CurrKind) {
    case LineKind::BulletListAsterisk:
    case LineKind::BulletListPlus:
    case LineKind::BulletListHyphenMinus:
    case LineKind::BulletListBullet:
    case LineKind::BulletListTriangularBullet:
    case LineKind::BulletListHyphenBullet:
      // A different bullet character starts a different list.
      StartsNextItem = (CurrKind == ListKind);
      break;

    case LineKind::Unknown:
    case LineKind::Blank:
    case LineKind::EnumeratedListArabic:
    case LineKind::EnumeratedListUppercaseAlphabet:
    case LineKind::EnumeratedListLowercaseAlphabet:
    case LineKind::EnumeratedListUppercaseRoman:
    case LineKind::EnumeratedListLowercaseRoman:
    case LineKind::EnumeratedListUppercaseAmbiguous:
    case LineKind::EnumeratedListLowercaseAmbiguous:
    case LineKind::EnumeratedListAuto:
    case LineKind::FieldList:
      break;
    }
    if (!StartsNextItem)
      break;

    // If we got here, this is the start of a list item.  Strip the bullet and
    // the whitespace after it, then parse the item body.
    auto ItemLines = LL.dropFrontLines(LineIdx);
    ItemLines.fromFirstLineDropFront(
        LL[LineIdx].getClassification().getBulletAndWhitespaceBytes());

    SmallVector<ReSTASTNode *, 4> CurrItemChildren;
    LineIdx += parseLevel(ItemLines, CurrItemChildren);

    // Record where this item's children live in the flat children array.
    ItemInfos.push_back({ static_cast<unsigned>(ItemChildren.size()),
                          static_cast<unsigned>(CurrItemChildren.size()) });
    ItemChildren.append(CurrItemChildren.begin(), CurrItemChildren.end());
  }

  auto *BL = BulletList::create(Context, ItemInfos, ItemChildren);
  return { BL, LineIdx };
}
/// Parses an enumerated list whose first item starts at \c LL[0].
///
/// The list continues while non-blank lines start in the same column as the
/// first enumerator, have the same enumerator kind and style, and pass the
/// \c isEnumeratedListItem check.  Blank lines between items are skipped.
///
/// \returns the \c EnumeratedList node and the number of lines consumed.
std::pair<ReSTASTNode *, unsigned> Parser::parseEnumeratedList(LineListRef LL) {
  SmallVector<EnumeratedList::ListItemInfo, 4> ItemInfos;
  SmallVector<ReSTASTNode *, 4> ItemChildren;

  const LineKind ListKind = LL[0].getClassification().Kind;
  const auto ListStyle = LL[0].getClassification().getEnumeratorStyle();
  ColumnNum EnumeratorIndentation = LL[0].FirstTextCol;

  const unsigned NumLinesTotal = LL.size();
  unsigned LineIdx = 0;
  while (LineIdx != NumLinesTotal) {
    // Invariant: at the beginning of every iteration, we are either at the
    // beginning of the next list item or at the end of the list.
    const LineKind CurrKind = LL[LineIdx].getClassification().Kind;

    // Skip blank lines?
    if (CurrKind == LineKind::Blank) {
      ++LineIdx;
      continue;
    }

    // Indent: the indent is not large enough to line up with the previous
    // item's children.  The list ends here, the next line will start a block
    // quote, but at the same nesting level as this list.
    //
    // Unindent: the list ends here, the next line should match up with
    // something else we parsed previously.
    if (LL[LineIdx].FirstTextCol > EnumeratorIndentation ||
        LL[LineIdx].FirstTextCol < EnumeratorIndentation)
      break;

    bool StartsNextItem = false;
    switch (CurrKind) {
    case LineKind::EnumeratedListArabic:
    case LineKind::EnumeratedListUppercaseAlphabet:
    case LineKind::EnumeratedListLowercaseAlphabet:
    case LineKind::EnumeratedListUppercaseRoman:
    case LineKind::EnumeratedListLowercaseRoman:
    case LineKind::EnumeratedListUppercaseAmbiguous:
    case LineKind::EnumeratedListLowercaseAmbiguous:
    case LineKind::EnumeratedListAuto: {
      // A change of enumerator kind or style starts a different list.
      const bool FormatDiffers =
          LL[LineIdx].getClassification().Kind != ListKind ||
          LL[LineIdx].getClassification().getEnumeratorStyle() != ListStyle;
      const bool LooksLikeItem =
          isEnumeratedListItem(LL.dropFrontLines(LineIdx));
      StartsNextItem = !FormatDiffers && LooksLikeItem;
      break;
    }

    case LineKind::Unknown:
    case LineKind::Blank:
    case LineKind::BulletListAsterisk:
    case LineKind::BulletListPlus:
    case LineKind::BulletListHyphenMinus:
    case LineKind::BulletListBullet:
    case LineKind::BulletListTriangularBullet:
    case LineKind::BulletListHyphenBullet:
    case LineKind::FieldList:
      break;
    }
    if (!StartsNextItem)
      break;

    // If we got here, this is the start of a list item.  Strip the enumerator
    // and the whitespace after it, then parse the item body.
    auto ItemLines = LL.dropFrontLines(LineIdx);
    ItemLines.fromFirstLineDropFront(
        LL[LineIdx].getClassification().getEnumeratorAndWhitespaceBytes());

    SmallVector<ReSTASTNode *, 4> CurrItemChildren;
    LineIdx += parseLevel(ItemLines, CurrItemChildren);

    // Record where this item's children live in the flat children array.
    ItemInfos.push_back({ static_cast<unsigned>(ItemChildren.size()),
                          static_cast<unsigned>(CurrItemChildren.size()) });
    ItemChildren.append(CurrItemChildren.begin(), CurrItemChildren.end());
  }

  auto *EL = EnumeratedList::create(Context, ItemInfos, ItemChildren);
  return { EL, LineIdx };
}
/// Parses a definition list whose first term is \c LL[0].
///
/// Every item consists of a single term line followed by a more indented
/// definition body.  \p LL must satisfy \c isDefinitionList.
///
/// \returns the \c DefinitionList node and the number of lines consumed.
std::pair<ReSTASTNode *, unsigned> Parser::parseDefinitionList(LineListRef LL) {
  assert(isDefinitionList(LL));

  ColumnNum TermIndentation = LL[0].FirstTextCol;
  SmallVector<DefinitionListItem *, 4> Items;

  const unsigned NumLinesTotal = LL.size();
  unsigned LineIdx = 0;
  while (LineIdx != NumLinesTotal) {
    // Invariant: at the beginning of every iteration, we are either at the
    // beginning of the next list item or at the end of the list.

    // Unindent.  Definition list ends here.
    if (LL[LineIdx].FirstTextCol < TermIndentation)
      break;

    // The remaining lines must still look like "term + indented body".
    if (!isDefinitionList(LL.dropFrontLines(LineIdx)))
      break;

    // FIXME: parse the term line into term and classifiers.
    auto *Term = new (Context) TextAndInline(LL.subList(LineIdx, 1));

    // The definition body is laid out relative to its own first line; any
    // line at least one column right of the term still belongs to it.
    const unsigned BodyIdx = LineIdx + 1;
    ColumnNum ItemBaseIndentation = LL[BodyIdx].FirstTextCol;
    SmallVector<ReSTASTNode *, 4> DefinitionChildren;
    const unsigned NumBodyLines = parseLevelImpl(
        LL.dropFrontLines(BodyIdx), DefinitionChildren, ItemBaseIndentation,
        TermIndentation + ColumnNum::make(1),
        /*IgnoreIndentationOfTheFirstLine=*/false, nullptr);

    Items.push_back(
        DefinitionListItem::create(Context, Term, {}, DefinitionChildren));
    LineIdx += 1 + NumBodyLines;
  }

  auto *DL = DefinitionList::create(Context, Items);
  assert(LineIdx != 0);
  return { DL, LineIdx };
}
/// Parses a field list whose first field marker is on \c LL[0].
///
/// The list continues while lines are classified as field list lines and do
/// not start to the left of the first field marker.
///
/// \returns the \c FieldList node and the number of lines consumed.
std::pair<ReSTASTNode *, unsigned> Parser::parseFieldList(LineListRef LL) {
  assert(LL[0].getClassification().Kind == LineKind::FieldList);

  ColumnNum FirstColonIndentation = LL[0].FirstTextCol;
  SmallVector<Field *, 4> Fields;

  const unsigned NumLinesTotal = LL.size();
  unsigned LineIdx = 0;
  while (LineIdx != NumLinesTotal) {
    // Invariant: at the beginning of every iteration, we are either at the
    // beginning of the next list item or at the end of the list.

    // Unindent.  Field list ends here.
    if (LL[LineIdx].FirstTextCol < FirstColonIndentation)
      break;

    if (LL[LineIdx].getClassification().Kind != LineKind::FieldList)
      break;

    // The field name starts right after the opening colon.
    LinePart FieldNameText = LL.getLinePart(
        LineIdx, LL[LineIdx].FirstTextByte + 1,
        LL[LineIdx].getClassification().getFieldNameBytes());
    auto *FieldName = new (Context) TextAndInline(FieldNameText);

    // The field body is laid out relative to the first non-blank line that
    // follows the field marker line.
    // NOTE(review): if no such line exists, ItemBaseIndentation keeps
    // whatever ColumnNum's default construction provides -- confirm that
    // this is a well-defined value.
    ColumnNum ItemBaseIndentation;
    for (unsigned NextIdx = LineIdx + 1; NextIdx != NumLinesTotal; ++NextIdx) {
      if (LL[NextIdx].getClassification().Kind != LineKind::Blank) {
        ItemBaseIndentation = LL[NextIdx].FirstTextCol;
        break;
      }
    }

    // Strip the field marker and the whitespace after it from the first line,
    // then parse the body.  The first line's indentation is ignored because
    // it starts in the middle of the marker line.
    SmallVector<ReSTASTNode *, 4> BodyChildren;
    auto BodyLines = LL.dropFrontLines(LineIdx);
    BodyLines.fromFirstLineDropFront(
        BodyLines[0].getClassification().getFieldMarkerAndWhitespaceBytes());
    const unsigned NumFieldLines =
        parseLevelImpl(BodyLines, BodyChildren, ItemBaseIndentation,
                       FirstColonIndentation + ColumnNum::make(1),
                       /*IgnoreIndentationOfTheFirstLine=*/true, nullptr);

    Fields.push_back(Field::create(Context, FieldName, BodyChildren));
    LineIdx += NumFieldLines;
  }

  auto *FL = FieldList::create(Context, Fields);
  assert(LineIdx != 0);
  return { FL, LineIdx };
}
/// Parses an indented run of lines starting at \c LL[0] and wraps the
/// resulting nodes into a \c BlockQuote.
///
/// \returns the \c BlockQuote node and the number of lines consumed.
std::pair<ReSTASTNode *, unsigned>
Parser::parseUnresolvedIndentedBlock(LineListRef LL) {
  SmallVector<ReSTASTNode *, 4> QuotedNodes;
  const unsigned NumConsumed = parseLevel(LL, QuotedNodes);

  auto *Quote = BlockQuote::create(Context, QuotedNodes);
  assert(NumConsumed != 0);
  return { Quote, NumConsumed };
}
/// Parses a sequence of body elements from \p LL and appends the created
/// nodes to \p Children.
///
/// \param Children must be empty on entry.
/// \param BaseIndentation column at which the elements of this level are
/// expected to start.  Non-blank lines that start further right are parsed
/// as a nested indented block.
/// \param LeftMarginIndentation non-blank lines that start to the left of
/// this column end the level.  A line that starts between the left margin
/// and the base indentation lowers the base indentation to its own column;
/// the nodes collected so far are wrapped into a \c BlockQuote.
/// \param IgnoreIndentationOfTheFirstLine if true, the indentation checks are
/// skipped for line 0, and line 0 is never treated as the term of a
/// definition list.
/// \param MinIndentation if not null, receives the value of the base
/// indentation when parsing stops after the loop.
///
/// \returns the number of lines consumed (0 only for an empty \p LL).
unsigned Parser::parseLevelImpl(LineListRef LL,
SmallVectorImpl<ReSTASTNode *> &Children,
ColumnNum BaseIndentation,
ColumnNum LeftMarginIndentation,
bool IgnoreIndentationOfTheFirstLine,
ColumnNum *MinIndentation) {
assert(Children.size() == 0);
// Note: on this early return *MinIndentation is not written.
if (LL.empty())
return 0;
unsigned i = 0;
for (unsigned e = LL.size(); i != e;) {
if (LL[i].getClassification().Kind != LineKind::Blank &&
!(i == 0 && IgnoreIndentationOfTheFirstLine)) {
if (LL[i].FirstTextCol > BaseIndentation) {
// Indent.
//
// FIXME: parse a definition list or a block quote.
ReSTASTNode *N;
unsigned NumLines;
std::tie(N, NumLines) =
parseUnresolvedIndentedBlock(LL.dropFrontLines(i));
Children.push_back(N);
i += NumLines;
continue;
} else if (LL[i].FirstTextCol < LeftMarginIndentation) {
// Unexpected unindent.  Current indentation level ends here, the next
// line should match up with something else we parsed previously.
break;
} else if (LL[i].FirstTextCol < BaseIndentation) {
// Partial unindent that is still inside the left margin: everything parsed
// so far was more indented than this line, so it becomes a block quote, and
// this line's column becomes the new base indentation.
auto *BQ = BlockQuote::create(Context, Children);
Children.clear();
Children.push_back(BQ);
BaseIndentation = LL[i].FirstTextCol;
}
}
// Dispatch on the classification of the current line.  Every case advances
// i by at least the lines consumed by the sub-parser and restarts the loop.
switch (LL[i].getClassification().Kind) {
case LineKind::Unknown: {
auto SubLL = LL.dropFrontLines(i);
ReSTASTNode *N;
unsigned NumLines;
if (isDefinitionList(SubLL) &&
!(i == 0 && IgnoreIndentationOfTheFirstLine))
std::tie(N, NumLines) = parseDefinitionList(SubLL);
else
std::tie(N, NumLines) = parseParagraph(SubLL, BaseIndentation);
Children.push_back(N);
i += NumLines;
continue;
}
case LineKind::Blank:
// Skip blank lines?
i++;
continue;
case LineKind::BulletListAsterisk:
case LineKind::BulletListPlus:
case LineKind::BulletListHyphenMinus:
case LineKind::BulletListBullet:
case LineKind::BulletListTriangularBullet:
case LineKind::BulletListHyphenBullet: {
// If the line looks like a bullet list item, it is always a bullet list
// item, no further checks required.
ReSTASTNode *N;
unsigned NumLines;
std::tie(N, NumLines) = parseBulletList(LL.dropFrontLines(i));
Children.push_back(N);
i += NumLines;
continue;
}
case LineKind::EnumeratedListArabic:
case LineKind::EnumeratedListUppercaseAlphabet:
case LineKind::EnumeratedListLowercaseAlphabet:
case LineKind::EnumeratedListUppercaseRoman:
case LineKind::EnumeratedListLowercaseRoman:
case LineKind::EnumeratedListUppercaseAmbiguous:
case LineKind::EnumeratedListLowercaseAmbiguous:
case LineKind::EnumeratedListAuto: {
// A line that merely looks like an enumerator is parsed as a paragraph
// unless the following line confirms that it is a list item.
auto SubLL = LL.dropFrontLines(i);
bool IsListItem = isEnumeratedListItem(SubLL);
// FIXME: more checks on indentation?
ReSTASTNode *N;
unsigned NumLines;
if (IsListItem)
std::tie(N, NumLines) = parseEnumeratedList(SubLL);
else
std::tie(N, NumLines) = parseParagraph(SubLL, BaseIndentation);
Children.push_back(N);
i += NumLines;
continue;
}
case LineKind::FieldList: {
ReSTASTNode *N;
unsigned NumLines;
std::tie(N, NumLines) = parseFieldList(LL.dropFrontLines(i));
Children.push_back(N);
i += NumLines;
continue;
}
}
}
// Report the (possibly lowered) base indentation to the caller.
if (MinIndentation)
*MinIndentation = BaseIndentation;
assert(i != 0);
return i;
}
/// Parses body elements from \p LL into \p Children, using the column of the
/// first line both as the base indentation and as the left margin.
///
/// The first line of a non-empty \p LL must not be blank.
///
/// \returns the number of lines consumed (0 for an empty \p LL).
unsigned Parser::parseLevel(LineListRef LL,
                            SmallVectorImpl<ReSTASTNode *> &Children) {
  if (LL.size() == 0)
    return 0;

  assert(LL[0].getClassification().Kind != LineKind::Blank);
  ColumnNum FirstLineCol = LL[0].FirstTextCol;
  return parseLevelImpl(LL, Children, /*BaseIndentation=*/FirstLineCol,
                        /*LeftMarginIndentation=*/FirstLineCol,
                        /*IgnoreIndentationOfTheFirstLine=*/false,
                        /*MinIndentation=*/nullptr);
}
/// Parses \p LL into a \c Document.
///
/// Leading blank lines are dropped.  If the whole document ends up indented
/// and \c IgnoreUniformIndentation is not set in the language options, the
/// top-level nodes are wrapped into a single \c BlockQuote.
Document *Parser::parseDocument(LineListRef LL) {
  // Find the first line that carries text.
  const unsigned NumLinesTotal = LL.size();
  unsigned FirstTextLine = 0;
  while (FirstTextLine != NumLinesTotal &&
         LL[FirstTextLine].getClassification().Kind == LineKind::Blank)
    ++FirstTextLine;

  auto Body = LL.dropFrontLines(FirstTextLine);
  if (Body.empty())
    return Document::create(Context, {});

  // Parse with the left margin at column 0 so that no unindent can end the
  // level before all lines are consumed.
  SmallVector<ReSTASTNode *, 8> Children;
  ColumnNum MinIndentation;
  unsigned NumLines = parseLevelImpl(
      Body, Children, Body[0].FirstTextCol, ColumnNum::make(0),
      /*IgnoreIndentationOfTheFirstLine=*/false, &MinIndentation);
  assert(NumLines == Body.size());

  const bool IsUniformlyIndented = MinIndentation != ColumnNum::make(0);
  if (!Context.LangOpts.IgnoreUniformIndentation && IsUniformlyIndented) {
    auto *BQ = BlockQuote::create(Context, Children);
    Children.clear();
    Children.push_back(BQ);
  }
  return Document::create(Context, Children);
}
/// Public entry point: parses \p LL into a \c Document allocated in \p C.
Document *llvm::rest::parseDocument(ReSTContext &C, LineListRef LL) {
  return Parser(C).parseDocument(LL);
}
namespace {
/// Serializes a ReST AST as XML with docutils-style element names
/// ("document", "paragraph", "bullet_list", ...), writing to the stream given
/// at construction.
///
/// This type is an implementation detail of this file, so it lives in an
/// unnamed namespace to give it internal linkage.
struct CommentToDocutilsXMLConverter {
  raw_ostream &OS;

  explicit CommentToDocutilsXMLConverter(raw_ostream &OS) : OS(OS) {}

  /// Dispatches on the dynamic kind of \p N and prints the node.
  void printASTNode(const ReSTASTNode *N) {
    switch (N->getKind()) {
    case ASTNodeKind::Document:
      printDocument(cast<Document>(N));
      break;

    case ASTNodeKind::Section:
    case ASTNodeKind::Topic:
    case ASTNodeKind::Sidebar:
    case ASTNodeKind::Title:
    case ASTNodeKind::Subtitle:
    case ASTNodeKind::Transition:
      // Printing these node kinds is not implemented yet.
      llvm_unreachable("implement");

    case ASTNodeKind::Paragraph:
      printParagraph(cast<Paragraph>(N));
      break;
    case ASTNodeKind::BulletList:
      printBulletList(cast<BulletList>(N));
      break;
    case ASTNodeKind::EnumeratedList:
      printEnumeratedList(cast<EnumeratedList>(N));
      break;
    case ASTNodeKind::DefinitionListItem:
      printDefinitionListItem(cast<DefinitionListItem>(N));
      break;
    case ASTNodeKind::DefinitionList:
      printDefinitionList(cast<DefinitionList>(N));
      break;
    case ASTNodeKind::Field:
      printField(cast<Field>(N));
      break;
    case ASTNodeKind::FieldList:
      printFieldList(cast<FieldList>(N));
      break;
    case ASTNodeKind::BlockQuote:
      printBlockQuote(cast<BlockQuote>(N));
      break;
    case ASTNodeKind::TextAndInline:
      printTextAndInline(cast<TextAndInline>(N));
      break;
    case ASTNodeKind::PrivateExtension:
      printPrivateExtension(cast<PrivateExtension>(N));
      break;
    }
  }

  void printDocument(const Document *D) {
    OS << "<document>";
    for (const auto *N : D->getChildren()) {
      printASTNode(N);
    }
    OS << "</document>";
  }

  void printParagraph(const Paragraph *P) {
    OS << "<paragraph>";
    printTextAndInline(P->getContent());
    OS << "</paragraph>";
  }

  void printBulletList(const BulletList *BL) {
    OS << "<bullet_list>";
    for (unsigned i = 0, e = BL->getNumItems(); i != e; ++i) {
      OS << "<list_item>";
      for (const auto *N : BL->getItemChildren(i)) {
        printASTNode(N);
      }
      OS << "</list_item>";
    }
    OS << "</bullet_list>";
  }

  void printEnumeratedList(const EnumeratedList *EL) {
    OS << "<enumerated_list>";
    for (unsigned i = 0, e = EL->getNumItems(); i != e; ++i) {
      OS << "<list_item>";
      for (const auto *N : EL->getItemChildren(i)) {
        printASTNode(N);
      }
      OS << "</list_item>";
    }
    OS << "</enumerated_list>";
  }

  /// Prints the term, then every classifier in its own element, then the
  /// definition body.
  void printDefinitionListItem(const DefinitionListItem *DLI) {
    OS << "<definition_list_item>";

    OS << "<term>";
    printASTNode(DLI->getTerm());
    OS << "</term>";

    for (const auto *N : DLI->getClassifiers()) {
      OS << "<classifier>";
      printASTNode(N);
      OS << "</classifier>";
    }

    OS << "<definition>";
    for (const auto *N : DLI->getDefinitionChildren()) {
      printASTNode(N);
    }
    OS << "</definition>";

    OS << "</definition_list_item>";
  }

  void printDefinitionList(const DefinitionList *DL) {
    OS << "<definition_list>";
    for (const auto *N : DL->getChildren()) {
      printASTNode(N);
    }
    OS << "</definition_list>";
  }

  void printField(const Field *F) {
    OS << "<field>";

    OS << "<field_name>";
    printASTNode(F->getName());
    OS << "</field_name>";

    OS << "<field_body>";
    for (const auto *N : F->getBodyChildren()) {
      printASTNode(N);
    }
    OS << "</field_body>";

    OS << "</field>";
  }

  void printFieldList(const FieldList *FL) {
    OS << "<field_list>";
    for (const auto *F : FL->getChildren()) {
      printASTNode(F);
    }
    OS << "</field_list>";
  }

  void printBlockQuote(const BlockQuote *BQ) {
    OS << "<block_quote>";
    for (const auto *N : BQ->getChildren()) {
      printASTNode(N);
    }
    OS << "</block_quote>";
  }

  /// Prints the text with XML escaping.  For multi-line text, the part of
  /// every line starting at its first text byte is printed, and lines are
  /// separated (not terminated) by a newline.
  void printTextAndInline(const TextAndInline *T) {
    if (T->isLinePart()) {
      LinePart LP = T->getLinePart();
      appendWithXMLEscaping(OS, LP.Text);
    } else {
      LineListRef LL = T->getLines();
      for (unsigned i = 0, e = LL.size(); i != e; ++i) {
        appendWithXMLEscaping(OS, LL[i].Text.drop_front(LL[i].FirstTextByte));
        if (i != e - 1)
          OS << '\n';
      }
    }
  }

  /// Private extensions are printed as an opaque placeholder element; the
  /// node itself is not inspected.
  void printPrivateExtension(const PrivateExtension *) {
    OS << "<llvm:private_extension />";
  }
};
} // unnamed namespace
/// Writes the XML representation of document \p D to \p OS.
void llvm::rest::convertToDocutilsXML(const Document *D, raw_ostream &OS) {
  CommentToDocutilsXMLConverter(OS).printASTNode(D);
}
void ReSTASTNode::dump() const {
CommentToDocutilsXMLConverter Converter(llvm::errs());
Converter.printASTNode(this);
llvm::errs() << '\n';
}
/// \returns the length in bytes of the run of ReST whitespace at the start of
/// \p Text (0 if \p Text is empty or starts with a non-whitespace byte).
static unsigned measureReSTWhitespace(StringRef Text) {
  const unsigned Size = Text.size();
  unsigned NumBytes = 0;
  while (NumBytes != Size && isReSTWhitespace(Text[NumBytes]))
    ++NumBytes;
  return NumBytes;
}
/// \returns the length in bytes of the run of non-whitespace bytes at the
/// start of \p Text (0 if \p Text is empty or starts with ReST whitespace).
static unsigned measureReSTWord(StringRef Text) {
  const unsigned Size = Text.size();
  unsigned NumBytes = 0;
  while (NumBytes != Size && !isReSTWhitespace(Text[NumBytes]))
    ++NumBytes;
  return NumBytes;
}
/// Splits \p LP into its first word and the remainder.
///
/// The word is the leading run of non-whitespace bytes.  The remainder starts
/// after the whitespace that follows the word.  If \p LP does not start with
/// a word, the returned word is empty and the remainder is all of \p LP.
std::pair<LinePart, LinePart> llvm::rest::extractWord(LinePart LP) {
  const unsigned WordBytes = measureReSTWord(LP.Text);

  // Only swallow whitespace when a word was actually found.
  unsigned TrailingWhitespaceBytes = 0;
  if (WordBytes != 0)
    TrailingWhitespaceBytes =
        measureReSTWhitespace(LP.Text.drop_front(WordBytes));
  const unsigned ConsumedBytes = WordBytes + TrailingWhitespaceBytes;

  LinePart Word = {
      LP.Text.substr(0, WordBytes),
      SourceRange(LP.Range.Start, LP.Range.Start.getAdvancedLoc(WordBytes))};
  LinePart Rest = {
      LP.Text.drop_front(ConsumedBytes),
      SourceRange(LP.Range.Start.getAdvancedLoc(ConsumedBytes), LP.Range.End)};
  return {Word, Rest};
}
/// Extracts the first word from the first line of \p LL that has any text.
///
/// \returns the word and the remaining lines, starting with the line the word
/// was taken from with the word and the whitespace after it dropped.  If no
/// line has text, returns an empty \c LinePart and \p LL unchanged.
std::pair<LinePart, LineListRef> llvm::rest::extractWord(LineListRef LL) {
  const unsigned NumLines = LL.size();

  // Skip lines that have no text after their first text byte.
  unsigned LineIdx = 0;
  while (LineIdx != NumLines &&
         LL[LineIdx].Text.drop_front(LL[LineIdx].FirstTextByte).empty())
    ++LineIdx;
  if (LineIdx == NumLines)
    return {LinePart(), LL};

  const Line &L = LL[LineIdx];
  StringRef Text = L.Text.drop_front(L.FirstTextByte);

  const unsigned WordBytes = measureReSTWord(Text);
  // Only swallow whitespace when a word was actually found.
  unsigned TrailingWhitespaceBytes = 0;
  if (WordBytes != 0)
    TrailingWhitespaceBytes = measureReSTWhitespace(Text.drop_front(WordBytes));

  // NOTE(review): the word's range starts at L.Range.Start, while the word's
  // text starts L.FirstTextByte bytes into L.Text.  Confirm whether
  // L.Range.Start already accounts for FirstTextByte; if not, this range is
  // shifted to the left.
  LinePart Word = {
      Text.substr(0, WordBytes),
      SourceRange(L.Range.Start, L.Range.Start.getAdvancedLoc(WordBytes))};

  LineListRef Rest = LL.subList(LineIdx, LL.size() - LineIdx);
  Rest.fromFirstLineDropFront(WordBytes + TrailingWhitespaceBytes);
  return {Word, Rest};
}
/// Removes the first word (and the whitespace after it) from the text held by
/// \p TAI and returns that word.  Works for both representations of the text:
/// a single line part or a list of lines.
LinePart llvm::rest::extractWord(TextAndInline *TAI) {
  if (!TAI->isLinePart()) {
    auto Split = ::extractWord(TAI->getLines());
    TAI->setLines(Split.second);
    return Split.first;
  }

  auto Split = ::extractWord(TAI->getLinePart());
  TAI->setLinePart(Split.second);
  return Split.first;
}