swift-mirror/lib/ReST/Parser.cpp

//===--- Parser.cpp - ReST parser -----------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#include "swift/ReST/Parser.h"
#include "Detail.h"
#include "swift/ReST/LineList.h"
#include "swift/ReST/XMLUtils.h"
#include "llvm/Support/ErrorHandling.h"
#include "clang/Basic/CharInfo.h"

using namespace llvm;
using namespace rest;
using namespace llvm::rest::detail;

using namespace clang;

namespace {
/// Result of recognizing a bare enumerator (without its "." / "(" / ")"
/// formatting) at the start of a piece of text.
struct ParsedEnumerator {
  /// Kind of enumerated-list line implied by the enumerator.
  LineKind Kind;
  /// Number of bytes occupied by the enumerator text itself.
  unsigned EnumeratorBytes;
  /// Numeric value of the enumerator; 0 for the auto-enumerator "#".
  unsigned Value;
};
} // unnamed namespace

/// Checks whether \p Text is empty or begins with ReST whitespace.
///
/// On success \p WhitespaceBytes receives the length in bytes of the leading
/// whitespace run (0 for empty text).  On failure it is left untouched.
static bool startsWithWhitespaceOrEOL(StringRef Text,
                                      unsigned &WhitespaceBytes) {
  // End of line counts as a match with no whitespace at all.
  if (Text.empty()) {
    WhitespaceBytes = 0;
    return true;
  }

  if (!isReSTWhitespace(Text.front()))
    return false;

  // The first byte is known to be whitespace; extend the run as far as
  // possible.
  unsigned NumBytes = 1;
  const unsigned Size = Text.size();
  while (NumBytes != Size && isReSTWhitespace(Text[NumBytes]))
    ++NumBytes;

  WhitespaceBytes = NumBytes;
  return true;
}

/// Tries to recognize an enumerator at the very start of \p Text.
///
/// Two forms are recognized: "#" (auto-enumerator) and a run of decimal
/// digits (arabic numeral).  On success fills in \p PE and returns true;
/// otherwise returns false and leaves \p PE unchanged.
static bool tryParseEnumerator(StringRef Text, ParsedEnumerator &PE) {
  if (Text.empty())
    return false;

  const char First = Text[0];
  if (First == '#') {
    PE = { LineKind::EnumeratedListAuto, 1, 0 };
    return true;
  }

  if (!isDigit(First)) {
    // FIXME: implement other enumerator kinds.
    return false;
  }

  // Measure the run of digits that makes up the number.
  unsigned NumDigits = 1;
  const unsigned Size = Text.size();
  while (NumDigits != Size && isDigit(Text[NumDigits]))
    ++NumDigits;

  unsigned Value;
  // A true result from getAsInteger() is treated as a parse failure.
  if (Text.substr(0, NumDigits).getAsInteger(10, Value)) {
    // FIXME: we should produce a diagnostic if there was an overflow.
    return false;
  }

  PE = { LineKind::EnumeratedListArabic, NumDigits, Value };
  return true;
}

/// Tries to classify \p Text as the first line of an enumerated list item:
/// an enumerator with its formatting, followed by whitespace or end of line.
///
/// Returns an "enumerated" classification on success and an "unknown"
/// classification otherwise.
static LineClassification tryParseEnumeratorWithFormatting(StringRef Text) {
  // [ReST/Syntax Details/Body Elements/Enumerated Lists]
  // Quote:
  //    The following formatting types are recognized:
  //
  //    * suffixed with a period: "1.", "A.", "a.", "I.", "i.".
  //    * surrounded by parentheses: "(1)", "(A)", "(a)", "(I)", "(i)".
  //    * suffixed with a right-parenthesis: "1)", "A)", "a)", "I)", "i)".

  // An opening parenthesis, if present, precedes the enumerator value.
  const unsigned OpenParenBytes = Text.startswith("(") ? 1 : 0;

  ParsedEnumerator PE;
  if (!tryParseEnumerator(Text.drop_front(OpenParenBytes), PE))
    return LineClassification::makeUnknown();

  // The character right after the value determines the formatting style.
  StringRef AfterValue = Text.drop_front(OpenParenBytes + PE.EnumeratorBytes);
  EnumeratorStyleKind Style;
  if (OpenParenBytes != 0) {
    if (!AfterValue.startswith(")"))
      return LineClassification::makeUnknown();
    Style = EnumeratorStyleKind::SurroundedByParens;
  } else if (AfterValue.startswith(".")) {
    Style = EnumeratorStyleKind::DotAfter;
  } else if (AfterValue.startswith(")")) {
    Style = EnumeratorStyleKind::ParenAfter;
  } else {
    return LineClassification::makeUnknown();
  }

  // The enumerator must be followed by whitespace or by the end of the line.
  StringRef AfterEnumerator = AfterValue.drop_front(1);
  unsigned WhitespaceBytes;
  if (!startsWithWhitespaceOrEOL(AfterEnumerator, WhitespaceBytes))
    return LineClassification::makeUnknown();

  const bool HasTextAfterEnumerator =
      WhitespaceBytes != AfterEnumerator.size();
  return LineClassification::makeEnumerated(
      PE.Kind, Style, HasTextAfterEnumerator,
      OpenParenBytes + PE.EnumeratorBytes + 1 + WhitespaceBytes);
}

/// Classifies a single line by looking only at the text that follows its
/// leading indentation.
///
/// Checks are tried in this order: blank line, field list marker, bullet list
/// marker, enumerated list marker.  Anything else is classified as unknown.
llvm::rest::detail::LineClassification
llvm::rest::detail::classifyLine(const Line &L) {
  StringRef Text = L.Text.drop_front(L.FirstTextByte);

  if (Text.empty())
    return LineClassification::makeBlank();

  // [ReST/Syntax Details/Body Elements/Field Lists]
  // Quote:
  //    A field name may consist of any characters, but colons (":") inside of
  //    field names must be escaped with a backslash.  Inline markup is parsed
  //    in field names.
  //    [...]
  //    The field marker is followed by whitespace and the field body.
  //
  // The initial check is very lightweight here (just look if there is a
  // colon at the beginning), so handle this case first.
  if (Text.startswith(":") && Text.size() >= 3 && Text[1] != ':') {
    // This might be a field name.  This is a field list if the line contains a
    // colon that is not escaped, and the field name is not empty.
    // REST-FIXME: clarify that the field name can not be empty.
    const unsigned End = Text.size();
    // Start right after the opening colon.  If the name begins with a
    // backslash, step over it together with the character it escapes.
    unsigned Pos = (Text[1] == '\\') ? 3 : 1;
    while (Pos != End) {
      const char C = Text[Pos];
      if (C == ':') {
        // Found the closing colon.  It only terminates a field marker when
        // it is followed by end of line or whitespace.
        const unsigned FieldNameBytes = Pos - 1;
        unsigned WhitespaceBytes;
        if (!startsWithWhitespaceOrEOL(Text.drop_front(Pos + 1),
                                       WhitespaceBytes))
          break;
        return LineClassification::makeFieldList(FieldNameBytes,
                                                 Pos + 1 + WhitespaceBytes);
      }
      if (C == '\\') {
        // Skip the next character, it is escaped.
        ++Pos;
        if (Pos == End)
          break;
      }
      ++Pos;
    }
  }

  // [ReST/Syntax Details/Body Elements/Bullet Lists]
  // ReST allows the following characters to start a bulleted list:
  // U+002A ASTERISK
  // U+002B PLUS SIGN
  // U+002D HYPHEN-MINUS
  // U+2022 BULLET
  // U+2023 TRIANGULAR BULLET
  // U+2043 HYPHEN BULLET
  //
  // Note: the following code tries to avoid using heavy machinery to decode
  // UTF-8.
  struct BulletMarker {
    const char *Marker;
    LineKind Kind;
    unsigned Bytes;
  };
  static const BulletMarker BulletMarkers[] = {
      {"*", LineKind::BulletListAsterisk, 1},
      {"+", LineKind::BulletListPlus, 1},
      {"-", LineKind::BulletListHyphenMinus, 1},
      {"\u2022", LineKind::BulletListBullet, 3},
      {"\u2023", LineKind::BulletListTriangularBullet, 3},
      {"\u2043", LineKind::BulletListHyphenBullet, 3},
  };
  for (const BulletMarker &BM : BulletMarkers) {
    if (!Text.startswith(BM.Marker))
      continue;

    // We have a bullet.  This is the initial line of a bullet list if the
    // bullet is at the end of the line or is followed by whitespace.
    unsigned WhitespaceBytes;
    if (startsWithWhitespaceOrEOL(Text.drop_front(BM.Bytes), WhitespaceBytes))
      return LineClassification::makeBullet(BM.Kind,
                                            BM.Bytes + WhitespaceBytes);

    // Only the first matching marker is considered; fall through to the
    // remaining checks.
    break;
  }

  LineClassification MaybeEnumerator = tryParseEnumeratorWithFormatting(Text);
  if (MaybeEnumerator.Kind != LineKind::Unknown)
    return MaybeEnumerator;

  // [ReST/Syntax Details/Body Elements/Option Lists]
  // FIXME: implement later.

  return LineClassification::makeUnknown();
}

/// Returns true if the first two lines of \p LL look like the start of a
/// definition list item: an unclassified term line immediately followed by a
/// non-blank line that is indented further than the term.
///
/// \p LL must not be empty.
static bool isDefinitionList(LineListRef LL) {
  assert(LL.size() != 0);
  return LL.size() >= 2 &&
         LL[0].getClassification().Kind == LineKind::Unknown &&
         LL[1].getClassification().Kind != LineKind::Blank &&
         LL[0].FirstTextCol < LL[1].FirstTextCol;
}

/// Decides whether the first line of \p LL, which looks like an enumerator
/// line, really starts an enumerated list item.  The decision is based on the
/// line that follows it.
///
/// NOTE(review): LL[1] is read whenever isNextLineBlank(0) is false;
/// presumably that call returns true when there is no next line -- confirm.
static bool isEnumeratedListItem(LineListRef LL) {
  // An enumerator line followed by a blank line is always a list item.
  if (LL.isNextLineBlank(0))
    return true;

  if (LL[1].FirstTextCol == LL[0].FirstTextCol) {
    // The next line starts in the same column.  It has to be an enumerator
    // line with text after the enumerator...
    if (!isEnumerated(LL[1].getClassification().Kind) ||
        !LL[1].getClassification().hasTextAfterEnumerator())
      return false;

    // ...and it has to use the same enumerator kind and formatting style.
    // FIXME: check numeric value of enumerator.
    return LL[0].getClassification().Kind == LL[1].getClassification().Kind &&
           LL[0].getClassification().getEnumeratorStyle() ==
               LL[1].getClassification().getEnumeratorStyle();
  }

  // The next line is unindented relative to the enumerator.
  if (LL[1].FirstTextCol < LL[0].FirstTextCol)
    return true;

  // The next line is indented further than the enumerator.  If it does not
  // have enough indentation to line up with the item text, this line is not
  // a list item.
  return !(LL[1].FirstTextCol <
           LL[0].FirstTextCol +
               LL[0].getClassification().getEnumeratorAndWhitespaceCols());
}

namespace {
/// Parser that turns a list of classified lines into a ReST AST.
///
/// Each parseXxx() helper returns the node it built together with the number
/// of lines of its input that it consumed.
class Parser {
  /// Context used to allocate and create AST nodes.
  ReSTContext &Context;

  /// Parses a paragraph whose lines are expected at \p BaseIndentation.
  std::pair<ReSTASTNode *, unsigned> parseParagraph(LineListRef LL,
                                                    ColumnNum BaseIndentation);
  /// Parses a bullet list starting at the first line of \p LL.
  std::pair<ReSTASTNode *, unsigned> parseBulletList(LineListRef LL);
  /// Parses an enumerated list starting at the first line of \p LL.
  std::pair<ReSTASTNode *, unsigned> parseEnumeratedList(LineListRef LL);
  /// Parses a definition list starting at the first line of \p LL.
  std::pair<ReSTASTNode *, unsigned> parseDefinitionList(LineListRef LL);
  /// Parses a field list starting at the first line of \p LL.
  std::pair<ReSTASTNode *, unsigned> parseFieldList(LineListRef LL);

  /// This might parse an indented literal block or a block quote.
  std::pair<ReSTASTNode *, unsigned>
  parseUnresolvedIndentedBlock(LineListRef LL);

  /// Parses consecutive body elements into \p Children and returns the number
  /// of lines consumed.  If \p MinIndentation is not null, it receives the
  /// final base indentation of the level.
  unsigned parseLevelImpl(LineListRef LL,
                          SmallVectorImpl<ReSTASTNode *> &Children,
                          ColumnNum BaseIndentation,
                          ColumnNum LeftMarginIndentation,
                          bool IgnoreIndentationOfTheFirstLine,
                          ColumnNum *MinIndentation);

  /// Convenience wrapper around parseLevelImpl() that takes the indentation
  /// from the first line of \p LL.
  unsigned parseLevel(LineListRef LL, SmallVectorImpl<ReSTASTNode *> &Children);

public:
  Parser(ReSTContext &Context) : Context(Context) {}

  /// Parses all lines of \p LL into a document node.
  Document *parseDocument(LineListRef LL);
};
} // unnamed namespace

/// Parses a paragraph that starts at the first line of \p LL.
///
/// The paragraph ends at the first blank line, at the first line whose
/// indentation differs from \p BaseIndentation, or at the end of \p LL.  The
/// indentation of a truncated first line is not checked.
///
/// \returns the paragraph node and the number of lines it spans.
std::pair<ReSTASTNode *, unsigned>
Parser::parseParagraph(LineListRef LL, ColumnNum BaseIndentation) {
  assert(LL.size() != 0);
  assert(LL[0].getClassification().Kind == LineKind::Unknown ||
         isEnumerated(LL[0].getClassification().Kind));

  // Builds the paragraph node out of the first Count lines.
  auto MakeParagraph = [&](unsigned Count) -> ReSTASTNode * {
    return new (Context)
        Paragraph(new (Context) TextAndInline(LL.subList(0, Count)));
  };

  unsigned LineIdx = 0;
  const unsigned NumLines = LL.size();
  for (; LineIdx != NumLines; ++LineIdx) {
    const auto Kind = LL[LineIdx].getClassification().Kind;

    const bool SkipIndentationCheck =
        Kind == LineKind::Blank ||
        (LineIdx == 0 && LL.isFirstLineTruncated());
    if (!SkipIndentationCheck) {
      if (LL[LineIdx].FirstTextCol > BaseIndentation) {
        // Indent.
        assert(LineIdx != 1 && "can not be a definition list");
        // Unexpected indent.  Paragraph ends here, the next line starts a new
        // block.
        break;
      }
      if (LL[LineIdx].FirstTextCol < BaseIndentation) {
        // Unexpected unindent.  Paragraph ends here, the next line should
        // match up with something else we parsed previously.
        break;
      }
    }

    switch (Kind) {
    case LineKind::Unknown:
      break;

    case LineKind::Blank:
      // Paragraph ends at a blank line.
      return { MakeParagraph(LineIdx), LineIdx };

    case LineKind::BulletListAsterisk:
    case LineKind::BulletListPlus:
    case LineKind::BulletListHyphenMinus:
    case LineKind::BulletListBullet:
    case LineKind::BulletListTriangularBullet:
    case LineKind::BulletListHyphenBullet:
    case LineKind::FieldList:
      // Such a line is part of the paragraph text; it is not expected right
      // after a blank line.
      assert(!LL.isPreviousLineBlank(LineIdx));
      break;

    case LineKind::EnumeratedListArabic:
    case LineKind::EnumeratedListUppercaseAlphabet:
    case LineKind::EnumeratedListLowercaseAlphabet:
    case LineKind::EnumeratedListUppercaseRoman:
    case LineKind::EnumeratedListLowercaseRoman:
    case LineKind::EnumeratedListUppercaseAmbiguous:
    case LineKind::EnumeratedListLowercaseAmbiguous:
    case LineKind::EnumeratedListAuto:
      // The paragraph itself may start with an enumerator-looking line.
      assert(LineIdx == 0 || !LL.isPreviousLineBlank(LineIdx));
      break;
    }
  }

  ReSTASTNode *P = MakeParagraph(LineIdx);
  assert(LineIdx != 0);
  return { P, LineIdx };
}

/// Parses a bullet list that starts at the first line of \p LL.
///
/// All items share the bullet kind and the indentation of the first line.
/// Blank lines between items are skipped.  The list ends at the first
/// non-blank line with a different indentation or a different classification.
///
/// \returns the list node and the number of lines consumed.
std::pair<ReSTASTNode *, unsigned> Parser::parseBulletList(LineListRef LL) {
  SmallVector<BulletList::ListItemInfo, 4> ItemInfos;
  SmallVector<ReSTASTNode *, 4> ItemChildren;

  const auto ListKind = LL[0].getClassification().Kind;
  ColumnNum BulletIndentation = LL[0].FirstTextCol;

  unsigned LineIdx = 0;
  const unsigned NumLines = LL.size();
  while (LineIdx != NumLines) {
    // At the beginning of every iteration, we are either at the beginning of
    // the next list item or at the end of the list.
    const auto CurKind = LL[LineIdx].getClassification().Kind;

    if (CurKind == LineKind::Blank) {
      // Skip blank lines?
      ++LineIdx;
      continue;
    }

    // An indent here is not large enough to line up with the previous item's
    // children: the list ends and the next line will start a block quote at
    // the same nesting level as this list.  An unindent also ends the list;
    // the next line should match up with something parsed previously.
    if (LL[LineIdx].FirstTextCol > BulletIndentation ||
        LL[LineIdx].FirstTextCol < BulletIndentation)
      break;

    bool StartsItem = false;
    switch (CurKind) {
    case LineKind::BulletListAsterisk:
    case LineKind::BulletListPlus:
    case LineKind::BulletListHyphenMinus:
    case LineKind::BulletListBullet:
    case LineKind::BulletListTriangularBullet:
    case LineKind::BulletListHyphenBullet:
      // A different bullet character starts a different list.
      StartsItem = (CurKind == ListKind);
      break;

    case LineKind::Blank: // Handled above.
    case LineKind::Unknown:
    case LineKind::EnumeratedListArabic:
    case LineKind::EnumeratedListUppercaseAlphabet:
    case LineKind::EnumeratedListLowercaseAlphabet:
    case LineKind::EnumeratedListUppercaseRoman:
    case LineKind::EnumeratedListLowercaseRoman:
    case LineKind::EnumeratedListUppercaseAmbiguous:
    case LineKind::EnumeratedListLowercaseAmbiguous:
    case LineKind::EnumeratedListAuto:
    case LineKind::FieldList:
      break;
    }
    if (!StartsItem)
      break;

    // If we got here, this is the start of a list item.  Strip the bullet
    // and the whitespace after it, then parse the item body as a nested
    // level.
    auto ItemLines = LL.dropFrontLines(LineIdx);
    ItemLines.fromFirstLineDropFront(
        LL[LineIdx].getClassification().getBulletAndWhitespaceBytes());
    SmallVector<ReSTASTNode *, 4> CurrItemChildren;
    LineIdx += parseLevel(ItemLines, CurrItemChildren);

    // Record where this item's children live in the flat children array.
    ItemInfos.push_back({ static_cast<unsigned>(ItemChildren.size()),
                          static_cast<unsigned>(CurrItemChildren.size()) });
    ItemChildren.append(CurrItemChildren.begin(), CurrItemChildren.end());
  }

  auto *BL = BulletList::create(Context, ItemInfos, ItemChildren);
  return { BL, LineIdx };
}

/// Parses an enumerated list that starts at the first line of \p LL.
///
/// All items share the enumerator kind, formatting style and indentation of
/// the first line.  Blank lines between items are skipped.  The list ends at
/// the first non-blank line that does not start a matching item.
///
/// \returns the list node and the number of lines consumed.
std::pair<ReSTASTNode *, unsigned> Parser::parseEnumeratedList(LineListRef LL) {
  SmallVector<EnumeratedList::ListItemInfo, 4> ItemInfos;
  SmallVector<ReSTASTNode *, 4> ItemChildren;

  const auto ListKind = LL[0].getClassification().Kind;
  auto EnumeratorStyle = LL[0].getClassification().getEnumeratorStyle();
  ColumnNum EnumeratorIndentation = LL[0].FirstTextCol;

  unsigned LineIdx = 0;
  const unsigned NumLines = LL.size();
  while (LineIdx != NumLines) {
    // Invariant: at the beginning of every iteration, we are either at the
    // beginning of the next list item or at the end of the list.
    const auto CurKind = LL[LineIdx].getClassification().Kind;

    if (CurKind == LineKind::Blank) {
      // Skip blank lines?
      ++LineIdx;
      continue;
    }

    // An indent here is not large enough to line up with the previous item's
    // children: the list ends and the next line will start a block quote at
    // the same nesting level as this list.  An unindent also ends the list;
    // the next line should match up with something parsed previously.
    if (LL[LineIdx].FirstTextCol > EnumeratorIndentation ||
        LL[LineIdx].FirstTextCol < EnumeratorIndentation)
      break;

    bool StartsItem = false;
    switch (CurKind) {
    case LineKind::EnumeratedListArabic:
    case LineKind::EnumeratedListUppercaseAlphabet:
    case LineKind::EnumeratedListLowercaseAlphabet:
    case LineKind::EnumeratedListUppercaseRoman:
    case LineKind::EnumeratedListLowercaseRoman:
    case LineKind::EnumeratedListUppercaseAmbiguous:
    case LineKind::EnumeratedListLowercaseAmbiguous:
    case LineKind::EnumeratedListAuto: {
      // The item has to use the same enumerator kind and style as the list,
      // and the lines that follow have to confirm that it is a list item.
      const bool SameFormat =
          CurKind == ListKind &&
          LL[LineIdx].getClassification().getEnumeratorStyle() ==
              EnumeratorStyle;
      const bool LooksLikeItem =
          isEnumeratedListItem(LL.dropFrontLines(LineIdx));
      StartsItem = SameFormat && LooksLikeItem;
      break;
    }

    case LineKind::Blank: // Handled above.
    case LineKind::Unknown:
    case LineKind::BulletListAsterisk:
    case LineKind::BulletListPlus:
    case LineKind::BulletListHyphenMinus:
    case LineKind::BulletListBullet:
    case LineKind::BulletListTriangularBullet:
    case LineKind::BulletListHyphenBullet:
    case LineKind::FieldList:
      break;
    }
    if (!StartsItem)
      break;

    // If we got here, this is the start of a list item.  Strip the
    // enumerator and the whitespace after it, then parse the item body as a
    // nested level.
    auto ItemLines = LL.dropFrontLines(LineIdx);
    ItemLines.fromFirstLineDropFront(
        LL[LineIdx].getClassification().getEnumeratorAndWhitespaceBytes());
    SmallVector<ReSTASTNode *, 4> CurrItemChildren;
    LineIdx += parseLevel(ItemLines, CurrItemChildren);

    // Record where this item's children live in the flat children array.
    ItemInfos.push_back({ static_cast<unsigned>(ItemChildren.size()),
                          static_cast<unsigned>(CurrItemChildren.size()) });
    ItemChildren.append(CurrItemChildren.begin(), CurrItemChildren.end());
  }

  auto *EL = EnumeratedList::create(Context, ItemInfos, ItemChildren);
  return { EL, LineIdx };
}

/// Parses a definition list that starts at the first line of \p LL.
///
/// Each item consists of a single term line followed by a more indented
/// definition body.  The list ends at the first line that is unindented
/// relative to the first term or that does not start another item.
///
/// \returns the list node and the number of lines consumed.
std::pair<ReSTASTNode *, unsigned> Parser::parseDefinitionList(LineListRef LL) {
  assert(isDefinitionList(LL));

  ColumnNum TermIndentation = LL[0].FirstTextCol;
  SmallVector<DefinitionListItem *, 4> Items;

  unsigned LineIdx = 0;
  const unsigned NumLines = LL.size();
  while (LineIdx != NumLines) {
    // Invariant: at the beginning of every iteration, we are either at the
    // beginning of the next list item or at the end of the list.

    // The list ends on an unindent, or when the remaining lines do not start
    // with a term line followed by its definition.
    if (LL[LineIdx].FirstTextCol < TermIndentation ||
        !isDefinitionList(LL.dropFrontLines(LineIdx)))
      break;

    // FIXME: parse the term line into term and classifiers.
    auto *Term = new (Context) TextAndInline(LL.subList(LineIdx, 1));

    // The definition body starts on the line right after the term and is
    // parsed at that line's indentation.
    const unsigned FirstBodyLine = LineIdx + 1;
    ColumnNum ItemBaseIndentation = LL[FirstBodyLine].FirstTextCol;
    SmallVector<ReSTASTNode *, 4> DefinitionChildren;
    const unsigned NumBodyLines = parseLevelImpl(
        LL.dropFrontLines(FirstBodyLine), DefinitionChildren,
        ItemBaseIndentation, TermIndentation + ColumnNum::make(1),
        /*IgnoreIndentationOfTheFirstLine=*/false, nullptr);

    Items.push_back(
        DefinitionListItem::create(Context, Term, {}, DefinitionChildren));
    LineIdx = FirstBodyLine + NumBodyLines;
  }

  auto *DL = DefinitionList::create(Context, Items);
  assert(LineIdx != 0);
  return { DL, LineIdx };
}

/// Parses a field list that starts at the first line of \p LL.
///
/// Every field consists of a field marker line and a body.  The body starts
/// on the marker line itself (after the marker) and continues on following
/// lines that are indented further than the marker's opening colon.  The list
/// ends at the first line that is unindented relative to the first field or
/// that is not classified as a field marker line.
///
/// \returns the list node and the number of lines consumed.
std::pair<ReSTASTNode *, unsigned> Parser::parseFieldList(LineListRef LL) {
  assert(LL[0].getClassification().Kind == LineKind::FieldList);

  ColumnNum FirstColonIndentation = LL[0].FirstTextCol;

  SmallVector<Field *, 4> Children;

  unsigned i = 0;
  for (unsigned e = LL.size(); i != e;) {
    // Invariant: at the beginning of every iteration, we are either at the
    // beginning of the next list item or at the end of the list.

    if (LL[i].FirstTextCol < FirstColonIndentation) {
      // Unindent.  Field list ends here.
      break;
    }

    if (LL[i].getClassification().Kind != LineKind::FieldList)
      break;

    // The field name is the text between the two colons of the marker.
    LinePart FieldNameText =
        LL.getLinePart(i, LL[i].FirstTextByte + 1,
                       LL[i].getClassification().getFieldNameBytes());
    auto FieldName = new (Context) TextAndInline(FieldNameText);

    // The body's base indentation is taken from the first non-blank line
    // after the marker line.  Start from the smallest indentation a body line
    // may have, so that the value is well-defined even when no such line
    // exists (the field is the last line, or only blank lines follow).
    ColumnNum ItemBaseIndentation = FirstColonIndentation + ColumnNum::make(1);
    for (unsigned j = i + 1; j != e; ++j) {
      if (LL[j].getClassification().Kind != LineKind::Blank) {
        ItemBaseIndentation = LL[j].FirstTextCol;
        break;
      }
    }

    // Strip the field marker and the whitespace after it from the first
    // line; what remains is the beginning of the field body.
    SmallVector<ReSTASTNode *, 4> BodyChildren;
    auto SubLL = LL.dropFrontLines(i);
    SubLL.fromFirstLineDropFront(
        SubLL[0].getClassification().getFieldMarkerAndWhitespaceBytes());
    unsigned NumLines =
        parseLevelImpl(SubLL, BodyChildren, ItemBaseIndentation,
                       FirstColonIndentation + ColumnNum::make(1),
                       /*IgnoreIndentationOfTheFirstLine=*/true, nullptr);
    Children.push_back(Field::create(Context, FieldName, BodyChildren));
    i += NumLines;
  }

  auto *FL = FieldList::create(Context, Children);
  assert(i != 0);
  return { FL, i };
}

/// Parses an indented block that starts at the first line of \p LL and wraps
/// its contents into a block quote node.
///
/// \returns the block quote node and the number of lines consumed.
std::pair<ReSTASTNode *, unsigned>
Parser::parseUnresolvedIndentedBlock(LineListRef LL) {
  SmallVector<ReSTASTNode *, 4> QuotedChildren;
  const unsigned NumLines = parseLevel(LL, QuotedChildren);

  ReSTASTNode *Quote = BlockQuote::create(Context, QuotedChildren);
  assert(NumLines != 0);
  return { Quote, NumLines };
}

/// Parses consecutive body elements from \p LL into \p Children.
///
/// \param BaseIndentation indentation expected for lines of this level.  A
///   more indented line starts a nested indented block.  A less indented line
///   that is still at or beyond \p LeftMarginIndentation wraps everything
///   parsed so far into a block quote and becomes the new base.
/// \param LeftMarginIndentation a non-blank line left of this column ends the
///   level.
/// \param IgnoreIndentationOfTheFirstLine if true, the first line is not
///   subject to indentation checks and is never treated as a definition list
///   term.
/// \param MinIndentation if not null, receives the final base indentation.
///
/// \returns the number of lines consumed; 0 only if \p LL is empty.
unsigned Parser::parseLevelImpl(LineListRef LL,
                                SmallVectorImpl<ReSTASTNode *> &Children,
                                ColumnNum BaseIndentation,
                                ColumnNum LeftMarginIndentation,
                                bool IgnoreIndentationOfTheFirstLine,
                                ColumnNum *MinIndentation) {
  assert(Children.size() == 0);
  if (LL.empty())
    return 0;

  unsigned LineIdx = 0;

  // Records a parsed block and advances past the lines it consumed.
  auto Consume = [&](std::pair<ReSTASTNode *, unsigned> Parsed) {
    Children.push_back(Parsed.first);
    LineIdx += Parsed.second;
  };

  const unsigned NumLines = LL.size();
  while (LineIdx != NumLines) {
    const auto Kind = LL[LineIdx].getClassification().Kind;

    const bool CheckIndentation =
        Kind != LineKind::Blank &&
        !(LineIdx == 0 && IgnoreIndentationOfTheFirstLine);
    if (CheckIndentation) {
      if (LL[LineIdx].FirstTextCol > BaseIndentation) {
        // Indent.
        //
        // FIXME: parse a definition list or a block quote.
        Consume(parseUnresolvedIndentedBlock(LL.dropFrontLines(LineIdx)));
        continue;
      }
      if (LL[LineIdx].FirstTextCol < LeftMarginIndentation) {
        // Unexpected unindent.  Current indentation level ends here, the next
        // line should match up with something else we parsed previously.
        break;
      }
      if (LL[LineIdx].FirstTextCol < BaseIndentation) {
        // Unindent that stays within the left margin: everything parsed so
        // far becomes a block quote, and this line sets the new base
        // indentation.
        auto *BQ = BlockQuote::create(Context, Children);
        Children.clear();
        Children.push_back(BQ);
        BaseIndentation = LL[LineIdx].FirstTextCol;
      }
    }

    switch (Kind) {
    case LineKind::Blank:
      // Skip blank lines?
      ++LineIdx;
      break;

    case LineKind::Unknown: {
      auto SubLL = LL.dropFrontLines(LineIdx);
      const bool IsDefinitionList =
          isDefinitionList(SubLL) &&
          !(LineIdx == 0 && IgnoreIndentationOfTheFirstLine);
      Consume(IsDefinitionList ? parseDefinitionList(SubLL)
                               : parseParagraph(SubLL, BaseIndentation));
      break;
    }

    case LineKind::BulletListAsterisk:
    case LineKind::BulletListPlus:
    case LineKind::BulletListHyphenMinus:
    case LineKind::BulletListBullet:
    case LineKind::BulletListTriangularBullet:
    case LineKind::BulletListHyphenBullet:
      // If the line looks like a bullet list item, it is always a bullet list
      // item, no further checks required.
      Consume(parseBulletList(LL.dropFrontLines(LineIdx)));
      break;

    case LineKind::EnumeratedListArabic:
    case LineKind::EnumeratedListUppercaseAlphabet:
    case LineKind::EnumeratedListLowercaseAlphabet:
    case LineKind::EnumeratedListUppercaseRoman:
    case LineKind::EnumeratedListLowercaseRoman:
    case LineKind::EnumeratedListUppercaseAmbiguous:
    case LineKind::EnumeratedListLowercaseAmbiguous:
    case LineKind::EnumeratedListAuto: {
      // A line that looks like an enumerator is only a list item if the
      // following lines agree; otherwise it is ordinary paragraph text.
      auto SubLL = LL.dropFrontLines(LineIdx);
      const bool IsListItem = isEnumeratedListItem(SubLL);
      // FIXME: more checks on indentation?
      Consume(IsListItem ? parseEnumeratedList(SubLL)
                         : parseParagraph(SubLL, BaseIndentation));
      break;
    }

    case LineKind::FieldList:
      Consume(parseFieldList(LL.dropFrontLines(LineIdx)));
      break;
    }
  }

  if (MinIndentation)
    *MinIndentation = BaseIndentation;
  assert(LineIdx != 0);
  return LineIdx;
}

/// Parses a nested level whose base indentation and left margin are both the
/// column of the first line of \p LL, which must not be blank.
///
/// \returns the number of lines consumed; 0 if \p LL is empty.
unsigned Parser::parseLevel(LineListRef LL,
                            SmallVectorImpl<ReSTASTNode *> &Children) {
  if (LL.size() == 0)
    return 0;

  assert(LL[0].getClassification().Kind != LineKind::Blank);
  ColumnNum FirstLineCol = LL[0].FirstTextCol;
  return parseLevelImpl(LL, Children,
                        /*BaseIndentation=*/FirstLineCol,
                        /*LeftMarginIndentation=*/FirstLineCol,
                        /*IgnoreIndentationOfTheFirstLine=*/false,
                        /*MinIndentation=*/nullptr);
}

/// Parses all of \p LL into a document node.
///
/// Leading blank lines are skipped.  Unless the IgnoreUniformIndentation
/// language option is set, a document whose final base indentation is not
/// column 0 has its contents wrapped into a block quote.
Document *Parser::parseDocument(LineListRef LL) {
  // Find the first non-blank line.
  unsigned NumLeadingBlankLines = 0;
  const unsigned NumLines = LL.size();
  while (NumLeadingBlankLines != NumLines &&
         LL[NumLeadingBlankLines].getClassification().Kind == LineKind::Blank)
    ++NumLeadingBlankLines;

  auto Body = LL.dropFrontLines(NumLeadingBlankLines);
  if (Body.empty())
    return Document::create(Context, {});

  SmallVector<ReSTASTNode *, 8> Children;
  ColumnNum MinIndentation;
  unsigned NumParsedLines = parseLevelImpl(
      Body, Children, /*BaseIndentation=*/Body[0].FirstTextCol,
      /*LeftMarginIndentation=*/ColumnNum::make(0),
      /*IgnoreIndentationOfTheFirstLine=*/false, &MinIndentation);
  assert(NumParsedLines == Body.size());

  const bool WrapInBlockQuote =
      !Context.LangOpts.IgnoreUniformIndentation &&
      MinIndentation != ColumnNum::make(0);
  if (WrapInBlockQuote) {
    auto *BQ = BlockQuote::create(Context, Children);
    Children.clear();
    Children.push_back(BQ);
  }

  return Document::create(Context, Children);
}

/// Parses the lines in \p LL into a document, creating AST nodes in \p C.
Document *llvm::rest::parseDocument(ReSTContext &C, LineListRef LL) {
  return Parser(C).parseDocument(LL);
}

/// Writes a ReST AST to an output stream as docutils-style XML.
struct CommentToDocutilsXMLConverter {
  /// Stream that receives the XML.
  raw_ostream &OS;

  CommentToDocutilsXMLConverter(raw_ostream &OS) : OS(OS) {}

  /// Prints every node of \p Nodes in order.
  template <typename NodeRange> void printChildren(const NodeRange &Nodes) {
    for (const auto *N : Nodes)
      printASTNode(N);
  }

  /// Dispatches to the printer that matches the dynamic kind of \p N.
  void printASTNode(const ReSTASTNode *N) {
    switch (N->getKind()) {
    case ASTNodeKind::Document:
      printDocument(cast<Document>(N));
      return;

    case ASTNodeKind::Section:
    case ASTNodeKind::Topic:
    case ASTNodeKind::Sidebar:
    case ASTNodeKind::Title:
    case ASTNodeKind::Subtitle:
    case ASTNodeKind::Transition:
      llvm_unreachable("implement");

    case ASTNodeKind::Paragraph:
      printParagraph(cast<Paragraph>(N));
      return;
    case ASTNodeKind::BulletList:
      printBulletList(cast<BulletList>(N));
      return;
    case ASTNodeKind::EnumeratedList:
      printEnumeratedList(cast<EnumeratedList>(N));
      return;
    case ASTNodeKind::DefinitionListItem:
      printDefinitionListItem(cast<DefinitionListItem>(N));
      return;
    case ASTNodeKind::DefinitionList:
      printDefinitionList(cast<DefinitionList>(N));
      return;
    case ASTNodeKind::Field:
      printField(cast<Field>(N));
      return;
    case ASTNodeKind::FieldList:
      printFieldList(cast<FieldList>(N));
      return;
    case ASTNodeKind::BlockQuote:
      printBlockQuote(cast<BlockQuote>(N));
      return;
    case ASTNodeKind::TextAndInline:
      printTextAndInline(cast<TextAndInline>(N));
      return;
    case ASTNodeKind::PrivateExtension:
      printPrivateExtension(cast<PrivateExtension>(N));
      return;
    }
  }

  void printDocument(const Document *D) {
    OS << "<document>";
    printChildren(D->getChildren());
    OS << "</document>";
  }

  void printParagraph(const Paragraph *P) {
    OS << "<paragraph>";
    printTextAndInline(P->getContent());
    OS << "</paragraph>";
  }

  void printBulletList(const BulletList *BL) {
    OS << "<bullet_list>";
    const unsigned NumItems = BL->getNumItems();
    for (unsigned Item = 0; Item != NumItems; ++Item) {
      OS << "<list_item>";
      printChildren(BL->getItemChildren(Item));
      OS << "</list_item>";
    }
    OS << "</bullet_list>";
  }

  void printEnumeratedList(const EnumeratedList *EL) {
    OS << "<enumerated_list>";
    const unsigned NumItems = EL->getNumItems();
    for (unsigned Item = 0; Item != NumItems; ++Item) {
      OS << "<list_item>";
      printChildren(EL->getItemChildren(Item));
      OS << "</list_item>";
    }
    OS << "</enumerated_list>";
  }

  /// Prints the term, then each classifier in its own element, then the
  /// definition body.
  void printDefinitionListItem(const DefinitionListItem *DLI) {
    OS << "<definition_list_item>";

    OS << "<term>";
    printASTNode(DLI->getTerm());
    OS << "</term>";

    for (const auto *Classifier : DLI->getClassifiers()) {
      OS << "<classifier>";
      printASTNode(Classifier);
      OS << "</classifier>";
    }

    OS << "<definition>";
    printChildren(DLI->getDefinitionChildren());
    OS << "</definition>";

    OS << "</definition_list_item>";
  }

  void printDefinitionList(const DefinitionList *DL) {
    OS << "<definition_list>";
    printChildren(DL->getChildren());
    OS << "</definition_list>";
  }

  void printField(const Field *F) {
    OS << "<field>";

    OS << "<field_name>";
    printASTNode(F->getName());
    OS << "</field_name>";

    OS << "<field_body>";
    printChildren(F->getBodyChildren());
    OS << "</field_body>";

    OS << "</field>";
  }

  void printFieldList(const FieldList *FL) {
    OS << "<field_list>";
    printChildren(FL->getChildren());
    OS << "</field_list>";
  }

  void printBlockQuote(const BlockQuote *BQ) {
    OS << "<block_quote>";
    printChildren(BQ->getChildren());
    OS << "</block_quote>";
  }

  /// Prints the text with XML escaping.  Multi-line text is printed without
  /// the leading indentation of each line, with lines separated by '\n'.
  void printTextAndInline(const TextAndInline *T) {
    if (T->isLinePart()) {
      appendWithXMLEscaping(OS, T->getLinePart().Text);
      return;
    }

    LineListRef Lines = T->getLines();
    for (unsigned i = 0, e = Lines.size(); i != e; ++i) {
      if (i != 0)
        OS << '\n';
      appendWithXMLEscaping(OS,
                            Lines[i].Text.drop_front(Lines[i].FirstTextByte));
    }
  }

  /// Private extensions are printed as an empty placeholder element; the
  /// node itself is not inspected.
  void printPrivateExtension(const PrivateExtension *PE) {
    OS << "<llvm:private_extension />";
  }
};

/// Writes the document \p D to \p OS as docutils-style XML.
void llvm::rest::convertToDocutilsXML(const Document *D, raw_ostream &OS) {
  CommentToDocutilsXMLConverter(OS).printASTNode(D);
}

void ReSTASTNode::dump() const {
  CommentToDocutilsXMLConverter Converter(llvm::errs());
  Converter.printASTNode(this);
  llvm::errs() << '\n';
}

/// Returns the length in bytes of the run of ReST whitespace at the start of
/// \p Text.
static unsigned measureReSTWhitespace(StringRef Text) {
  unsigned NumBytes = 0;
  const unsigned Size = Text.size();
  while (NumBytes != Size && isReSTWhitespace(Text[NumBytes]))
    ++NumBytes;
  return NumBytes;
}

/// Returns the length in bytes of the run of non-whitespace characters at the
/// start of \p Text.
static unsigned measureReSTWord(StringRef Text) {
  unsigned NumBytes = 0;
  const unsigned Size = Text.size();
  while (NumBytes != Size && !isReSTWhitespace(Text[NumBytes]))
    ++NumBytes;
  return NumBytes;
}

/// Splits \p LP into its first word and the remainder.
///
/// The word is the leading run of non-whitespace characters.  The remainder
/// starts after the whitespace that follows the word.  If \p LP does not
/// start with a word, the word is empty and the remainder is all of \p LP.
std::pair<LinePart, LinePart> llvm::rest::extractWord(LinePart LP) {
  const unsigned WordBytes = measureReSTWord(LP.Text);
  unsigned GapBytes = 0;
  if (WordBytes != 0)
    GapBytes = measureReSTWhitespace(LP.Text.drop_front(WordBytes));
  const unsigned ConsumedBytes = WordBytes + GapBytes;

  LinePart Word = {
      LP.Text.substr(0, WordBytes),
      SourceRange(LP.Range.Start, LP.Range.Start.getAdvancedLoc(WordBytes))};
  LinePart Rest = {
      LP.Text.drop_front(ConsumedBytes),
      SourceRange(LP.Range.Start.getAdvancedLoc(ConsumedBytes), LP.Range.End)};

  return {Word, Rest};
}

/// Extracts the first word from the first line of \p LL that has any text
/// after its leading indentation.
///
/// \returns the word, and the line list that starts at that line with the
/// word and the whitespace after it dropped from its first line.  If every
/// line is empty, returns an empty LinePart and \p LL unchanged.
///
/// NOTE(review): the word's range starts at L.Range.Start although the text
/// is taken after L.FirstTextByte; presumably the line range already begins
/// at the first text byte -- confirm.
std::pair<LinePart, LineListRef> llvm::rest::extractWord(LineListRef LL) {
  const unsigned NumLines = LL.size();
  for (unsigned LineIdx = 0; LineIdx != NumLines; ++LineIdx) {
    const Line &L = LL[LineIdx];
    StringRef Text = L.Text.drop_front(L.FirstTextByte);
    if (Text.empty())
      continue;

    const unsigned WordBytes = measureReSTWord(Text);
    unsigned GapBytes = 0;
    if (WordBytes != 0)
      GapBytes = measureReSTWhitespace(Text.drop_front(WordBytes));

    LinePart Word = {
        Text.substr(0, WordBytes),
        SourceRange(L.Range.Start, L.Range.Start.getAdvancedLoc(WordBytes))};
    LineListRef Rest = LL.subList(LineIdx, NumLines - LineIdx);
    Rest.fromFirstLineDropFront(WordBytes + GapBytes);
    return {Word, Rest};
  }
  return {LinePart(), LL};
}

/// Removes the first word from \p TAI and returns it.
///
/// \p TAI is updated in place to hold only the text that remains after the
/// word, whether it stores a single line part or a list of lines.
LinePart llvm::rest::extractWord(TextAndInline *TAI) {
  if (!TAI->isLinePart()) {
    auto WordAndRest = ::extractWord(TAI->getLines());
    TAI->setLines(WordAndRest.second);
    return WordAndRest.first;
  }

  auto WordAndRest = ::extractWord(TAI->getLinePart());
  TAI->setLinePart(WordAndRest.second);
  return WordAndRest.first;
}