//===--- ReSTTest.cpp - ReST parsing tests --------------------------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See http://swift.org/LICENSE.txt for license information // See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// #include "swift/ReST/LineList.h" #include "swift/ReST/Parser.h" #include "llvm/ADT/SmallString.h" #include "gtest/gtest.h" #include using namespace llvm; using namespace rest; using namespace llvm::rest::detail; // In the tests below, test cases that are marked "correct" produce completely // correct results and should not be changed without a good reason. struct ExtractBriefTestData { std::vector InText; std::string Brief; std::string DocutilsXML; }; static StringRef stripDocumentTag(StringRef DocutilsXML) { if (DocutilsXML.startswith("")) DocutilsXML = DocutilsXML.drop_front(10); if (DocutilsXML.endswith("")) DocutilsXML = DocutilsXML.drop_back(11); return DocutilsXML; } static std::string replaceAll(std::string S, std::string Original, std::string Replacement) { size_t I = S.find(Original); while (I != std::string::npos) { S.replace(I, Original.size(), Replacement); I = S.find(Original, I + Replacement.size()); } return S; } static std::vector replaceAll(std::vector Strings, std::string Original, std::string Replacement) { for (size_t i = 0, e = Strings.size(); i != e; ++i) Strings[i] = replaceAll(Strings[i], Original, Replacement); return Strings; } static bool inlineMarkupDelimitersMatch(StringRef StartString, StringRef EndString) { if (StartString == "*" || StartString == "**" || StartString == "``" || StartString == "|") return StartString == EndString; if (StartString == "`") return EndString == "`" || EndString == "`_"; if (StartString == "_`") 
return EndString == "`"; if (StartString == "[") return EndString == "]_"; llvm_unreachable("invalid arguments"); } struct ReSTTest : public ::testing::Test { SourceManager SM; LineList toLineList(ReSTContext &Context, StringRef Text) { LineListBuilder Result; Result.addLine(Text, SM.registerLine(Text, 0)); return Result.takeLineList(Context); } LineList toLineList(ReSTContext &Context, std::vector Lines) { LineListBuilder Result; for (auto S : Lines) { Result.addLine(S, SM.registerLine(S, 0)); } return Result.takeLineList(Context); } LineList toLineList(ReSTContext &Context, std::vector Lines) { LineListBuilder Result; for (auto S : Lines) { StringRef Copy = Context.allocateCopy(S); Result.addLine(Copy, SM.registerLine(Copy, 0)); } return Result.takeLineList(Context); } void checkInlineMarkup(const std::vector &InText, const std::string &ExpectedBrief, const std::string &ExpectedDocutilsXML) { ReSTContext Context; auto LL = toLineList(Context, InText); llvm::SmallString<64> Str; extractBrief(LL, Str); EXPECT_EQ(ExpectedBrief, Str.str().str()); Str.clear(); auto *TheDocument = parseDocument(Context, LL); { llvm::raw_svector_ostream OS(Str); convertToDocutilsXML(TheDocument, OS); } StringRef DocutilsXML = stripDocumentTag(Str.str()); EXPECT_EQ(ExpectedDocutilsXML, DocutilsXML.str()) << "ReST document: " << ::testing::PrintToString(InText); } void replaceText(std::vector &InText, std::string &ExpectedBrief, std::string &ExpectedDocutilsXML, const std::string &Original, const std::string &Replacement) { InText = replaceAll(InText, Original, Replacement); ExpectedBrief = replaceAll(ExpectedBrief, Original, Replacement); ExpectedDocutilsXML = replaceAll(ExpectedDocutilsXML, Original, Replacement); } void checkInlineMarkupWithReplacement(const ExtractBriefTestData &Test, std::string StartString, std::string EndString) { std::vector InText; for (auto Line : Test.InText) InText.push_back(Line); std::string ExpectedBrief = Test.Brief; std::string ExpectedDocutilsXML = 
Test.DocutilsXML; replaceText(InText, ExpectedBrief, ExpectedDocutilsXML, "S", StartString); replaceText(InText, ExpectedBrief, ExpectedDocutilsXML, "E", EndString); checkInlineMarkup(InText, ExpectedBrief, ExpectedDocutilsXML); } }; TEST_F(ReSTTest, LineList_getLinePart1) { ReSTContext Context; std::vector Text = { "abcd", "efg", "hi" }; LineListRef LL = toLineList(Context, Text); EXPECT_EQ("", LL.getLinePart(0, 0, 0).Text); EXPECT_EQ("a", LL.getLinePart(0, 0, 1).Text); EXPECT_EQ("ab", LL.getLinePart(0, 0, 2).Text); EXPECT_EQ("abc", LL.getLinePart(0, 0, 3).Text); EXPECT_EQ("abcd", LL.getLinePart(0, 0, 4).Text); EXPECT_EQ("", LL.getLinePart(0, 1, 0).Text); EXPECT_EQ("b", LL.getLinePart(0, 1, 1).Text); EXPECT_EQ("bc", LL.getLinePart(0, 1, 2).Text); EXPECT_EQ("bcd", LL.getLinePart(0, 1, 3).Text); EXPECT_EQ("cd", LL.getLinePart(0, 2, 2).Text); EXPECT_EQ("d", LL.getLinePart(0, 3, 1).Text); EXPECT_EQ("", LL.getLinePart(0, 4, 0).Text); EXPECT_EQ("", LL.getLinePart(1, 0, 0).Text); EXPECT_EQ("e", LL.getLinePart(1, 0, 1).Text); EXPECT_EQ("ef", LL.getLinePart(1, 0, 2).Text); EXPECT_EQ("efg", LL.getLinePart(1, 0, 3).Text); EXPECT_EQ("f", LL.getLinePart(1, 1, 1).Text); EXPECT_EQ("fg", LL.getLinePart(1, 1, 2).Text); EXPECT_EQ("", LL.getLinePart(2, 0, 0).Text); EXPECT_EQ("h", LL.getLinePart(2, 0, 1).Text); EXPECT_EQ("hi", LL.getLinePart(2, 0, 2).Text); } TEST_F(ReSTTest, LineList_getLinePart2) { ReSTContext Context; std::vector Text = { "zzz", "zabcd", "efg", "hi", "zzz" }; LineListRef LL = toLineList(Context, Text); LL = LL.dropFrontLines(1); LL = LL.subList(0, 3); LL.fromFirstLineDropFront(1); ASSERT_EQ(3u, LL.size()); EXPECT_EQ("zabcd", LL.getLinePart(0, 0, 5).Text); EXPECT_EQ("efg", LL.getLinePart(1, 0, 3).Text); EXPECT_EQ("hi", LL.getLinePart(2, 0, 2).Text); } struct LineListIndentationTestData { StringRef InText; unsigned FirstTextCol; unsigned FirstTextByte; }; struct LineListIndentationTest : public ReSTTest, public ::testing::WithParamInterface {}; 
TEST_P(LineListIndentationTest, Test) { const auto &Test = GetParam(); ReSTContext Context; auto LL = toLineList(Context, Test.InText); EXPECT_EQ(1u, LL.size()); EXPECT_EQ(Test.FirstTextCol, LL[0].FirstTextCol.Value); EXPECT_EQ(Test.FirstTextByte, LL[0].FirstTextByte); } struct LineListIndentationTestData LineListIndentationTests[] = { { "", 0, 0 }, { " ", 1, 1 }, { "\v", 1, 1 }, { "\f", 1, 1 }, { " ", 2, 2 }, { "\t", 8, 1 }, { " \t", 8, 2 }, { "\v\t", 8, 2 }, { "\f\t", 8, 2 }, { " \t\t", 16, 3 }, { " \t ", 9, 3 }, { " \t\v", 9, 3 }, { " \t\f", 9, 3 }, { "\t\t", 16, 2 }, { "aaa", 0, 0 }, { " aaa", 1, 1 }, { "\vaaa", 1, 1 }, { "\faaa", 1, 1 }, { " aaa ", 2, 2 }, { "\taaa", 8, 1 }, { " \t \t aaa", 17, 5 }, { " \t \t\vaaa", 17, 5 }, { " \t \t\faaa", 17, 5 }, }; INSTANTIATE_TEST_CASE_P( ReSTTest, LineListIndentationTest, ::testing::ValuesIn(LineListIndentationTests)); struct ClassifyLineBlankTestData { StringRef InText; }; struct ClassifyLineBlankTest : public ReSTTest, public ::testing::WithParamInterface {}; TEST_P(ClassifyLineBlankTest, Test) { const auto &Test = GetParam(); ReSTContext Context; auto LL = toLineList(Context, Test.InText); auto Result = classifyLine(LL[0]); EXPECT_EQ(LineKind::Blank, Result.Kind); } // REST-FIXME: clarify that trailing whitespace is not significant in ReST. 
struct ClassifyLineBlankTestData ClassifyLineBlankTests[] = { { "" }, { " " }, { "\t" }, { "\v" }, { "\f" }, { " \t\v\f" }, }; INSTANTIATE_TEST_CASE_P( ReSTTest, ClassifyLineBlankTest, ::testing::ValuesIn(ClassifyLineBlankTests)); struct ClassifyLineBulletListTestData { StringRef InText; LineKind Kind; unsigned BulletAndWhitespaceBytes; }; struct ClassifyLineBulletListTest : public ReSTTest, public ::testing::WithParamInterface {}; TEST_P(ClassifyLineBulletListTest, Test) { const auto &Test = GetParam(); ReSTContext Context; auto LL = toLineList(Context, Test.InText); auto Result = classifyLine(LL[0]); EXPECT_EQ(Test.Kind, Result.Kind); if (isBullet(Test.Kind)) { EXPECT_EQ(Test.BulletAndWhitespaceBytes, Result.getBulletAndWhitespaceBytes()); } } struct ClassifyLineBulletListTestData ClassifyLineBulletListTests[] = { { "* a", LineKind::BulletListAsterisk, 2 }, { " * a", LineKind::BulletListAsterisk, 2 }, { "\t* a", LineKind::BulletListAsterisk, 2 }, { "*\ta", LineKind::BulletListAsterisk, 2 }, { "*\va", LineKind::BulletListAsterisk, 2 }, { "*\fa", LineKind::BulletListAsterisk, 2 }, { "* a", LineKind::BulletListAsterisk, 3 }, { "* \ta", LineKind::BulletListAsterisk, 3 }, { "* \va", LineKind::BulletListAsterisk, 3 }, { "* \fa", LineKind::BulletListAsterisk, 3 }, { "*\t a", LineKind::BulletListAsterisk, 3 }, { "*\v a", LineKind::BulletListAsterisk, 3 }, { "*\f a", LineKind::BulletListAsterisk, 3 }, { "* \t a", LineKind::BulletListAsterisk, 4 }, { "* ", LineKind::BulletListAsterisk, 3 }, { "* ", LineKind::BulletListAsterisk, 2 }, { "*", LineKind::BulletListAsterisk, 1 }, { "*a", LineKind::Unknown, 0 }, { "*0", LineKind::Unknown, 0 }, { " *a", LineKind::Unknown, 0 }, { " *0", LineKind::Unknown, 0 }, // U+3000 IDEOGRAPHIC SPACE is not considered whitespace by ReST. 
{ "*\xe3\x80\x80", LineKind::Unknown, 0 }, { "+ a", LineKind::BulletListPlus, 2 }, { "+\ta", LineKind::BulletListPlus, 2 }, { "+ ", LineKind::BulletListPlus, 2 }, { "+", LineKind::BulletListPlus, 1 }, { "- a", LineKind::BulletListHyphenMinus, 2 }, { "-\ta", LineKind::BulletListHyphenMinus, 2 }, { "- ", LineKind::BulletListHyphenMinus, 2 }, { "-", LineKind::BulletListHyphenMinus, 1 }, { "\xe2\x80\xa2 a", LineKind::BulletListBullet, 4 }, { "\xe2\x80\xa2\ta", LineKind::BulletListBullet, 4 }, { "\xe2\x80\xa2 ", LineKind::BulletListBullet, 4 }, { "\xe2\x80\xa2", LineKind::BulletListBullet, 3 }, { "\xe2\x80\xa3 a", LineKind::BulletListTriangularBullet, 4 }, { "\xe2\x80\xa3\ta", LineKind::BulletListTriangularBullet, 4 }, { "\xe2\x80\xa3 ", LineKind::BulletListTriangularBullet, 4 }, { "\xe2\x80\xa3", LineKind::BulletListTriangularBullet, 3 }, { "\xe2\x81\x83 a", LineKind::BulletListHyphenBullet, 4 }, { "\xe2\x81\x83\ta", LineKind::BulletListHyphenBullet, 4 }, { "\xe2\x81\x83 ", LineKind::BulletListHyphenBullet, 4 }, { "\xe2\x81\x83", LineKind::BulletListHyphenBullet, 3 }, }; INSTANTIATE_TEST_CASE_P( ReSTTest, ClassifyLineBulletListTest, ::testing::ValuesIn(ClassifyLineBulletListTests)); struct ClassifyLineEnumeratedListTestData { StringRef InText; LineKind Kind; unsigned EnumeratorAndWhitespaceBytes; }; struct ClassifyLineEnumeratedListTest : public ReSTTest, public ::testing::WithParamInterface { }; TEST_P(ClassifyLineEnumeratedListTest, Test) { const auto &Test = GetParam(); ReSTContext Context; auto LL = toLineList(Context, Test.InText); auto Result = classifyLine(LL[0]); EXPECT_EQ(Test.Kind, Result.Kind); if (isEnumerated(Test.Kind)) { EXPECT_EQ(Test.EnumeratorAndWhitespaceBytes, Result.getEnumeratorAndWhitespaceBytes()); } } struct ClassifyLineEnumeratedListTestData ClassifyLineEnumeratedListTests[] = { { "#", LineKind::Unknown, 0 }, { "#a", LineKind::Unknown, 0 }, { "# ", LineKind::Unknown, 0 }, { "# \t", LineKind::Unknown, 0 }, { "# a", LineKind::Unknown, 0 }, { "# 
\ta", LineKind::Unknown, 0 }, { "#.", LineKind::EnumeratedListAuto, 2 }, { "#.a", LineKind::Unknown, 0 }, { "#. ", LineKind::EnumeratedListAuto, 3 }, { "#. a", LineKind::EnumeratedListAuto, 3 }, { "#. \t", LineKind::EnumeratedListAuto, 4 }, { "#. \ta", LineKind::EnumeratedListAuto, 4 }, { "#)", LineKind::EnumeratedListAuto, 2 }, { "#)a", LineKind::Unknown, 0 }, { "#) ", LineKind::EnumeratedListAuto, 3 }, { "#) a", LineKind::EnumeratedListAuto, 3 }, { "#) \t", LineKind::EnumeratedListAuto, 4 }, { "#) \ta", LineKind::EnumeratedListAuto, 4 }, { "(#", LineKind::Unknown, 0 }, { "(#a", LineKind::Unknown, 0 }, { "(# ", LineKind::Unknown, 0 }, { "(# a", LineKind::Unknown, 0 }, { "(# \t", LineKind::Unknown, 0 }, { "(# \ta", LineKind::Unknown, 0 }, { "(#)", LineKind::EnumeratedListAuto, 3 }, { "(#)a", LineKind::Unknown, 0 }, { "(#) ", LineKind::EnumeratedListAuto, 4 }, { "(#) a", LineKind::EnumeratedListAuto, 4 }, { "(#) \t", LineKind::EnumeratedListAuto, 5 }, { "(#) \ta", LineKind::EnumeratedListAuto, 5 }, { ".", LineKind::Unknown, 0 }, { "(", LineKind::Unknown, 0 }, { ")", LineKind::Unknown, 0 }, { "1", LineKind::Unknown, 0 }, { "1a", LineKind::Unknown, 0 }, { "1 ", LineKind::Unknown, 0 }, { "1 a", LineKind::Unknown, 0 }, { "1 \t", LineKind::Unknown, 0 }, { "1 \ta", LineKind::Unknown, 0 }, { "1.", LineKind::EnumeratedListArabic, 2 }, { "1.a", LineKind::Unknown, 0 }, { "1. ", LineKind::EnumeratedListArabic, 3 }, { "1. a", LineKind::EnumeratedListArabic, 3 }, { "1. \t", LineKind::EnumeratedListArabic, 4 }, { "1. 
\ta", LineKind::EnumeratedListArabic, 4 }, { "1)", LineKind::EnumeratedListArabic, 2 }, { "1)a", LineKind::Unknown, 0 }, { "1) ", LineKind::EnumeratedListArabic, 3 }, { "1) a", LineKind::EnumeratedListArabic, 3 }, { "1) \t", LineKind::EnumeratedListArabic, 4 }, { "1) \ta", LineKind::EnumeratedListArabic, 4 }, { "(1", LineKind::Unknown, 0 }, { "(1a", LineKind::Unknown, 0 }, { "(1 ", LineKind::Unknown, 0 }, { "(1 a", LineKind::Unknown, 0 }, { "(1 \t", LineKind::Unknown, 0 }, { "(1 \ta", LineKind::Unknown, 0 }, { "(1)", LineKind::EnumeratedListArabic, 3 }, { "(1)a", LineKind::Unknown, 0 }, { "(1) ", LineKind::EnumeratedListArabic, 4 }, { "(1) a", LineKind::EnumeratedListArabic, 4 }, { "(1) \t", LineKind::EnumeratedListArabic, 5 }, { "(1) \ta", LineKind::EnumeratedListArabic, 5 }, { "12", LineKind::Unknown, 0 }, { "12a", LineKind::Unknown, 0 }, { "12 ", LineKind::Unknown, 0 }, { "12 a", LineKind::Unknown, 0 }, { "12 \t", LineKind::Unknown, 0 }, { "12 \ta", LineKind::Unknown, 0 }, { "12.", LineKind::EnumeratedListArabic, 3 }, { "12.a", LineKind::Unknown, 0 }, { "12. ", LineKind::EnumeratedListArabic, 4 }, { "12. a", LineKind::EnumeratedListArabic, 4 }, { "12. \t", LineKind::EnumeratedListArabic, 5 }, { "12. 
\ta", LineKind::EnumeratedListArabic, 5 }, { "12)", LineKind::EnumeratedListArabic, 3 }, { "12)a", LineKind::Unknown, 0 }, { "12) ", LineKind::EnumeratedListArabic, 4 }, { "12) a", LineKind::EnumeratedListArabic, 4 }, { "12) \t", LineKind::EnumeratedListArabic, 5 }, { "12) \ta", LineKind::EnumeratedListArabic, 5 }, { "(12", LineKind::Unknown, 0 }, { "(12a", LineKind::Unknown, 0 }, { "(12 ", LineKind::Unknown, 0 }, { "(12 a", LineKind::Unknown, 0 }, { "(12 \t", LineKind::Unknown, 0 }, { "(12 \ta", LineKind::Unknown, 0 }, { "(12)", LineKind::EnumeratedListArabic, 4 }, { "(12)a", LineKind::Unknown, 4 }, { "(12) ", LineKind::EnumeratedListArabic, 5 }, { "(12) a", LineKind::EnumeratedListArabic, 5 }, { "(12) \t", LineKind::EnumeratedListArabic, 6 }, { "(12) \ta", LineKind::EnumeratedListArabic, 6 }, }; INSTANTIATE_TEST_CASE_P( ReSTTest, ClassifyLineEnumeratedListTest, ::testing::ValuesIn(ClassifyLineEnumeratedListTests)); struct ClassifyLineFieldListTestData { StringRef InText; LineKind Kind; unsigned FieldNameBytes; unsigned FieldMarkerAndWhitespaceBytes; }; struct ClassifyLineFieldListTest : public ReSTTest, public ::testing::WithParamInterface {}; TEST_P(ClassifyLineFieldListTest, Test) { const auto &Test = GetParam(); ReSTContext Context; auto LL = toLineList(Context, Test.InText); auto Result = classifyLine(LL[0]); EXPECT_EQ(Test.Kind, Result.Kind); if (Test.Kind == LineKind::FieldList) { EXPECT_EQ(Test.FieldNameBytes, Result.getFieldNameBytes()); EXPECT_EQ(Test.FieldMarkerAndWhitespaceBytes, Result.getFieldMarkerAndWhitespaceBytes()); } } struct ClassifyLineFieldListTestData ClassifyLineFieldListTests[] = { // Missing terminating ':'. { ":", LineKind::Unknown, 0, 0 }, { ":a", LineKind::Unknown, 0, 0 }, { ":foo", LineKind::Unknown, 0, 0 }, { ":\xe4\xbe\x8b", LineKind::Unknown, 0, 0 }, // Field name can not be empty. 
{ "::", LineKind::Unknown, 0, 0 }, { "::foo", LineKind::Unknown, 0, 0 }, { ":: foo", LineKind::Unknown, 0, 0 }, { "::: foo", LineKind::Unknown, 0, 0 }, // Differentiate between interpreted text roles and field lists. { ":foo:``", LineKind::Unknown, 0, 0 }, { ":foo: ``", LineKind::FieldList, 3, 6 }, { ":foo:`bar`", LineKind::Unknown, 0, 0 }, { ":foo: `bar`", LineKind::FieldList, 3, 6 }, { ":foo:bar", LineKind::Unknown, 0, 0 }, { ":foo: bar", LineKind::FieldList, 3, 6 }, // OK. { ":a:", LineKind::FieldList, 1, 3 }, { ": a:", LineKind::FieldList, 2, 4 }, { ":a :", LineKind::FieldList, 2, 4 }, { ": a :", LineKind::FieldList, 3, 5 }, { ":bb:", LineKind::FieldList, 2, 4 }, { ":\xe4\xbe\x8b:", LineKind::FieldList, 3, 5 }, { ":a*b:", LineKind::FieldList, 3, 5 }, { ":a *b*:", LineKind::FieldList, 5, 7 }, { ":a *b:", LineKind::FieldList, 4, 6 }, { ":a`b:", LineKind::FieldList, 3, 5 }, { ":a `b`:", LineKind::FieldList, 5, 7 }, // Count whitespace after the field marker. { ":foo:", LineKind::FieldList, 3, 5 }, { ":foo: ", LineKind::FieldList, 3, 6 }, { ":foo:\t", LineKind::FieldList, 3, 6 }, { ":foo:\v", LineKind::FieldList, 3, 6 }, { ":foo:\f", LineKind::FieldList, 3, 6 }, { ":foo: a", LineKind::FieldList, 3, 6 }, { ":foo:\ta", LineKind::FieldList, 3, 6 }, { ":foo:\va", LineKind::FieldList, 3, 6 }, { ":foo:\fa", LineKind::FieldList, 3, 6 }, { ":foo:\t ", LineKind::FieldList, 3, 7 }, { ":foo: \t", LineKind::FieldList, 3, 7 }, { ":foo: \t a", LineKind::FieldList, 3, 8 }, // Escaping. 
{ ":\\", LineKind::Unknown, 0, 0 }, { ":\\:", LineKind::Unknown, 0, 0 }, { ":\\a", LineKind::Unknown, 0, 0 }, { ":\\\\", LineKind::Unknown, 0, 0 }, { ":foo\\", LineKind::Unknown, 0, 0 }, { ":foo\\: bar", LineKind::Unknown, 0, 0 }, { ":f\\oo\\: bar", LineKind::Unknown, 0, 0 }, { ":f\\oo\\: bar\\", LineKind::Unknown, 0, 0 }, { ":\\::", LineKind::FieldList, 2, 4 }, { ":\\a:", LineKind::FieldList, 2, 4 }, { ":\\\\:", LineKind::FieldList, 2, 4 }, { ":foo\\::", LineKind::FieldList, 5, 7 }, { ":a\\bc\\:\\:def\\ ghi:", LineKind::FieldList, 16, 18 }, { ":abc\\:def: foo:bar:baz", LineKind::FieldList, 8, 11 }, { ":\\\xe4\xbe\x8b:", LineKind::FieldList, 4, 6 }, }; INSTANTIATE_TEST_CASE_P( ReSTTest, ClassifyLineFieldListTest, ::testing::ValuesIn(ClassifyLineFieldListTests)); struct ExtractBriefTest : public ReSTTest, public ::testing::WithParamInterface {}; TEST_P(ExtractBriefTest, Test) { const auto &Test = GetParam(); ReSTContext Context; auto LL = toLineList(Context, Test.InText); llvm::SmallString<64> Str; extractBrief(LL, Str); EXPECT_EQ(Test.Brief, Str.str().str()) << "ReST document: " << ::testing::PrintToString(Test.InText); Str.clear(); auto *TheDocument = parseDocument(Context, LL); { llvm::raw_svector_ostream OS(Str); convertToDocutilsXML(TheDocument, OS); } StringRef DocutilsXML = stripDocumentTag(Str.str()); EXPECT_EQ(Test.DocutilsXML, DocutilsXML.str()) << "ReST document: " << ::testing::PrintToString(Test.InText); } struct ExtractBriefTestData ExtractBriefTests[] = { { {}, "", "" }, // Correct. { { "" }, "", "" }, // Correct. { { "aaa" }, "aaa", "aaa" }, // Correct. { { "", "aaa" }, "aaa", "aaa" }, // Correct. { { "", "", "aaa" }, "aaa", "aaa" }, // Correct. { { "aaa", "bbb" }, "aaa bbb", "aaa\nbbb" }, // Correct. { { "& < > \" '" }, "& < > \" '", "& < > " '" }, // Correct. { { "aaa", " " }, "aaa", "aaa" }, // Correct. { { "aaa", "", "bbb" }, "aaa", "aaa" "bbb" }, // Correct. { { "aaa", "", "* bbb" }, "aaa", "aaa" "" "bbb" "" }, // Correct. { { "aaa", "", "1. 
bbb" }, "aaa", "aaa" "" "bbb" "" }, // Correct. { { "aaa", "", "(1) bbb" }, "aaa", "aaa" "" "bbb" "" }, // Correct. { { "aaa", "* bbb" }, "aaa * bbb", "aaa\n* bbb" }, // Correct. { { "aaa", "1. bbb" }, "aaa 1. bbb", "aaa\n1. bbb" }, // Correct. { { "aaa", "(1) bbb" }, "aaa (1) bbb", "aaa\n(1) bbb" }, // Correct. { { "aaa", ":bbb: ccc" }, "aaa :bbb: ccc", "aaa\n:bbb: ccc" }, // Correct. // Bullet list. { { "* aaa", "bbb" }, "", // FIXME: missing diagnostic. "" "aaa" "" "bbb" }, // Correct. { { " * aaa", " bbb" }, "", // FIXME: missing diagnostic. "" "" "aaa" "" "bbb" "" }, // Correct. { { " * aaa", " bbb" }, "", "" "" "aaa\nbbb" "" "" }, // Correct. { { " * aaa", "bbb" }, "", // FIXME: missing diagnostic. "" "" "aaa" "" "" "bbb" }, // Correct. { { " * aaa", " bbb" }, "", "" "" "" "" "" "aaa" "bbb" "" "" "" "" "" }, { { " * aaa", " bbb", " ccc" }, "", "" "" "" "" "" "aaa" "bbb\nccc" "" "" "" "" "" }, { { " * aaa", "", "bbb" }, "", "" "" "aaa" "" "" "bbb" }, // Correct. { { " * aaa", " bbb" }, "", "" "" "aaa\nbbb" "" "" }, // Correct. { { "*\taaa", "\tbbb" }, "", "" "aaa\nbbb" "" }, // Correct. { { "* \taaa", " \tbbb", "\tccc" }, "", "" "aaa\nbbb\nccc" "" }, // Correct. { { "* aaa", "* bbb" }, "", "" "aaa" "bbb" "" }, // Correct. { { "* aaa", "", "* bbb" }, "", "" "aaa" "bbb" "" }, // Correct. { { "* aaa", "", "", "* bbb" }, "", "" "aaa" "bbb" "" }, // Correct. { { "* aaa", " bbb", "* ccc" }, "", "" "aaa\nbbb" "ccc" "" }, // Correct. { { "* aaa", " bbb", "", "* ccc" }, "", "" "aaa\nbbb" "ccc" "" }, // Correct. { { "* aaa", " bbb", "", "", "* ccc" }, "", "" "aaa\nbbb" "ccc" "" }, // Correct. // Bullet list without text immediately after the bullet. { { "*" }, "", "" "" "" }, // Correct. { { "*", "*", "*" }, "", "" "" "" "" "" }, // Correct. { { "*", "", " aaa" }, "", "" "aaa" "" }, // Correct. { { "*", "", " aaa", " bbb" }, "", "" "aaa\nbbb" "" }, // Correct. { { "*", "", " aaa", "", " bbb" }, "", "" "aaabbb" "" }, // Correct. // Bullet list. Different bullets. 
{ { "* aaa", "+ bbb" }, "", // FIXME: missing diagnostic. "" "aaa" "" "" "bbb" "" }, // Correct. { { "* aaa", "- bbb" }, "", // FIXME: missing diagnostic. "" "aaa" "" "" "bbb" "" }, // Correct. { { "* aaa", "\xe2\x80\xa2 bbb" }, "", // FIXME: missing diagnostic. "" "aaa" "" "" "bbb" "" }, // Correct. { { "* aaa", "\xe2\x80\xa3 bbb" }, "", // FIXME: missing diagnostic. "" "aaa" "" "" "bbb" "" }, // Correct. { { "* aaa", "\xe2\x81\x83 bbb" }, "", // FIXME: missing diagnostic. "" "aaa" "" "" "bbb" "" }, // Correct. // Not parsed as enumerated lists because indentation of the second line is // incorrect. { { "1. aaa", "bbb" }, "1. aaa bbb", "1. aaa\nbbb" }, // Correct. { { "(1) aaa", "bbb" }, "(1) aaa bbb", "(1) aaa\nbbb" }, // Correct. { { "(1) aaa", "* bbb" }, "(1) aaa * bbb", "(1) aaa\n* bbb" }, // Correct. { { "(1) aaa", ":bbb:" }, "(1) aaa :bbb:", "(1) aaa\n:bbb:" }, // Correct. { { "(1) aaa", ":bbb: ccc" }, "(1) aaa :bbb: ccc", "(1) aaa\n:bbb: ccc" }, // Correct. // Not parsed as an enumerated list because the second line is not a NEELEL. { { "1. aaa", "2." }, "1. aaa 2.", "1. aaa\n2." }, // Correct. // Enumerated list. { { "1. aaa" }, "", "" "aaa" "" }, // Correct. { { " 1. aaa" }, "", "" "" "aaa" "" "" }, // Correct. { { "1. aaa", "2. bbb" }, "", "" "aaa" "bbb" "" }, // Correct. { { " 1. aaa", " 2. bbb" }, "", "" "" "aaa" "bbb" "" "" }, // Correct. { { "1. aaa", "", "2. bbb" }, "", "" "aaa" "bbb" "" }, // Correct. { { " 1. aaa", "", " 2. bbb" }, "", "" "" "aaa" "bbb" "" "" }, // Correct. { { "1. aaa", "", "", "2. bbb" }, "", "" "aaa" "bbb" "" }, // Correct. { { " 1. aaa", "", "", " 2. bbb" }, "", "" "" "aaa" "bbb" "" "" }, // Correct. { { "1. aaa", " ", " ", "2. bbb" }, "", "" "aaa" "bbb" "" }, // Correct. { { "1. aaa", " bbb" }, "", "" "aaa\nbbb" "" }, // Correct. { { "(1) aaa", " bbb" }, "", "" "aaa\nbbb" "" }, // Correct. { { " 1. aaa", " bbb" }, "", "" "" "aaa\nbbb" "" "" }, // Correct. { { " 1. 
\taaa", " \tbbb", " \tccc", "\tddd" }, "", "" "" "aaa\nbbb\nccc\nddd" "" "" }, // Correct. { { "1. aaa", " bbb" }, "", "" "" "" "" "aaa" "bbb" "" "" "" "" }, // Correct. { { "1. aaa", "", " bbb" }, "", "" "" "aaa" "bbb" "" "" }, // Correct. { { "1. aaa", " bbb", " ccc" }, "", "" "" "" "" "aaa" "bbb\nccc" "" "" "" "" }, // Correct. { { "1. aaa", "2. bbb" }, "", "" "aaa" "bbb" "" }, // Correct. { { "1. aaa", " bbb", "2. ccc" }, "", "" "aaa\nbbb" "ccc" "" }, // Correct. { { "1. aaa", " bbb", "", "2. ccc" }, "", "" "aaa\nbbb" "ccc" "" }, // Correct. { { "1. aaa", " bbb", "2. ccc" }, "", "" "" "" "" "aaa" "bbb" "" "" "" "ccc" "" }, // Correct. { { "1. aaa", "", " bbb", "2. ccc" }, "", "" "" "aaa" "bbb" "" "ccc" "" }, // Correct. // Bullet list without text immediately after the bullet. { { "1." }, "", "" "" "" }, // Correct. { { "1.", "2.", "3." }, "1. 2. 3.", "1.\n2.\n3." }, // Correct. { { "1.", "", " aaa" }, "", "" "aaa" "" }, // Correct. { { "1.", "", " aaa", " bbb" }, "", "" "aaa\nbbb" "" }, // Correct. { { "1.", "", " aaa", "", " bbb" }, "", "" "aaabbb" "" }, // Correct. // Enumerated list. Different marker styles. { { "1. aaa", "(2) bbb" }, "", "1. aaa\n(2) bbb" }, // Correct. { { "1. aaa", "2) bbb" }, "", "1. aaa\n2) bbb" }, // Correct. { { "(1) aaa", "2. bbb" }, "", "(1) aaa\n2. bbb" }, // Correct. { { "(1) aaa", "2) bbb" }, "", "(1) aaa\n2) bbb" }, // Correct. { { "1) aaa", "2. bbb" }, "", "1) aaa\n2. bbb" }, // Correct. { { "1) aaa", "(2) bbb" }, "", "1) aaa\n(2) bbb" }, // Correct. { { "1. aaa", "2. bbb", "(3) ccc" }, "", "" "aaa" "" "2. bbb\n(3) ccc" }, // Correct. { { "1. aaa", "2. bbb", "3) ccc" }, "", "" "aaa" "" "2. bbb\n3) ccc" }, // Correct. { { "(1) aaa", "(2) bbb", "3. ccc" }, "", "" "aaa" "" "(2) bbb\n3. ccc" }, // Correct. { { "(1) aaa", "(2) bbb", "3) ccc" }, "", "" "aaa" "" "(2) bbb\n3) ccc" }, // Correct. { { "1) aaa", "2) bbb", "3. ccc" }, "", "" "aaa" "" "2) bbb\n3. ccc" }, // Correct. 
{ { "1) aaa", "2) bbb", "(3) ccc" }, "", "" "aaa" "" "2) bbb\n(3) ccc" }, // Correct. { { "1. aaa", " bbb", "2) ccc" }, "", // FIXME: missing diagnostic. "" "aaa\nbbb" "" "" "ccc" "" }, // Correct. // Nested lists. { { "(1) (1) aaa", " (2) bbb", "(2) ccc" }, "", "" "" "" "aaa" "bbb" "" "" "ccc" "" }, // Correct. { { "* aaa", " + bbb", " + ccc", "* ddd" }, "", "" "aaa\n+ bbb\n+ ccc" "ddd" "" }, // Correct. { { "* aaa", "", " + bbb", " + ccc", "", "* ddd" }, "", "" "" "aaa" "" "bbb" "ccc" "" "" "ddd" "" }, // Correct. // Field list. { { ":aaa:" }, "", "" "" "aaa" "" "" "" }, // Correct. { { ":aaa:", " bbb" }, "", "" "" "aaa" "bbb" "" "" }, // Correct. { { ":aaa:", " bbb", " ccc" }, "", "" "" "aaa" "bbb\nccc" "" "" }, // Correct. { { ":aaa:", " bbb", " ccc" }, "", "" "" "aaa" "bbb\nccc" "" "" }, // Correct. { { ":aaa: bbb", }, "", "" "" "aaa" "bbb" "" "" }, // Correct. { { ":aaa: bbb", " ccc" }, "", "" "" "aaa" "bbb\nccc" "" "" }, // Correct. { { ":aaa: bbb", ":ccc: ddd", }, "", "" "" "aaa" "bbb" "" "" "ccc" "ddd" "" "" }, // Correct. { { ":aaa: bbb", "", ":ccc: ddd", }, "", "" "" "aaa" "bbb" "" "" "ccc" "ddd" "" "" }, // Correct. { { ":aaa: bbb", "", ":ccc: ddd", " eee" }, "", "" "" "aaa" "bbb" "" "" "ccc" "ddd\neee" "" "" }, // Correct. { { ":aaa: bbb", "bbb", ":ddd: eee", }, "", // FIXME: missing diagnostic. "" "" "aaa" "bbb" "" "" "bbb\n:ddd: eee" }, // Correct. { { ":aaa: bbb", " ccc", ":ddd: eee", " fff" }, "", "" "" "aaa" "bbb\nccc" "" "" "ddd" "eee\nfff" "" "" }, // Correct. { { ":aaa: bbb", " ccc", ":ddddd: eee", " fff" }, "", "" "" "aaa" "bbb\nccc" "" "" "ddddd" "eee\nfff" "" "" }, // Correct. { { ":aaa: bbb", " ccc", " ddd" }, "", "" "" "aaa" "bbb\nccc\nddd" "" "" }, // Correct. { { ":aaa: bbb", " ccc", " ddd" }, "", "" "" "aaa" "" "" "bbb\nccc" "" "ddd" "" "" "" // REST-FIXME: LLVM-REST-DIFFERENCE: Docutils parses the above as // [field_list ... [field_body [definition_list ...] 
[paragraph "ddd"]]] // The definition list does not make any sense in this context, it is // clearly a block quote -- the "ddd" line has less indentation, not more. }, { { ":aaa: bbb", " ccc" }, "", // Note: this should be parsed without the nested definition list, because // in a field list (unlike bullet and enumerated lists), the second line // determines the indentation of the field body. "" "" "aaa" "bbb\nccc" "" "" }, // Correct. { { ":aaa: bbb", " ccc", " ddd" }, "", // Note: similarly to the case above, this should be parsed without the // nested definition list. "" "" "aaa" "bbb\nccc\nddd" "" "" }, // Correct. { { ":foo: bar", " * aaa", " * bbb" }, "", "" "" "foo" "" "bar\n* aaa\n* bbb" "" "" "" }, // Correct. { { ":foo: bar", "", " * aaa", " * bbb" }, "", "" "" "foo" "" "bar" "" "aaa" "bbb" "" "" "" "" }, // Correct. { { ":foo: bar", " (1) aaa", " (2) bbb" }, "", "" "" "foo" "" "bar\n(1) aaa\n(2) bbb" "" "" "" }, // Correct. { { ":foo: bar", "", " (1) aaa", " (2) bbb" }, "", "" "" "foo" "" "bar" "" "aaa" "bbb" "" "" "" "" }, // Correct. { { ":foo: bar", "", "(1) aaa", "(2) bbb" }, "", "" "" "foo" "" "bar" "" "" "" "" "aaa" "bbb" "" }, // Correct. { { "* aaa", " :bbb: ccc", " :ddd: eee", " :fff: ggg" }, "", "" "" "aaa\n:bbb: ccc\n:ddd: eee\n:fff: ggg" "" "" }, // Correct. { { "* aaa", "", " :bbb: ccc", " :ddd: eee", " :fff: ggg" }, "", "" "" "aaa" "" "" "bbb" "" "ccc" "" "" "" "ddd" "" "eee" "" "" "" "fff" "" "ggg" "" "" "" "" "" }, // Correct. { { "* aaa", "", " :bbb: ccc", "* ddd" }, "", "" "" "aaa" "" "" "bbb" "ccc" "" "" "" "" "ddd" "" "" }, // Correct. { { "* aaa", "", " :bbb: ccc", " :ddd: eee", "* fff" }, "", "" "" "aaa" "" "" "bbb" "ccc" "" "" "ddd" "eee" "" "" "" "" "fff" "" "" }, // Correct. // Definition lists. { { "aaa", " bbb" }, "", "" "" "aaa" "bbb" "" "" }, // Correct. { { "aaa", " bbb", "", " ccc" }, "", "" "" "aaa" "bbbccc" "" "" }, // Correct. { { "aaa", " bbb", "", " ccc" }, "", "" "" "aaa" "" "bbb" "ccc" "" "" "" }, // Correct. 
{ { "aaa", " bbb", "", " ccc", "", " ddd", "", " eee", "", " fff" }, "", "" "" "aaa" "" "" "bbb" "ccc" "" "ddd" "eee" "fff" "" "" "" }, // Correct. { { "aaa", " * bbb" }, "", "" "" "aaa" "" "" "bbb" "" "" "" "" }, // Correct. // Definition lists with classifiers. // FIXME: classifiers are not recognized. { { "aaa : xxx", " bbb" }, "", "" "" "aaa : xxx" "bbb" "" "" }, // Incorrect: classifiers. { { "aaa : xxx : yyy", " bbb" }, "", // REST-FIXME: the spec states that the content model for // definition_list_item is // // (term, classifier?, definition) // // which should say 'classifier*' instead. "" "" "aaa : xxx : yyy" "bbb" "" "" }, // Incorrect: classifiers. { { "aaa : xxx", " bbb", "", "ccc : yyy", " ddd" }, "", "" "" "aaa : xxx" "bbb" "" "" "ccc : yyy" "ddd" "" "" }, { { "aaa : xxx", " bbb", "", " ccc : yyy", " ddd" }, "", "" "" "aaa : xxx" "" "bbb" "" "" "ccc : yyy" "ddd" "" "" "" "" "" }, // Incorrect: classifiers. Nesting is correct. // Definition lists with inline markup inside the term line. { { "``aaa`` : xxx", " bbb" }, "", "" "" "aaa : xxx" "bbb" "" "" }, // Incorrect: classifiers. { { "aaa : ``xxx``", " bbb" }, "", // REST-FIXME: The spec states that: // [ReST/Syntax Details/Body Elements/Definition Lists] // Quote: // // Inline markup is parsed in the term line before the classifier // delimiter (" : ") is recognized. // // But contrary to that, docutils implementation recognizes inline markup // everywhere in the term line. So does LLVM ReST. "" "" "aaa : xxx" "bbb" "" "" }, // Incorrect: classifiers. { { "``aaa`` : ``xxx``", " bbb" }, "", "" "" "aaa : xxx" "bbb" "" "" }, // Incorrect: classifiers. { { "``aaa`` : ``xxx`` : **yyy**", " bbb" }, "", "" "" "aaa : xxx : yyy" "bbb" "" "" }, // Incorrect: classifiers. { { "``aaa : xxx``", " bbb" }, "", // Classifier delimiter inside inline markup is not recognized. "" "" "aaa : xxx" "bbb" "" "" }, // Correct. // Block quotes. { { " aaa", "", "bbb" }, "aaa", "" "aaa" "" "bbb" }, // Correct. 
{ { " aaa", "", " bbb", "", "ccc" }, "aaa", "" "" "aaa" "" "bbb" "" "ccc" }, // Correct. { { " aaa", " ", " bbb" }, "aaa", "" "aaa" "bbb" "" }, // Correct. { { " aaa", "", " bbb" }, "aaa", "" "" "aaa" "" "bbb" "" }, // Correct. { { " aaa", "", " bbb" }, "aaa", "" "aaa" "" "bbb" "" "" }, // Correct. { { " aaa", " bbb" }, "aaa bbb", "" "aaa\nbbb" "" }, // Unexpected indentation. { { "aaa", "bbb", " ccc" }, "aaa bbb", "aaa\nbbb" "" "ccc" "" }, // Correct. { { "aaa", "bbb", " * ccc" }, "aaa bbb", "aaa\nbbb" "" "" "ccc" "" "" }, // Correct. { { "aaa", "bbb", " 1. ccc" }, "aaa bbb", "aaa\nbbb" "" "" "ccc" "" "" }, // Correct. { { "aaa", "bbb", " (1) ccc" }, "aaa bbb", "aaa\nbbb" "" "" "ccc" "" "" }, // Correct. // // Inline markup. // // Special cases where text inside inline markup could be confused for the // markup itself. // // FIXME: missing diagnostic (no end-string). { { "aaa _`xyz`_ bbb" }, "aaa _`xyz`_ bbb", "aaa _`xyz`_ bbb" }, // Correct. { { "*\\*" }, "*\\*", "*\\*" }, // FIXME: XML and brief should be unescaped. { { "*\\**" }, "*\\**", "\\*" }, // FIXME: XML should be unescaped, wrong brief. { { "**\\***" }, "**\\***", "\\*" }, // FIXME: XML should be unescaped, wrong brief. { { "**\\**" }, "**\\**", "**\\**" }, // FIXME: XML and brief should be unescaped. { { "***" }, "***", "***" }, // REST-FIXME: LLVM-REST-DIFFERENCE: docutils recognizes * with emphasis. // But this contradicts the spec, which says that ** (strong emphasis) is // recognized before * (emphasis). In this case, LLVM-REST is // recognizing ** (strong emphasis), followed by *, and does not find the // end-string. { { "*****" }, "*****", "*" }, // FIXME: XML is correct, wrong brief. { { "`\\`" }, "`\\`", "`\\`" }, // FIXME: XML and brief should be unescaped. { { "`\\``" }, "`\\``", "\\`" }, // FIXME: XML should be unescaped, wrong brief. { { "``\\```" }, "``\\```", "\\`" }, // FIXME: XML should be unescaped, wrong brief. 
{ { "``\\``" }, "``\\``", "``\\``" }, // FIXME: XML and brief should be unescaped. { { "```" }, "```", "```" }, // Correct. Missing warning (missing end-string for ``). // Inline markup end-strings must be immediately preceded by non-whitespace. { { "*\\ *" }, "*\\ *", "*\\ *" }, // FIXME: XML and brief should be unescaped. { { "*\\ \\ *" }, "*\\ \\ *", "*\\ \\ *" }, // FIXME: XML and brief should be unescaped. { { "*aaa\\ *" }, "*aaa\\ *", "*aaa\\ *" }, // FIXME: XML and brief should be unescaped. { { "aaa *bbb\\ * ccc" }, "aaa *bbb\\ * ccc", "aaa *bbb\\ * ccc" }, // FIXME: XML and brief should be unescaped. { { "aaa *bb\\b* ccc" }, "aaa *bb\\b* ccc", "aaa bb\\b ccc" }, // FIXME: XML and brief should be unescaped. // Start-string and end-string can be escaped. { { "aaa \\*bbb* ccc" }, "aaa \\*bbb* ccc", "aaa \\*bbb* ccc" }, // FIXME: XML and brief should be unescaped, missing warning. { { "aaa *bbb\\* ccc" }, "aaa *bbb\\* ccc", "aaa *bbb\\* ccc" }, // FIXME: XML and brief should be unescaped, missing warning. { { "aaa *bbb\\* ccc* ddd" }, "aaa *bbb\\* ccc* ddd", "aaa bbb\\* ccc ddd" }, // FIXME: XML and brief should be unescaped. // Start-string can be immediately preceded by certain characters. { { "aaa *bbb* ccc" }, "aaa *bbb* ccc", "aaa bbb ccc" }, // Correct. { { "aaa\\ *bbb* ccc" }, "aaa\\ *bbb* ccc", "aaa\\ bbb ccc" }, // FIXME: XML and brief should be unescaped. { { "aaa-*bbb* ccc" }, "aaa-*bbb* ccc", "aaa-bbb ccc" }, // Correct. { { "aaa\\-*bbb* ccc" }, "aaa\\-*bbb* ccc", "aaa\\-bbb ccc" }, // FIXME: XML and brief should be unescaped. { { "aaa:*bbb* ccc" }, "aaa:*bbb* ccc", "aaa:bbb ccc" }, // Correct. { { "aaa\\:*bbb* ccc" }, "aaa\\:*bbb* ccc", "aaa\\:bbb ccc" }, // FIXME: XML and brief should be unescaped. { { "aaa/*bbb* ccc" }, "aaa/*bbb* ccc", "aaa/bbb ccc" }, // Correct. { { "aaa\\/*bbb* ccc" }, "aaa\\/*bbb* ccc", "aaa\\/bbb ccc" }, // FIXME: XML and brief should be unescaped. { { "aaa'*bbb* ccc" }, "aaa'*bbb* ccc", "aaa'bbb ccc" }, // Correct. 
{ { "aaa\\'*bbb* ccc" }, "aaa\\'*bbb* ccc", "aaa\\'bbb ccc" }, // FIXME: XML and brief should be unescaped. { { "aaa\"*bbb* ccc" }, "aaa\"*bbb* ccc", "aaa"bbb ccc" }, // Correct. { { "aaa\\\"*bbb* ccc" }, "aaa\\\"*bbb* ccc", "aaa\\"bbb ccc" }, // FIXME: XML and brief should be unescaped. { { "aaa<*bbb* ccc" }, "aaa<*bbb* ccc", "aaa<bbb ccc" }, // Correct. { { "aaa\\<*bbb* ccc" }, "aaa\\<*bbb* ccc", "aaa\\<bbb ccc" }, // FIXME: XML and brief should be unescaped. { { "aaa(*bbb* ccc" }, "aaa(*bbb* ccc", "aaa(bbb ccc" }, // Correct. { { "aaa\\(*bbb* ccc" }, "aaa\\(*bbb* ccc", "aaa\\(bbb ccc" }, // FIXME: XML and brief should be unescaped. { { "aaa[*bbb* ccc" }, "aaa[*bbb* ccc", "aaa[bbb ccc" }, // Correct. { { "aaa\\[*bbb* ccc" }, "aaa\\[*bbb* ccc", "aaa\\[bbb ccc" }, // FIXME: XML and brief should be unescaped. { { "aaa{*bbb* ccc" }, "aaa{*bbb* ccc", "aaa{bbb ccc" }, // Correct. { { "aaa\\{*bbb* ccc" }, "aaa\\{*bbb* ccc", "aaa\\{bbb ccc" }, // FIXME: XML and brief should be unescaped. // FIXME: same tests as above, but with spaces after 'aaa'. // Other special cases. { { "aaa * *bbb* ccc" }, "aaa * *bbb* ccc", "aaa * bbb ccc" }, // Correct. { { "aaa* *bbb* ccc" }, "aaa* *bbb* ccc", "aaa* bbb ccc" }, // Correct. // FIXME: substitution references sholud be substituted. { { "|aaa|" }, "|aaa|", "|aaa|" }, { { "|aaa|_" }, "|aaa|_", "|aaa|_" }, { { "|aaa|__" }, "|aaa|__", "|aaa|__" }, // FIXME: remove inline markup from brief comments. 
{ { "_`aaa`" }, "_`aaa`", "aaa" }, { { "[1]_" }, "[1]_", "[1]_" }, { { "[12]_" }, "[12]_", "[12]_" }, { { "[#]_" }, "[#]_", "[#]_" }, { { "[#aaa]_" }, "[#aaa]_", "[#aaa]_" }, { { "[*]_" }, "[*]_", "[*]_" }, { { "[aaa]_" }, "[aaa]_", "[aaa]_" }, { { "aaa_" }, "aaa_", "aaa_" }, { { "`aaa`_" }, "`aaa`_", "aaa" }, { { "aaa__" }, "aaa__", "aaa__" }, { { "`aaa`__" }, "`aaa`__", "`aaa`__" }, { { "`aaa `_" }, "`aaa `_", "" "aaa <http://example.org/>" "" }, { { "`aaa `__" }, "`aaa `__", "`aaa <foo.txt\\_>`__" }, }; INSTANTIATE_TEST_CASE_P( ReSTTest, ExtractBriefTest, ::testing::ValuesIn(ExtractBriefTests)); struct ExtractBriefTest_UnicodeSubstitutions : public ReSTTest, public ::testing::WithParamInterface {}; TEST_P(ExtractBriefTest_UnicodeSubstitutions, Test) { const auto &Test = GetParam(); std::vector Replacements = { // U+0041 LATIN CAPITAL LETTER A "\x41", // U+0283 LATIN SMALL LETTER ESH "\xca\x83", // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B "\xe4\xbe\x8b", // U+E0100 VARIATION SELECTOR-17 "\xf3\xa0\x84\x80", }; for (auto XReplacement : Replacements) { for (auto YReplacement : Replacements) { for (auto ZReplacement : Replacements) { std::vector InText; for (auto Line : Test.InText) InText.push_back(Line); std::string ExpectedBrief = Test.Brief; std::string ExpectedDocutilsXML = Test.DocutilsXML; replaceText(InText, ExpectedBrief, ExpectedDocutilsXML, "X", XReplacement); replaceText(InText, ExpectedBrief, ExpectedDocutilsXML, "Y", YReplacement); replaceText(InText, ExpectedBrief, ExpectedDocutilsXML, "Z", ZReplacement); checkInlineMarkup(InText, ExpectedBrief, ExpectedDocutilsXML); } } } } struct ExtractBriefTestData ExtractBriefTests_UnicodeSubstitutions[] = { { { "XYZ" }, "XYZ", "XYZ" }, // // Inline markup. // // FIXME: filter inline markup from brief comments. // Every kind of inline markup with 0, 1, 2 and 3 characters between markup // markers. // Emphasis. 
// REST-FIXME: LLVM-REST-DIFFERENCE: Docutils emits a diagnostic (no // end-string), but it is pointless in this case. { { "**" }, "**", "**" }, // Correct. { { "*X*" }, "*X*", "X" }, // Correct. { { "*XY*" }, "*XY*", "XY" }, // Correct. { { "*XYZ*" }, "*XYZ*", "XYZ" }, // Correct. { { "** aaa" }, "** aaa", "** aaa" }, // Correct. { { "*X* aaa" }, "*X* aaa", "X aaa" }, // Correct. { { "*XY* aaa" }, "*XY* aaa", "XY aaa" }, // Correct. { { "*XYZ* aaa" }, "*XYZ* aaa", "XYZ aaa" }, // Correct. { { "aaa ** bbb" }, "aaa ** bbb", "aaa ** bbb" }, // Correct. { { "aaa *X* bbb" }, "aaa *X* bbb", "aaa X bbb" }, // Correct. { { "aaa *XY* bbb" }, "aaa *XY* bbb", "aaa XY bbb" }, // Correct. { { "aaa *XYZ* bbb" }, "aaa *XYZ* bbb", "aaa XYZ bbb" }, // Correct. { { "aaa *XYZ", "XYZ* bbb" }, "aaa *XYZ XYZ* bbb", "aaa XYZ\nXYZ bbb" }, // Correct. { { "aaa *bbb* ccc *XYZ", "XYZ* ddd *eee* fff" }, "aaa *bbb* ccc *XYZ XYZ* ddd *eee* fff", "" "aaa bbb ccc XYZ\n" "XYZ ddd eee fff" "" }, // Correct. { { "aaa *X", "Z* bbb" }, "aaa *X Z* bbb", "aaa X\nZ bbb" }, // Correct. { { "aaa *bbb* ccc *X", "Z* ddd *eee* fff" }, "aaa *bbb* ccc *X Z* ddd *eee* fff", "" "aaa bbb ccc X\n" "Z ddd eee fff" "" }, // Correct. { { "aaa *XYZ", "XYZ bbb*" }, "aaa *XYZ XYZ bbb*", "aaa XYZ\nXYZ bbb" }, // Correct. { { "aaa *bbb* *XYZ", "XYZ ccc*" }, "aaa *bbb* *XYZ XYZ ccc*", "" "aaa bbb XYZ\n" "XYZ ccc" "" }, // Correct. { { "aaa *XYZ", "bbb ccc", "XYZ ddd* eee" }, "aaa *XYZ bbb ccc XYZ ddd* eee", "" "aaa XYZ\n" "bbb ccc\n" "XYZ ddd eee" "" }, // Correct. // FIXME: missing diagnostic (no end-string). { { "aaa *X", "* bbb" }, "aaa *X * bbb", "aaa *X\n* bbb" }, // Correct, missing diagnostic. { { "aaa *", "X* bbb" }, "aaa * X* bbb", "aaa *\nX* bbb" }, // Correct, no diagnostic required. { { "aaa *", "* bbb" }, "aaa * * bbb", "aaa *\n* bbb" }, // Correct, no diagnostic required. // Strong emphasis. { { "****" }, "****", "****" }, // Correct. { { "**X**" }, "**X**", "X" }, // Correct. 
{ { "**XY**" }, "**XY**", "XY" }, // Correct. { { "**XYZ**" }, "**XYZ**", "XYZ" }, // Correct. { { "**** aaa" }, "**** aaa", "**** aaa" }, // Correct. { { "**X** aaa" }, "**X** aaa", "X aaa" }, // Correct. { { "**XY** aaa" }, "**XY** aaa", "XY aaa" }, // Correct. { { "**XYZ** aaa" }, "**XYZ** aaa", "XYZ aaa" }, // Correct. { { "aaa **** bbb" }, "aaa **** bbb", "aaa **** bbb" }, // Correct. { { "aaa **X** bbb" }, "aaa **X** bbb", "aaa X bbb" }, // Correct. { { "aaa **XY** bbb" }, "aaa **XY** bbb", "aaa XY bbb" }, // Correct. { { "aaa **XYZ** bbb" }, "aaa **XYZ** bbb", "aaa XYZ bbb" }, // Correct. { { "aaa **bbb** ccc **XYZ", "XYZ** ddd **eee** fff" }, "aaa **bbb** ccc **XYZ XYZ** ddd **eee** fff", "" "aaa bbb ccc XYZ\n" "XYZ ddd eee fff" "" }, // Correct. // Interpreted text. { { "``" }, "``", "``" }, // Correct. { { "`X`" }, "`X`", "X" }, // Correct. { { "`XY`" }, "`XY`", "XY" }, // Correct. { { "`XYZ`" }, "`XYZ`", "XYZ" }, // Correct. { { "`` aaa" }, "`` aaa", "`` aaa" }, // Correct. { { "`X` aaa" }, "`X` aaa", "X aaa" }, // Correct. { { "`XY` aaa" }, "`XY` aaa", "XY aaa" }, // Correct. { { "`XYZ` aaa" }, "`XYZ` aaa", "XYZ aaa" }, // Correct. { { "aaa `` bbb" }, "aaa `` bbb", "aaa `` bbb" }, // Correct. { { "aaa `X` bbb" }, "aaa `X` bbb", "aaa X bbb" }, // Correct. { { "aaa `XY` bbb" }, "aaa `XY` bbb", "aaa XY bbb" }, // Correct. { { "aaa `XYZ` bbb" }, "aaa `XYZ` bbb", "aaa XYZ bbb" }, // Correct. { { "aaa `bbb` ccc `XYZ", "XYZ` ddd `eee` fff" }, "aaa `bbb` ccc `XYZ XYZ` ddd `eee` fff", "" "aaa bbb ccc XYZ\n" "XYZ ddd eee fff" "" }, // Correct. // Inline literal. { { "````" }, "````", "````" }, // Correct. { { "``X``" }, "``X``", "X" }, // Correct. { { "``XY``" }, "``XY``", "XY" }, // Correct. { { "``XYZ``" }, "``XYZ``", "XYZ" }, // Correct. { { "```` aaa" }, "```` aaa", "```` aaa" }, // Correct. { { "``X`` aaa" }, "``X`` aaa", "X aaa" }, // Correct. { { "``XY`` aaa" }, "``XY`` aaa", "XY aaa" }, // Correct. 
{ { "``XYZ`` aaa" }, "``XYZ`` aaa", "XYZ aaa" }, // Correct. { { "aaa ```` bbb" }, "aaa ```` bbb", "aaa ```` bbb" }, // Correct. { { "aaa ``X`` bbb" }, "aaa ``X`` bbb", "aaa X bbb" }, // Correct. { { "aaa ``XY`` bbb" }, "aaa ``XY`` bbb", "aaa XY bbb" }, // Correct. { { "aaa ``XYZ`` bbb" }, "aaa ``XYZ`` bbb", "aaa XYZ bbb" }, // Correct. { { "aaa ``bbb`` ccc ``XYZ", "XYZ`` ddd ``eee`` fff" }, "aaa ``bbb`` ccc ``XYZ XYZ`` ddd ``eee`` fff", "" "aaa bbb ccc XYZ\n" "XYZ ddd eee fff" "" }, // Correct. // Hyperlink reference. { { "``_" }, "``_", "``_" }, // Correct. { { "`X`_" }, "`X`_", "X" }, // Correct. { { "`XY`_" }, "`XY`_", "XY" }, // Correct. { { "`XYZ`_" }, "`XYZ`_", "XYZ" }, // Correct. { { "``_ aaa" }, "``_ aaa", "``_ aaa" }, // Correct. { { "`X`_ aaa" }, "`X`_ aaa", "X aaa" }, // Correct. { { "`XY`_ aaa" }, "`XY`_ aaa", "XY aaa" }, // Correct. { { "`XYZ`_ aaa" }, "`XYZ`_ aaa", "XYZ aaa" }, // Correct. { { "aaa ``_ bbb" }, "aaa ``_ bbb", "aaa ``_ bbb" }, // Correct. { { "aaa `X`_ bbb" }, "aaa `X`_ bbb", "aaa X bbb" }, // Correct. { { "aaa `XY`_ bbb" }, "aaa `XY`_ bbb", "aaa XY bbb" }, // Correct. { { "aaa `XYZ`_ bbb" }, "aaa `XYZ`_ bbb", "aaa XYZ bbb" }, // Correct. { { "aaa `bbb`_ ccc `XYZ", "XYZ`_ ddd `eee`_ fff" }, "aaa `bbb`_ ccc `XYZ XYZ`_ ddd `eee`_ fff", "" "aaa bbb ccc XYZ\n" "XYZ ddd eee fff" "" }, // Correct. // Inline hyperlink target. { { "_``" }, "_``", "_``" }, // Correct. { { "_`X`" }, "_`X`", "X" }, // Correct. { { "_`XY`" }, "_`XY`", "XY" }, // Correct. { { "_`XYZ`" }, "_`XYZ`", "XYZ" }, // Correct. { { "_`` aaa" }, "_`` aaa", "_`` aaa" }, // Correct. { { "_`X` aaa" }, "_`X` aaa", "X aaa" }, // Correct. { { "_`XY` aaa" }, "_`XY` aaa", "XY aaa" }, // Correct. { { "_`XYZ` aaa" }, "_`XYZ` aaa", "XYZ aaa" }, // Correct. { { "aaa _`` bbb" }, "aaa _`` bbb", "aaa _`` bbb" }, // Correct. { { "aaa _`X` bbb" }, "aaa _`X` bbb", "aaa X bbb" }, // Correct. { { "aaa _`XY` bbb" }, "aaa _`XY` bbb", "aaa XY bbb" }, // Correct. 
{ { "aaa _`XYZ` bbb" }, "aaa _`XYZ` bbb", "aaa XYZ bbb" }, // Correct. { { "aaa _`bbb` ccc _`XYZ", "XYZ` ddd _`eee` fff" }, "aaa _`bbb` ccc _`XYZ XYZ` ddd _`eee` fff", "" "aaa bbb ccc XYZ\n" "XYZ ddd eee fff" "" }, // Correct. /* FIXME // Footnote reference. { { "[]_" }, "[]_", "[]_" }, // Correct. { { "[X]_" }, "[X]_", "X" }, // Correct. { { "[XY]_" }, "[XY]_", "XY" }, // Correct. { { "[XYZ]_" }, "[XYZ]_", "XYZ" }, // Correct. { { "[]_ aaa" }, "[]_ aaa", "[]_ aaa" }, // Correct. { { "[X]_ aaa" }, "[X]_ aaa", "X aaa" }, // Correct. { { "[XY]_ aaa" }, "[XY]_ aaa", "XY aaa" }, // Correct. { { "[XYZ]_ aaa" }, "[XYZ]_ aaa", "XYZ aaa" }, // Correct. { { "aaa []_ bbb" }, "aaa []_ bbb", "aaa []_ bbb" }, // Correct. { { "aaa [X]_ bbb" }, "aaa [X]_ bbb", "aaa X bbb" }, // Correct. { { "aaa [XY]_ bbb" }, "aaa [XY]_ bbb", "aaa XY bbb" }, // Correct. { { "aaa [XYZ]_ bbb" }, "aaa [XYZ]_ bbb", "aaa XYZ bbb" }, // Correct. { { "aaa [bbb]_ ccc [XYZ", "XYZ]_ ddd [eee]_ fff" }, "aaa [bbb]_ ccc [XYZ XYZ]_ ddd [eee]_ fff", "" "aaa bbb ccc XYZ\n" "XYZ ddd eee fff" "" }, // FIXME: verify */ // FIXME: citation reference. // Substitution reference. // FIXME: should resolve substitutions. { { "||" }, "||", "||" }, // Correct. { { "|X|" }, "|X|", "|X|" }, // Wrong. { { "|XY|" }, "|XY|", "|XY|" }, // Wrong. { { "|XYZ|" }, "|XYZ|", "|XYZ|" }, // Wrong. { { "|| aaa" }, "|| aaa", "|| aaa" }, // Wrong. { { "|X| aaa" }, "|X| aaa", "|X| aaa" }, // Wrong. { { "|XY| aaa" }, "|XY| aaa", "|XY| aaa" }, // Wrong. { { "|XYZ| aaa" }, "|XYZ| aaa", "|XYZ| aaa" }, // Wrong. { { "aaa || bbb" }, "aaa || bbb", "aaa || bbb" }, // Wrong. { { "aaa |X| bbb" }, "aaa |X| bbb", "aaa |X| bbb" }, // Wrong. { { "aaa |XY| bbb" }, "aaa |XY| bbb", "aaa |XY| bbb" }, // Wrong. { { "aaa |XYZ| bbb" }, "aaa |XYZ| bbb", "aaa |XYZ| bbb" }, // Wrong. 
/* { { "aaa |bbb| ccc |XYZ", "XYZ| ddd |eee| fff" }, "aaa |bbb| ccc |XYZ XYZ| ddd |eee| fff", "" "aaa bbb ccc XYZ\n" "XYZ ddd eee fff" "" }, // FIXME: verify */ }; INSTANTIATE_TEST_CASE_P( ReSTTest, ExtractBriefTest_UnicodeSubstitutions, ::testing::ValuesIn(ExtractBriefTests_UnicodeSubstitutions)); struct ExtractBriefTest_UnterminatedInlineMarkup : public ReSTTest, public ::testing::WithParamInterface {}; TEST_P(ExtractBriefTest_UnterminatedInlineMarkup, Test) { const auto &Test = GetParam(); std::vector> StartEndStrings = { {"*", "*"}, {"**", "**"}, {"`", "`"}, {"``", "``"}, {"|", "|"}, {"_`", "`"}, {"[", "]_"}, {"`", "`_"}, }; for (auto Start : StartEndStrings) { auto StartString = Start.first; for (auto End : StartEndStrings) { auto EndString = End.second; if (inlineMarkupDelimitersMatch(StartString, EndString)) continue; checkInlineMarkupWithReplacement(Test, StartString, EndString); } } } struct ExtractBriefTestData ExtractBriefTests_UnterminatedInlineMarkup[] = { // Unterminated markup. // FIXME: missing diagnostic (no end-string). { { "*" }, "", "" }, // Correct. { { "**" }, "**", "**" }, // Correct. { { "`" }, "`", "`" }, // Correct. { { "``" }, "``", "``" }, // Correct. { { "|" }, "|", "|" }, // Incorrect: line block. { { "_`" }, "_`", "_`" }, // Correct. { { "[" }, "[", "[" }, // Correct. 
{ { "Sxyz" }, "Sxyz", "Sxyz" }, { { "SxyzE" }, "SxyzE", "SxyzE" }, { { "Sxyz aaa" }, "Sxyz aaa", "Sxyz aaa" }, { { "SxyzE aaa" }, "SxyzE aaa", "SxyzE aaa" }, { { "aaa SxyzE bbb" }, "aaa SxyzE bbb", "aaa SxyzE bbb" }, { { "SxyzE aaa", "bbbE ccc" }, "SxyzE aaa bbbE ccc", "SxyzE aaa\nbbbE ccc" }, { { "aaa SxyzE bbb", "cccE ddd" }, "aaa SxyzE bbb cccE ddd", "aaa SxyzE bbb\ncccE ddd" }, { { "aaa bbb", "SxyzE cccE" }, "aaa bbb SxyzE cccE", "aaa bbb\nSxyzE cccE" }, }; INSTANTIATE_TEST_CASE_P( ReSTTest, ExtractBriefTest_UnterminatedInlineMarkup, ::testing::ValuesIn(ExtractBriefTests_UnterminatedInlineMarkup)); struct ExtractBriefTest_NestedMarkup : public ReSTTest, public ::testing::WithParamInterface {}; TEST_P(ExtractBriefTest_NestedMarkup, Test) { const auto &Test = GetParam(); std::vector> StartEndStrings = { {"*", "*"}, {"**", "**"}, {"`", "`"}, {"``", "``"}, {"|", "|"}, {"_`", "`"}, {"[", "]_"}, {"`", "`_"}, }; for (auto StartEnd : StartEndStrings) { auto StartString = StartEnd.first; auto EndString = StartEnd.second; if (StringRef(Test.InText.front()).startswith(StartString) || StringRef(Test.InText.back()).endswith(EndString)) return; checkInlineMarkupWithReplacement(Test, StartString, EndString); } } struct ExtractBriefTestData ExtractBriefTests_NestedMarkup[] = { // Nested inline markup should not be recognized. 
{ { "*aaa SxyzE bbb*" }, "*aaa SxyzE bbb*", "aaa SxyzE bbb" }, { { "*aaa S bbb*" }, "*aaa S bbb*", "aaa S bbb" }, { { "*aaa E bbb*" }, "*aaa E bbb*", "aaa E bbb" }, { { "aaa S *bbb* ccc" }, "aaa S *bbb* ccc", "aaa S bbb ccc" }, { { "aaaS *bbb* ccc" }, "aaaS *bbb* ccc", "aaaS bbb ccc" }, { { "**aaa SxyzE bbb**" }, "**aaa SxyzE bbb**", "aaa SxyzE bbb" }, { { "`aaa SxyzE bbb`" }, "`aaa SxyzE bbb`", "aaa SxyzE bbb" }, { { "``aaa SxyzE bbb``" }, "``aaa SxyzE bbb``", "aaa SxyzE bbb" }, { { "`aaa SxyzE bbb`_" }, "`aaa SxyzE bbb`_", "aaa SxyzE bbb" }, { { "_`aaa SxyzE bbb`" }, "_`aaa SxyzE bbb`", "aaa SxyzE bbb" }, { { "[aaa SxyzE bbb]_" }, "[aaa SxyzE bbb]_", "[aaa SxyzE bbb]_" }, // FIXME: citation reference. { { "|aaa SxyzE bbb|" }, "|aaa SxyzE bbb|", "|aaa SxyzE bbb|" }, }; INSTANTIATE_TEST_CASE_P( ReSTTest, ExtractBriefTest_NestedMarkup, ::testing::ValuesIn(ExtractBriefTests_NestedMarkup)); TEST_F(ReSTTest, ExtractWord_LinePart) { auto ToLinePart = [&](StringRef S) { return LinePart{S, SM.registerLine(S, 0)}; }; { LinePart LP = ToLinePart(""); EXPECT_FALSE(extractWord(LP).hasValue()); } { LinePart LP = ToLinePart("a"); auto R = extractWord(LP).getValue(); EXPECT_EQ("a", R.first.Text); EXPECT_EQ("", R.second.Text); } { LinePart LP = ToLinePart("abc"); auto R = extractWord(LP).getValue(); EXPECT_EQ("abc", R.first.Text); EXPECT_EQ("", R.second.Text); } { LinePart LP = ToLinePart("a "); auto R = extractWord(LP).getValue(); EXPECT_EQ("a", R.first.Text); EXPECT_EQ("", R.second.Text); } { LinePart LP = ToLinePart("abc d"); auto R = extractWord(LP).getValue(); EXPECT_EQ("abc", R.first.Text); EXPECT_EQ("d", R.second.Text); } { LinePart LP = ToLinePart("abc \td"); auto R = extractWord(LP).getValue(); EXPECT_EQ("abc", R.first.Text); EXPECT_EQ("d", R.second.Text); } } // Tests for bullet lists: // // "* aaa" // "+ bbb" // error: bullet list (*) ends without a blank line // // "* aaa" // " * bbb" // ok: [bullet list "aaa"], [block quote [bullet list "bbb"]] // // "* aaa" // " * 
bbb" // ok: [bullet list "aaa * bbb"] // // "* aaa" // " * bbb" // ok: [bullet list (text "aaa", block quote with [list "bbb"])] // // "aaa" // "* bbb" // ok: plain text // // "* aaa" // "" // "* bbb" // "" // " * ccc" // ok: bullet list ("aaa", "bbb"), followed by a block quote with [list "ccc"] // // "* aaa" // "" // " * ccc" // ok: bullet list ("aaa", [bullet list "ccc"]) // Bullet lists without text immediately after the bullet: // // "* " // "aaa" // warning: unexpected unindent // // "* " // " aaa" // ok: bullet list item with text "aaa" // note: the text is on the *next* column after the bullet. // // "* " // " aaa" // ok: bullet list item with text "aaa" // // "* " // " aaa" // " bbb" // ok: bullet list item with text "aaa bbb" // // "* " // " aaa" // " bbb" // ok: bullet list item with (text "aaa" + block quote "bbb") // // REST-FIXME: arguably, this is a bug either in docutils, or in the spec. // According to [ReST/Syntax Details/Body Elements/Bullet Lists], the bullet // character should be immediately followed by whitespace. In order to avoid // requiring trailing whitespace to make empty list items, it makes sense to // relax the rule here. // // "*" // "" // "aaa" // ok: bullet list with one empty item, paragraph with text "aaa" // // "* " // "" // " bbb" // docutils: bullet list item with text "bbb" // REST-FIXME: the standard does not say anything specific about this, // but it does look weird, and there might be an ambiguity with block quotes. // Compare the example above to: // "* aaa" // "" // " bbb" // Here, "bbb" is clearly a block quote nested in a list item. // Also, [ReST/Syntax Details/Whitespace/Blank Lines] says: "Blank lines are // used to separate paragraphs and other elements." // // LLVM-REST-DIFFERENCE: For now, LLVM ReST will not use a strict reading of // the standard and will recognize a list item if the bullet is followed by a // newline. This allows one to strip trailing whitespace without affecting // semantics.
// // LLVM-REST-DIFFERENCE: "with no blank lines in between" part in the text // below. // // REST-FIXME: If the line with the bullet does not have text, the text is // allowed to start on the next line (with no blank lines in between), however, // it should be indented relative to the bullet. // // "*" // // REST-FIXME: clarify the spec: if the line with the bullet does not have // text, and it is followed by an empty line, then it is an empty bullet item. // Tests for enumerated lists: // // "aaa" // "(1) bbb" // ok: plain text // // "1. aaa" // "" // "1. aaa" // ok: two lists // // "1. aaa" // "" // "2. aaa" // ok: one list // // "1. aaa" // "" // "3. aaa" // ok: two lists // warning: list starts with non-ordinal-1 enumerator // // "2. aaa" // "" // "3. aaa" // ok: one list // warning: list starts with non-ordinal-1 enumerator // // "1. aaa" // "" // "2) aaa" // ok: two lists // warning: list starts with non-ordinal-1 enumerator // // "1." // "2." // "3." // ok: plain text // REST-FIXME: this is inconsistent with bullet lists. If it was a bullet // list, then it would be parsed as three list items. // // "(1)" // "(2) a" // ok: list with two items // // "(1)" // "(2) a" // "(3)" // ok: list with one empty item "(1)", and a plain text paragraph + // warning about list ending without a blank line. // REST-FIXME: when compared with previous example, this behavior is // surprising. // // "1. a" // "" // "2." // "3. c" // ok: list with three items. // // REST-FIXME: it looks like an empty list item in an enumerated list is // allowed if it is preceded by an empty line. // REST-FIXME: this is inconsistent with bullet lists.
// // "(Ii) aaa" // ok: plain text // // "(iI) aaa" // ok: plain text // // "(v) aaa" // ok: list that starts with 22 // warning: list starts with non-ordinal-1 enumerator // // "(iv) a" // "(v) a" // ok: list that starts with 4 // warning: list starts with non-ordinal-1 enumerator // // "(v) a" // "(vi) a" // ok: plain text // REST-FIXME: this should at least emit a warning. // // "(v) a" // "(#) a" // ok: list that starts with 22 // warning: list starts with non-ordinal-1 enumerator // // "(v) a" // "23. a" // ok: plain text // REST-FIXME: this should at least emit a warning. // // "(v) a" // "" // "(vi) a" // ok: two lists // x2 warning: list starts with non-ordinal-1 enumerator // // "(v) a" // "a" // ok: plain text // // "(v) a" // " a" // ok: list + paragraph // warning: list ends without a blank line // // "(1) a" // "(2) b" // "c" // ok: list "a" + paragraph "(2) b c" // warning: list ends without a blank line // // "(1) a" // "(3) b" // "c" // ok: plain text // REST-FIXME: this should at least emit a warning. // // // "1. a" // "2." // " aaa" // "3. b" // ok: paragraph "1. a 2.", paragraph "aaa", list with one item "b" // warning: unexpected indentation (about "aaa") // warning: unexpected unindent, block quote ends without a blank line // // REST-FIXME: clarify exactly how the check on the next line is performed. It // looks like the exact rules are as follows. // // def: A NEELEL (non-empty enumerated list item line) is a line that has an // enumerator, followed by whitespace, followed by non-empty text. // // a. if the next line is blank, then current line is a list item. // // b. if the next line is on the same indentation level as the enumerator: // b.1. if the next line is a NEELEL, and enumeration sequence continues // without a gap (or the next line uses the auto-enumerator '#'), and the // enumeration sequence style and formatting match exactly, then *current line* // is a list item. (The check on the next line is performed separately.) // // b.2. 
if the next line is a NEELEL, but other conditions to continue the list // are not met, then current line is the start of a paragraph. // REST-FIXME: in this case the implementation should emit a warning. // // b.3. if the next line is not a NEELEL, then current line is the start of a // paragraph. // // c. if the next line has the same or greater amount of indentation as the // text after the enumerator, then the current line is a list item, text inside // the list item is subject to normal rules. // // d. if the next line has more indentation than the enumerator, but less than // the item text, then the current line is a list item, and the list ends // there. The next line is subject to normal rules (will be parsed as a block // quote). This case requires a diagnostic. Docutils uses the wording // "Enumerated list ends without a blank line". // // e. if the next line has less indentation than the enumerator, then the // current line is a list item, and the list ends there. The next line is subject // to normal rules, and should be matched with one of the containing blocks. // // Note: in the rules above it is essential that NEELEL has non-empty text, // because if it was empty, it would fail the previous-line check, and would // not be considered a list item. // // REST-FIXME: if the current line looks like an enumerated list item, but it // does not have text after the enumerator, then it needs to pass checks both // for previous and next lines. // // "0. a" // "1. b" // docutils/LLVM-REST: a list with two items. // REST-FIXME: the spec seems to disallow this implicitly, by presenting the // arabic numeral sequence as "arabic numerals: 1, 2, 3, ... (no upper limit)." // Probably this should be disallowed, but to simplify the implementation, we // allow it until the spec is clarified. // // "-1. 
a" // ok: plain text // Tests for field lists: // // :foo: bar // :*foo*: bar // :**foo**: bar // :`foo`: bar // :``foo``: bar // ok: as expected, inline markup is inside the field name // // :*foo: bar*: baz // :**foo: bar**: baz // :`foo: bar`: baz // :``foo: bar``: baz // error: inline markup start without end in field name // Field name only spans until "foo:". // // :*foo\: bar*: baz // :**foo\: bar**: baz // :`foo\: bar`: baz // ok: field name is "foo: bar" text with inline markup // // :``foo\: bar``: baz // ok: field name is "foo\: bar" text with inline markup // REST-FIXME: How do we express a field name with a colon inside ``...``, but // without getting a backslash in the output? // // :foo\: bar // ok: not a bullet list, text, literally ":foo: bar" // // :foo\ // ok: text ":foo" // Make sure we don't crash on this, trying to access the escaped character // after the end of the line. // // "aaa" // ":foo: bbb" // ok: plain text // Tests for block quotes: // // "* aaa" // "" // " bbb" // docutils: [list "aaa"] [blockquote [paragraph "bbb"]] // // ":aaa: bbb" // " ccc" // "" // " ddd" // docutils: [field list ["aaa", [definition list ]] [paragraph "ddd"]], // // Notice the inconsistency above. // REST-FIXME: both should be blockquotes, or blockquotes at the beginning of // every element should be disallowed. // Misc points: // [ReST/Syntax Details/Body Elements/Field Lists] // Quote: // Field names are case-insensitive when further processed or transformed. // // REST-FIXME: clarify what exactly this means for Unicode. A reasonable thing // would be to say that the above point only applies if the field name is // ASCII-only. 
// Test ideas for inline markup: // // * aaa* bbb no // // aaa * bbb* ccc no // aaa\ * bbb* ccc no // aaa-* bbb* ccc no // // aaa*bbb* ccc no // aaa\a*bbb* ccc no // aaa\*bbb* ccc no // aaa\\*bbb* ccc no // aaa\**bbb* ccc no // aaa \*bbb* ccc no // aaa +*bbb* ccc no // aaa >*bbb* ccc no // aaa )*bbb* ccc no // aaa ]*bbb* ccc no // aaa }*bbb* ccc no // aaa =*bbb* ccc no // aaa ~*bbb* ccc no // aaa @*bbb* ccc no // aaa #*bbb* ccc no // aaa $*bbb* ccc no // aaa %*bbb* ccc no // aaa ^*bbb* ccc no // aaa &*bbb* ccc no // aaa .*bbb* ccc no // aaa ,*bbb* ccc no // aaa ?*bbb* ccc no // aaa ;*bbb* ccc no // aaa **bbb* ccc no // warning: Inline strong start-string without end-string. // ... and same for Unicode // // same tests, but for characters that follow, "*aaa*\ bbb" // // aaa '*' ccc no // aaa "*" ccc no // aaa <*> ccc no // aaa (*) ccc no // aaa [*] ccc no // aaa {*} ccc no // ... and same for Unicode // // Test footnote and citation references when footnote text is non-ASCII.