[InterfacePrinting] Move utf8 sanitizer from SourceKit to ASTPrinter.

We sanitize comments by replacing each invalid UTF-8 subsequence with the replacement character U+FFFD (�).

Swift SVN r30031
This commit is contained in:
Xi Ge
2015-07-09 18:26:02 +00:00
parent 16c8a3b6c0
commit 1eac2874b1
3 changed files with 32 additions and 3 deletions

View File

@@ -87,6 +87,9 @@ public:
PendingDeclLocCallback = D;
}
/// Returns a well-formed UTF-8 copy of \p Text in which each malformed subsequence is replaced by U+FFFD.
static std::string sanitizeUtf8(StringRef Text);
private:
virtual void anchor();
};

View File

@@ -36,12 +36,38 @@
#include "clang/AST/Decl.h"
#include "clang/Basic/Module.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SaveAndRestore.h"
#include <algorithm>
using namespace swift;
/// Returns a copy of \p Text that is guaranteed to be well-formed UTF-8.
///
/// Legal UTF-8 sequences are copied through unchanged. Every malformed
/// subsequence (a bad lead byte, an overlong/surrogate/out-of-range encoding,
/// or a sequence truncated by the end of the input) is replaced by a single
/// U+FFFD REPLACEMENT CHARACTER. Only the offending lead byte and the
/// continuation bytes that belong to it are consumed, so well-formed text that
/// follows a malformed byte is preserved rather than swallowed.
///
/// \param Text the possibly malformed input bytes.
/// \returns the sanitized string; identical to \p Text when it is already
///          well-formed.
std::string ASTPrinter::sanitizeUtf8(StringRef Text) {
  llvm::SmallString<256> Builder;
  Builder.reserve(Text.size());
  const UTF8* Data = reinterpret_cast<const UTF8*>(Text.begin());
  const UTF8* End = reinterpret_cast<const UTF8*>(Text.end());
  // U+FFFD spelled as explicit UTF-8 bytes so the result does not depend on
  // the compiler's execution character set.
  StringRef Replacement = "\xEF\xBF\xBD";
  while (Data < End) {
    const size_t Remaining = static_cast<size_t>(End - Data);
    const size_t Step = getNumBytesForUTF8(*Data);

    // Compare lengths instead of forming `Data + Step`, which could point
    // more than one past the end of the buffer.
    if (Step <= Remaining && isLegalUTF8Sequence(Data, Data + Step)) {
      Builder.append(Data, Data + Step);
      Data += Step;
      continue;
    }

    // Malformed or truncated: emit one replacement character, then skip the
    // lead byte plus any continuation bytes (10xxxxxx) it claimed. Stop at the
    // first non-continuation byte so following valid characters survive.
    Builder.append(Replacement);
    const UTF8* Limit = Data + std::min(Step, Remaining);
    ++Data;
    while (Data < Limit && (*Data & 0xC0) == 0x80)
      ++Data;
  }
  // Explicit conversion; does not rely on StringRef converting implicitly.
  return Builder.str().str();
}
// Out-of-line, intentionally empty definition of the private virtual anchor().
// NOTE(review): presumably exists to pin ASTPrinter's vtable to this
// translation unit, as is conventional in LLVM-style code — confirm.
void ASTPrinter::anchor() {}
void ASTPrinter::printIndent() {
@@ -186,7 +212,7 @@ class PrintAST : public ASTVisitor<PrintAST> {
trimLeadingWhitespaceFromLines(RawText, WhitespaceToTrim, Lines);
for (auto Line : Lines) {
Printer << Line;
Printer << ASTPrinter::sanitizeUtf8(Line);
Printer.printNewline();
}
}

View File

@@ -505,7 +505,7 @@ void ClangCommentPrinter::printDeclPre(const Decl *D) {
void ClangCommentPrinter::printDeclPost(const Decl *D) {
OtherPrinter.printDeclPost(D);
for (auto CommentText : PendingComments) {
*this << " " << CommentText;
*this << " " << ASTPrinter::sanitizeUtf8(CommentText);
}
PendingComments.clear();
if (auto ClangN = D->getClangNode())
@@ -596,7 +596,7 @@ void ClangCommentPrinter::printComment(StringRef RawText, unsigned StartCol) {
trimLeadingWhitespaceFromLines(RawText, WhitespaceToTrim, Lines);
for (auto Line : Lines) {
*this << Line << "\n";
*this << ASTPrinter::sanitizeUtf8(Line) << "\n";
printIndent();
}
}