Revert "Revert "[Regex] Switch regex match to Swift tuples.""

This commit is contained in:
Richard Wei
2022-02-11 01:32:45 -08:00
committed by GitHub
parent f75ed39f97
commit 1b83efb512
12 changed files with 41 additions and 94 deletions

View File

@@ -367,12 +367,6 @@ private:
/// i.e. true if the entry is [key: alias_name, value: (real_name, true)].
mutable llvm::DenseMap<Identifier, std::pair<Identifier, bool>> ModuleAliasMap;
/// The maximum arity of `_StringProcessing.Tuple{n}`.
static constexpr unsigned StringProcessingTupleDeclMaxArity = 8;
/// Cached `_StringProcessing.Tuple{n}` declarations.
mutable SmallVector<StructDecl *, StringProcessingTupleDeclMaxArity - 2>
StringProcessingTupleDecls;
/// Retrieve the allocator for the given arena.
llvm::BumpPtrAllocator &
getAllocator(AllocationArena arena = AllocationArena::Permanent) const;
@@ -629,14 +623,6 @@ public:
/// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int).
ConcreteDeclRef getRegexInitDecl(Type regexType) const;
/// Retrieve the max arity that `_StringProcessing.Tuple{arity}` was
/// instantiated for.
unsigned getStringProcessingTupleDeclMaxArity() const;
/// Retrieve the `_StringProcessing.Tuple{arity}` declaration for the given
/// arity.
StructDecl *getStringProcessingTupleDecl(unsigned arity) const;
/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.
FuncDecl *getLessThanIntDecl() const;

View File

@@ -4834,9 +4834,6 @@ ERROR(string_processing_lib_missing,none,
ERROR(regex_capture_types_failed_to_decode,none,
"failed to decode capture types for regular expression literal; this may "
"be a compiler bug", ())
ERROR(regex_too_many_captures,none,
"too many captures in regular expression literal; the current limit is "
"%0", (unsigned))
//------------------------------------------------------------------------------
// MARK: Type Check Types

View File

@@ -49,6 +49,5 @@ KNOWN_SDK_TYPE_DECL(Distributed, RemoteCallTarget, StructDecl, 0)
// String processing
KNOWN_SDK_TYPE_DECL(StringProcessing, Regex, StructDecl, 1)
KNOWN_SDK_TYPE_DECL(StringProcessing, DynamicCaptures, EnumDecl, 0)
#undef KNOWN_SDK_TYPE_DECL

View File

@@ -1225,29 +1225,6 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
return ConcreteDeclRef(foundDecl, subs);
}
/// Returns the largest arity for which a `_StringProcessing.Tuple{n}`
/// declaration can be requested. This simply exposes the class-level constant
/// `StringProcessingTupleDeclMaxArity`.
unsigned ASTContext::getStringProcessingTupleDeclMaxArity() const {
return StringProcessingTupleDeclMaxArity;
}
/// Looks up (and caches) the `Tuple{arity}` struct declaration in the
/// `_StringProcessing` module.
///
/// \param arity Number of tuple elements; asserted to be at least 2.
/// \returns The cached or freshly looked-up declaration, or nullptr when
/// \p arity exceeds `StringProcessingTupleDeclMaxArity`.
StructDecl *ASTContext::getStringProcessingTupleDecl(unsigned arity) const {
assert(arity >= 2);
// Arities beyond the supported maximum have no declaration; callers are
// expected to handle the null result.
if (arity > StringProcessingTupleDeclMaxArity)
return nullptr;
// Lazily size the cache on first use: one null slot per arity in
// [2, StringProcessingTupleDeclMaxArity], i.e. MaxArity - 1 slots.
if (StringProcessingTupleDecls.empty())
StringProcessingTupleDecls.append(
StringProcessingTupleDeclMaxArity - 1, nullptr);
// Slot 0 corresponds to arity 2, hence the `- 2` offset. `decl` is a
// reference into the cache so the assignment below fills the slot.
auto &decl = StringProcessingTupleDecls[arity - 2];
if (decl)
return decl;
SmallVector<ValueDecl *, 1> results;
// NOTE(review): `spModule` is dereferenced without a null check; this
// presumably relies on the string processing module already being loaded --
// confirm against callers.
auto *spModule = getLoadedModule(Id_StringProcessing);
auto typeName = getIdentifier("Tuple" + llvm::utostr(arity));
spModule->lookupQualified(
spModule, DeclNameRef(typeName), NL_OnlyTypes, results);
// Exactly one type with this name is expected; this is only enforced in
// asserts-enabled builds.
assert(results.size() == 1);
// Store into the cache slot and return the same pointer.
return (decl = cast<StructDecl>(results[0]));
}
static
FuncDecl *getBinaryComparisonOperatorIntDecl(const ASTContext &C, StringRef op,
FuncDecl *&cached) {

View File

@@ -1267,27 +1267,20 @@ namespace {
ctx.Id_Regex.str());
return Type();
}
SmallVector<Type, 4> matchTypes {ctx.getSubstringType()};
SmallVector<TupleTypeElt, 4> matchElements {ctx.getSubstringType()};
if (decodeRegexCaptureTypes(ctx,
E->getSerializedCaptureStructure(),
/*atomType*/ ctx.getSubstringType(),
matchTypes)) {
matchElements)) {
ctx.Diags.diagnose(E->getLoc(),
diag::regex_capture_types_failed_to_decode);
return Type();
}
if (matchTypes.size() == 1)
if (matchElements.size() == 1)
return BoundGenericStructType::get(
regexDecl, Type(), matchTypes.front());
// Form a `_StringProcessing.Tuple{n}<...>`.
auto *tupleDecl = ctx.getStringProcessingTupleDecl(matchTypes.size());
if (!tupleDecl) {
ctx.Diags.diagnose(E->getLoc(), diag::regex_too_many_captures,
ctx.getStringProcessingTupleDeclMaxArity() - 1);
return Type();
}
auto matchType = BoundGenericStructType::get(
tupleDecl, Type(), matchTypes);
regexDecl, Type(), matchElements.front().getType());
// Form a tuple.
auto matchType = TupleType::get(matchElements, ctx);
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
}

View File

@@ -33,7 +33,7 @@ using namespace swift;
bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
ArrayRef<uint8_t> serialization,
Type atomType,
SmallVectorImpl<Type> &result) {
SmallVectorImpl<TupleTypeElt> &result) {
using Version = RegexLiteralExpr::CaptureStructureSerializationVersion;
static const Version implVersion = 1;
unsigned size = serialization.size();
@@ -46,7 +46,7 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
if (version != implVersion)
return true;
// Read contents.
SmallVector<SmallVector<Type, 4>, 4> scopes(1);
SmallVector<SmallVector<TupleTypeElt, 4>, 4> scopes(1);
unsigned offset = sizeof(Version);
auto consumeCode = [&]() -> Optional<RegexCaptureStructureCode> {
auto rawValue = serialization[offset];
@@ -73,22 +73,21 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
if (length >= size - offset)
return true; // Unterminated string.
StringRef name(namePtr, length);
// The name is currently unused because we are forming a nominal
// `Tuple{n}` type. We will switch back to native tuples when there are
// variadic generics.
(void)name;
scopes.back().push_back(atomType);
scopes.back().push_back(
TupleTypeElt(atomType, ctx.getIdentifier(name)));
offset += length + /*NUL*/ 1;
break;
}
case RegexCaptureStructureCode::FormArray: {
auto &type = scopes.back().back();
type = ArraySliceType::get(type);
auto &element = scopes.back().back();
element = TupleTypeElt(ArraySliceType::get(element.getType()),
element.getName());
break;
}
case RegexCaptureStructureCode::FormOptional: {
auto &type = scopes.back().back();
type = OptionalType::get(type);
auto &element = scopes.back().back();
element = TupleTypeElt(OptionalType::get(element.getType()),
element.getName());
break;
}
case RegexCaptureStructureCode::BeginTuple:
@@ -96,10 +95,7 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
break;
case RegexCaptureStructureCode::EndTuple: {
auto children = scopes.pop_back_val();
if (children.size() > ctx.getStringProcessingTupleDeclMaxArity())
return true;
auto tupleDecl = ctx.getStringProcessingTupleDecl(children.size());
auto type = BoundGenericStructType::get(tupleDecl, Type(), children);
auto type = TupleType::get(children, ctx);
scopes.back().push_back(type);
break;
}

View File

@@ -40,7 +40,7 @@ enum class RegexCaptureStructureCode: uint8_t {
bool decodeRegexCaptureTypes(ASTContext &ctx,
llvm::ArrayRef<uint8_t> serialization,
Type atomType,
llvm::SmallVectorImpl<Type> &result);
llvm::SmallVectorImpl<TupleTypeElt> &result);
} // end namespace swift

View File

@@ -4,7 +4,8 @@
_ = '/abc/'
_ = ('/[*/', '/+]/', '/.]/')
// expected-error@-1 {{cannot parse regular expression}}
// expected-error@-1 {{cannot parse regular expression: quantifier '+' must appear after expression}}
// expected-error@-2 {{cannot parse regular expression: expected ']'}}
_ = '/\w+/'
_ = '/\'\\/'

View File

@@ -47,7 +47,7 @@ RegexBasicTests.test("Captures") {
"""
let regex = '/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/'
// Test inferred type.
let _: Regex<Tuple4<Substring, Substring, Substring?, Substring>>.Type
let _: Regex<(Substring, Substring, Substring?, Substring)>.Type
= type(of: regex)
let match1 = input.expectMatch(regex)
expectEqual(input[...], input[match1.range])

View File

@@ -10,5 +10,5 @@ var s = '/abc/'
// CHECK: [[INT_INIT:%[0-9]+]] = function_ref @$sSi22_builtinIntegerLiteralSiBI_tcfC : $@convention(method) (Builtin.IntLiteral, @thin Int.Type) -> Int
// CHECK: [[VERSION_INT:%[0-9]+]] = apply [[INT_INIT]]([[VERSION_LITERAL]]
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>

View File

@@ -10,41 +10,39 @@ takesRegex('//') // okay
let r1 = '/.(.)/'
// Note: We test its type with a separate statement so that we know the type
// checker inferred the regex's type independently without contextual types.
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r1)
let _: Regex<(Substring, Substring)>.Type = type(of: r1)
struct S: MatchProtocol {
typealias Capture = Substring
}
// expected-error @+2 {{cannot assign value of type 'Regex<Tuple2<Substring, Substring>>' to type 'Regex<S>'}}
// expected-note @+1 {{arguments to generic parameter 'Match' ('Tuple2<Substring, Substring>' and 'S') are expected to be equal}}
struct S {}
// expected-error @+2 {{cannot assign value of type 'Regex<(Substring, Substring)>' to type 'Regex<S>'}}
// expected-note @+1 {{arguments to generic parameter 'Match' ('(Substring, Substring)' and 'S') are expected to be equal}}
let r2: Regex<S> = '/.(.)/'
let r3 = '/(.)(.)/'
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r3)
let _: Regex<(Substring, Substring, Substring)>.Type = type(of: r3)
let r4 = '/(?<label>.)(.)/'
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r4)
let _: Regex<(Substring, label: Substring, Substring)>.Type = type(of: r4)
let r5 = '/(.(.(.)))/'
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r5)
let _: Regex<(Substring, Substring, Substring, Substring)>.Type = type(of: r5)
let r6 = '/(?'we'.(?'are'.(?'regex'.)))/'
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r6)
let r6 = '/(?'we'.(?'are'.(?'regex'.)+)?)/'
let _: Regex<(Substring, we: Substring, are: Substring?, regex: [Substring]?)>.Type = type(of: r6)
let r7 = '/(?:(?:(.(.(.)*)?))*?)?/'
// ^ 1
// ^ 2
// ^ 3
let _: Regex<Tuple4<Substring, [Substring]?, [Substring?]?, [[Substring]?]?>>.Type = type(of: r7)
let _: Regex<(Substring, [Substring]?, [Substring?]?, [[Substring]?]?)>.Type = type(of: r7)
let r8 = '/well(?<theres_no_single_element_tuple_what_can_we>do)/'
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r8)
let _: Regex<(Substring, theres_no_single_element_tuple_what_can_we: Substring)>.Type = type(of: r8)
let r9 = '/(a)|(b)|(c)|d/'
let _: Regex<Tuple4<Substring, Substring?, Substring?, Substring?>>.Type = type(of: r9)
let _: Regex<(Substring, Substring?, Substring?, Substring?)>.Type = type(of: r9)
let r10 = '/(a)|b/'
let _: Regex<Tuple2<Substring, Substring?>>.Type = type(of: r10)
let _: Regex<(Substring, Substring?)>.Type = type(of: r10)
// expected-error @+1 {{too many captures in regular expression literal; the current limit is 7}}
let r11 = '/()()()()()()()()/' // 8 captures, too many for our prototype
let r11 = '/()()()()()()()()/'
let _: Regex<(Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring)>.Type = type(of: r11)

View File

@@ -123,7 +123,7 @@
"swift-cmark-gfm": "gfm",
"swift-nio": "2.31.2",
"swift-nio-ssl": "2.15.0",
"swift-experimental-string-processing": "dev/5"
"swift-experimental-string-processing": "dev/6"
}
},
"rebranch": {
@@ -157,7 +157,7 @@
"sourcekit-lsp": "main",
"swift-format": "main",
"swift-installer-scripts": "main",
"swift-experimental-string-processing": "dev/5"
"swift-experimental-string-processing": "dev/6"
}
},
"release/5.6": {
@@ -308,7 +308,7 @@
"sourcekit-lsp": "main",
"swift-format": "main",
"swift-installer-scripts": "main",
"swift-experimental-string-processing": "dev/5"
"swift-experimental-string-processing": "dev/6"
}
},
"release/5.4": {