mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
Revert "Revert "[Regex] Switch regex match to Swift tuples.""
This commit is contained in:
@@ -367,12 +367,6 @@ private:
|
||||
/// i.e. true if the entry is [key: alias_name, value: (real_name, true)].
|
||||
mutable llvm::DenseMap<Identifier, std::pair<Identifier, bool>> ModuleAliasMap;
|
||||
|
||||
/// The maximum arity of `_StringProcessing.Tuple{n}`.
|
||||
static constexpr unsigned StringProcessingTupleDeclMaxArity = 8;
|
||||
/// Cached `_StringProcessing.Tuple{n}` declarations.
|
||||
mutable SmallVector<StructDecl *, StringProcessingTupleDeclMaxArity - 2>
|
||||
StringProcessingTupleDecls;
|
||||
|
||||
/// Retrieve the allocator for the given arena.
|
||||
llvm::BumpPtrAllocator &
|
||||
getAllocator(AllocationArena arena = AllocationArena::Permanent) const;
|
||||
@@ -629,14 +623,6 @@ public:
|
||||
/// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int).
|
||||
ConcreteDeclRef getRegexInitDecl(Type regexType) const;
|
||||
|
||||
/// Retrieve the max arity that `_StringProcessing.Tuple{arity}` was
|
||||
/// instantiated for.
|
||||
unsigned getStringProcessingTupleDeclMaxArity() const;
|
||||
|
||||
/// Retrieve the `_StringProcessing.Tuple{arity}` declaration for the given
|
||||
/// arity.
|
||||
StructDecl *getStringProcessingTupleDecl(unsigned arity) const;
|
||||
|
||||
/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.
|
||||
FuncDecl *getLessThanIntDecl() const;
|
||||
|
||||
|
||||
@@ -4834,9 +4834,6 @@ ERROR(string_processing_lib_missing,none,
|
||||
ERROR(regex_capture_types_failed_to_decode,none,
|
||||
"failed to decode capture types for regular expression literal; this may "
|
||||
"be a compiler bug", ())
|
||||
ERROR(regex_too_many_captures,none,
|
||||
"too many captures in regular expression literal; the current limit is "
|
||||
"%0", (unsigned))
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// MARK: Type Check Types
|
||||
|
||||
@@ -49,6 +49,5 @@ KNOWN_SDK_TYPE_DECL(Distributed, RemoteCallTarget, StructDecl, 0)
|
||||
|
||||
// String processing
|
||||
KNOWN_SDK_TYPE_DECL(StringProcessing, Regex, StructDecl, 1)
|
||||
KNOWN_SDK_TYPE_DECL(StringProcessing, DynamicCaptures, EnumDecl, 0)
|
||||
|
||||
#undef KNOWN_SDK_TYPE_DECL
|
||||
|
||||
@@ -1225,29 +1225,6 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
|
||||
return ConcreteDeclRef(foundDecl, subs);
|
||||
}
|
||||
|
||||
/// Returns the class constant `StringProcessingTupleDeclMaxArity`, i.e. the
/// largest arity that `getStringProcessingTupleDecl` will attempt to look up.
unsigned ASTContext::getStringProcessingTupleDeclMaxArity() const {
|
||||
return StringProcessingTupleDeclMaxArity;
|
||||
}
|
||||
|
||||
/// Looks up, and caches, the struct declaration named `Tuple<arity>` in the
/// module returned by `getLoadedModule(Id_StringProcessing)`.
///
/// \param arity Number of tuple elements; asserted to be at least 2.
/// \returns The cached or freshly looked-up declaration, or nullptr when
/// \p arity is greater than `StringProcessingTupleDeclMaxArity`.
StructDecl *ASTContext::getStringProcessingTupleDecl(unsigned arity) const {
|
||||
assert(arity >= 2);
|
||||
// Arities above the supported maximum are reported to the caller as nullptr.
if (arity > StringProcessingTupleDeclMaxArity)
|
||||
return nullptr;
|
||||
// Size the cache on first use: one null slot per arity in
// [2, StringProcessingTupleDeclMaxArity], i.e. MaxArity - 1 slots.
if (StringProcessingTupleDecls.empty())
|
||||
StringProcessingTupleDecls.append(
|
||||
StringProcessingTupleDeclMaxArity - 1, nullptr);
|
||||
// Slot 0 corresponds to arity 2, hence the `- 2` offset. `decl` is a
// reference into the cache so the assignment at the end fills the slot.
auto &decl = StringProcessingTupleDecls[arity - 2];
|
||||
if (decl)
|
||||
return decl;
|
||||
// Cache miss: perform a types-only qualified lookup of "Tuple<arity>".
SmallVector<ValueDecl *, 1> results;
|
||||
// NOTE(review): `spModule` is dereferenced below without a null check;
// presumably callers ensure the module is loaded first -- confirm.
auto *spModule = getLoadedModule(Id_StringProcessing);
|
||||
auto typeName = getIdentifier("Tuple" + llvm::utostr(arity));
|
||||
spModule->lookupQualified(
|
||||
spModule, DeclNameRef(typeName), NL_OnlyTypes, results);
|
||||
// NOTE(review): exactly one result is only enforced by this assert; when
// asserts are compiled out, `results[0]` below is unchecked.
assert(results.size() == 1);
|
||||
// Store the declaration in the cache slot and return it.
return (decl = cast<StructDecl>(results[0]));
|
||||
}
|
||||
|
||||
static
|
||||
FuncDecl *getBinaryComparisonOperatorIntDecl(const ASTContext &C, StringRef op,
|
||||
FuncDecl *&cached) {
|
||||
|
||||
@@ -1267,27 +1267,20 @@ namespace {
|
||||
ctx.Id_Regex.str());
|
||||
return Type();
|
||||
}
|
||||
SmallVector<Type, 4> matchTypes {ctx.getSubstringType()};
|
||||
SmallVector<TupleTypeElt, 4> matchElements {ctx.getSubstringType()};
|
||||
if (decodeRegexCaptureTypes(ctx,
|
||||
E->getSerializedCaptureStructure(),
|
||||
/*atomType*/ ctx.getSubstringType(),
|
||||
matchTypes)) {
|
||||
matchElements)) {
|
||||
ctx.Diags.diagnose(E->getLoc(),
|
||||
diag::regex_capture_types_failed_to_decode);
|
||||
return Type();
|
||||
}
|
||||
if (matchTypes.size() == 1)
|
||||
if (matchElements.size() == 1)
|
||||
return BoundGenericStructType::get(
|
||||
regexDecl, Type(), matchTypes.front());
|
||||
// Form a `_StringProcessing.Tuple{n}<...>`.
|
||||
auto *tupleDecl = ctx.getStringProcessingTupleDecl(matchTypes.size());
|
||||
if (!tupleDecl) {
|
||||
ctx.Diags.diagnose(E->getLoc(), diag::regex_too_many_captures,
|
||||
ctx.getStringProcessingTupleDeclMaxArity() - 1);
|
||||
return Type();
|
||||
}
|
||||
auto matchType = BoundGenericStructType::get(
|
||||
tupleDecl, Type(), matchTypes);
|
||||
regexDecl, Type(), matchElements.front().getType());
|
||||
// Form a tuple.
|
||||
auto matchType = TupleType::get(matchElements, ctx);
|
||||
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
|
||||
}
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ using namespace swift;
|
||||
bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
|
||||
ArrayRef<uint8_t> serialization,
|
||||
Type atomType,
|
||||
SmallVectorImpl<Type> &result) {
|
||||
SmallVectorImpl<TupleTypeElt> &result) {
|
||||
using Version = RegexLiteralExpr::CaptureStructureSerializationVersion;
|
||||
static const Version implVersion = 1;
|
||||
unsigned size = serialization.size();
|
||||
@@ -46,7 +46,7 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
|
||||
if (version != implVersion)
|
||||
return true;
|
||||
// Read contents.
|
||||
SmallVector<SmallVector<Type, 4>, 4> scopes(1);
|
||||
SmallVector<SmallVector<TupleTypeElt, 4>, 4> scopes(1);
|
||||
unsigned offset = sizeof(Version);
|
||||
auto consumeCode = [&]() -> Optional<RegexCaptureStructureCode> {
|
||||
auto rawValue = serialization[offset];
|
||||
@@ -73,22 +73,21 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
|
||||
if (length >= size - offset)
|
||||
return true; // Unterminated string.
|
||||
StringRef name(namePtr, length);
|
||||
// The name is currently unused because we are forming a nominal
|
||||
// `Tuple{n}` type. We will switch back to native tuples when there are
|
||||
// variadic generics.
|
||||
(void)name;
|
||||
scopes.back().push_back(atomType);
|
||||
scopes.back().push_back(
|
||||
TupleTypeElt(atomType, ctx.getIdentifier(name)));
|
||||
offset += length + /*NUL*/ 1;
|
||||
break;
|
||||
}
|
||||
case RegexCaptureStructureCode::FormArray: {
|
||||
auto &type = scopes.back().back();
|
||||
type = ArraySliceType::get(type);
|
||||
auto &element = scopes.back().back();
|
||||
element = TupleTypeElt(ArraySliceType::get(element.getType()),
|
||||
element.getName());
|
||||
break;
|
||||
}
|
||||
case RegexCaptureStructureCode::FormOptional: {
|
||||
auto &type = scopes.back().back();
|
||||
type = OptionalType::get(type);
|
||||
auto &element = scopes.back().back();
|
||||
element = TupleTypeElt(OptionalType::get(element.getType()),
|
||||
element.getName());
|
||||
break;
|
||||
}
|
||||
case RegexCaptureStructureCode::BeginTuple:
|
||||
@@ -96,10 +95,7 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
|
||||
break;
|
||||
case RegexCaptureStructureCode::EndTuple: {
|
||||
auto children = scopes.pop_back_val();
|
||||
if (children.size() > ctx.getStringProcessingTupleDeclMaxArity())
|
||||
return true;
|
||||
auto tupleDecl = ctx.getStringProcessingTupleDecl(children.size());
|
||||
auto type = BoundGenericStructType::get(tupleDecl, Type(), children);
|
||||
auto type = TupleType::get(children, ctx);
|
||||
scopes.back().push_back(type);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ enum class RegexCaptureStructureCode: uint8_t {
|
||||
bool decodeRegexCaptureTypes(ASTContext &ctx,
|
||||
llvm::ArrayRef<uint8_t> serialization,
|
||||
Type atomType,
|
||||
llvm::SmallVectorImpl<Type> &result);
|
||||
llvm::SmallVectorImpl<TupleTypeElt> &result);
|
||||
|
||||
} // end namespace swift
|
||||
|
||||
|
||||
@@ -4,7 +4,8 @@
|
||||
_ = '/abc/'
|
||||
|
||||
_ = ('/[*/', '/+]/', '/.]/')
|
||||
// expected-error@-1 {{cannot parse regular expression}}
|
||||
// expected-error@-1 {{cannot parse regular expression: quantifier '+' must appear after expression}}
|
||||
// expected-error@-2 {{cannot parse regular expression: expected ']'}}
|
||||
|
||||
_ = '/\w+/'
|
||||
_ = '/\'\\/'
|
||||
|
||||
@@ -47,7 +47,7 @@ RegexBasicTests.test("Captures") {
|
||||
"""
|
||||
let regex = '/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/'
|
||||
// Test inferred type.
|
||||
let _: Regex<Tuple4<Substring, Substring, Substring?, Substring>>.Type
|
||||
let _: Regex<(Substring, Substring, Substring?, Substring)>.Type
|
||||
= type(of: regex)
|
||||
let match1 = input.expectMatch(regex)
|
||||
expectEqual(input[...], input[match1.range])
|
||||
|
||||
@@ -10,5 +10,5 @@ var s = '/abc/'
|
||||
// CHECK: [[INT_INIT:%[0-9]+]] = function_ref @$sSi22_builtinIntegerLiteralSiBI_tcfC : $@convention(method) (Builtin.IntLiteral, @thin Int.Type) -> Int
|
||||
// CHECK: [[VERSION_INT:%[0-9]+]] = apply [[INT_INIT]]([[VERSION_LITERAL]]
|
||||
|
||||
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
|
||||
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
|
||||
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
|
||||
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
|
||||
|
||||
@@ -10,41 +10,39 @@ takesRegex('//') // okay
|
||||
let r1 = '/.(.)/'
|
||||
// Note: We test its type with a separate statement so that we know the type
|
||||
// checker inferred the regex's type independently without contextual types.
|
||||
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r1)
|
||||
let _: Regex<(Substring, Substring)>.Type = type(of: r1)
|
||||
|
||||
struct S: MatchProtocol {
|
||||
typealias Capture = Substring
|
||||
}
|
||||
// expected-error @+2 {{cannot assign value of type 'Regex<Tuple2<Substring, Substring>>' to type 'Regex<S>'}}
|
||||
// expected-note @+1 {{arguments to generic parameter 'Match' ('Tuple2<Substring, Substring>' and 'S') are expected to be equal}}
|
||||
struct S {}
|
||||
// expected-error @+2 {{cannot assign value of type 'Regex<(Substring, Substring)>' to type 'Regex<S>'}}
|
||||
// expected-note @+1 {{arguments to generic parameter 'Match' ('(Substring, Substring)' and 'S') are expected to be equal}}
|
||||
let r2: Regex<S> = '/.(.)/'
|
||||
|
||||
let r3 = '/(.)(.)/'
|
||||
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r3)
|
||||
let _: Regex<(Substring, Substring, Substring)>.Type = type(of: r3)
|
||||
|
||||
let r4 = '/(?<label>.)(.)/'
|
||||
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r4)
|
||||
let _: Regex<(Substring, label: Substring, Substring)>.Type = type(of: r4)
|
||||
|
||||
let r5 = '/(.(.(.)))/'
|
||||
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r5)
|
||||
let _: Regex<(Substring, Substring, Substring, Substring)>.Type = type(of: r5)
|
||||
|
||||
let r6 = '/(?'we'.(?'are'.(?'regex'.)))/'
|
||||
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r6)
|
||||
let r6 = '/(?'we'.(?'are'.(?'regex'.)+)?)/'
|
||||
let _: Regex<(Substring, we: Substring, are: Substring?, regex: [Substring]?)>.Type = type(of: r6)
|
||||
|
||||
let r7 = '/(?:(?:(.(.(.)*)?))*?)?/'
|
||||
// ^ 1
|
||||
// ^ 2
|
||||
// ^ 3
|
||||
let _: Regex<Tuple4<Substring, [Substring]?, [Substring?]?, [[Substring]?]?>>.Type = type(of: r7)
|
||||
let _: Regex<(Substring, [Substring]?, [Substring?]?, [[Substring]?]?)>.Type = type(of: r7)
|
||||
|
||||
let r8 = '/well(?<theres_no_single_element_tuple_what_can_we>do)/'
|
||||
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r8)
|
||||
let _: Regex<(Substring, theres_no_single_element_tuple_what_can_we: Substring)>.Type = type(of: r8)
|
||||
|
||||
let r9 = '/(a)|(b)|(c)|d/'
|
||||
let _: Regex<Tuple4<Substring, Substring?, Substring?, Substring?>>.Type = type(of: r9)
|
||||
let _: Regex<(Substring, Substring?, Substring?, Substring?)>.Type = type(of: r9)
|
||||
|
||||
let r10 = '/(a)|b/'
|
||||
let _: Regex<Tuple2<Substring, Substring?>>.Type = type(of: r10)
|
||||
let _: Regex<(Substring, Substring?)>.Type = type(of: r10)
|
||||
|
||||
// expected-error @+1 {{too many captures in regular expression literal; the current limit is 7}}
|
||||
let r11 = '/()()()()()()()()/' // 8 captures, too many for our prototype
|
||||
let r11 = '/()()()()()()()()/'
|
||||
let _: Regex<(Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring)>.Type = type(of: r11)
|
||||
|
||||
@@ -123,7 +123,7 @@
|
||||
"swift-cmark-gfm": "gfm",
|
||||
"swift-nio": "2.31.2",
|
||||
"swift-nio-ssl": "2.15.0",
|
||||
"swift-experimental-string-processing": "dev/5"
|
||||
"swift-experimental-string-processing": "dev/6"
|
||||
}
|
||||
},
|
||||
"rebranch": {
|
||||
@@ -157,7 +157,7 @@
|
||||
"sourcekit-lsp": "main",
|
||||
"swift-format": "main",
|
||||
"swift-installer-scripts": "main",
|
||||
"swift-experimental-string-processing": "dev/5"
|
||||
"swift-experimental-string-processing": "dev/6"
|
||||
}
|
||||
},
|
||||
"release/5.6": {
|
||||
@@ -308,7 +308,7 @@
|
||||
"sourcekit-lsp": "main",
|
||||
"swift-format": "main",
|
||||
"swift-installer-scripts": "main",
|
||||
"swift-experimental-string-processing": "dev/5"
|
||||
"swift-experimental-string-processing": "dev/6"
|
||||
}
|
||||
},
|
||||
"release/5.4": {
|
||||
|
||||
Reference in New Issue
Block a user