mirror of
https://github.com/apple/swift.git
synced 2026-06-20 15:42:51 +02:00
e6e4bd6056
Add support for UTF8Span Also, refactor validation and grapheme breaking
296 lines
8.9 KiB
Swift
296 lines
8.9 KiB
Swift
// RUN: %target-run-stdlib-swift %S/Inputs/
|
|
|
|
// REQUIRES: executable_test
|
|
|
|
// FIXME: this test is currently broken
|
|
|
|
import Swift
|
|
import StdlibUnittest
|
|
|
|
// The suite on which this file's tests are registered (see the
// `suite.test(...)` call further down).
var suite = TestSuite("UTF8.ValidationError")
|
|
// Top-level `defer`: `runAllTests()` is called when the file's top-level code
// finishes, i.e. after the `suite.test(...)` registration below has executed.
defer { runAllTests() }
|
|
|
|
@available(SwiftStdlib 6.2, *)
extension Array {
  /// Calls `f` with a `Span` built over the array's contiguous storage and
  /// returns whatever `f` returns.
  ///
  /// The span is created from the buffer handed out by
  /// `withUnsafeBufferPointer`, so it is only usable inside `f`.
  ///
  /// - Parameter f: The closure to run against the span.
  /// - Returns: The result of `f`.
  /// - Throws: Rethrows any error thrown by `f`.
  func withSpan<R>(_ f: (Span<Element>) throws -> R) rethrows -> R {
    try withUnsafeBufferPointer { buffer in
      try f(Span(_unsafeElements: buffer))
    }
  }
}
|
|
|
|
|
|
extension Range where Bound == Int {
  /// Returns this range shifted by `start`: both bounds have `start` added
  /// to them, so the length is unchanged.
  ///
  /// Used to translate offsets that are relative to a slice back into
  /// offsets relative to the whole input.
  func _offset(by start: Int) -> Range<Int> {
    let shiftedLower = start + lowerBound
    let shiftedUpper = start + upperBound
    return shiftedLower..<shiftedUpper
  }
}
|
|
|
|
/// An expected `UTF8.ValidationError`, tagged with whether it marks the start
/// of an ill-formed sequence.
///
/// The static factories mirror the error kinds so that test tables can be
/// written tersely, e.g. `.overlongEncodingByte(at: 1, errorStart: false)`.
@available(SwiftStdlib 6.2, *)
private struct ValidationError {
  /// The error expected when all errors of the input are fetched at once.
  var error: UTF8.ValidationError

  /// Whether this entry is reported with its own kind even when validation
  /// restarts on a slice of the input.
  ///
  /// When `false` and the input was sliced to surface the next error (e.g.
  /// via `UTF8Span.init(validating:)`), `ValidationTestCase.fetchError`
  /// expects `.unexpectedContinuationByte` at the same byte offsets instead
  /// of `error.kind`.
  var errorStart: Bool

  init(
    _ error: UTF8.ValidationError,
    errorStart: Bool
  ) {
    self.error = error
    self.errorStart = errorStart
  }

  // NOTE: the factories below carry no explicit access modifier. The
  // enclosing type is `private`, so a `public` annotation could never take
  // effect and only misstated their visibility.

  /// Expected `.unexpectedContinuationByte` at byte offset `i`.
  static func unexpectedContinuationByte(
    at i: Int, errorStart: Bool = true
  ) -> Self {
    Self(.init(.unexpectedContinuationByte, at: i), errorStart: errorStart)
  }

  /// Expected `.surrogateCodePointByte` at byte offset `i`.
  static func surrogateCodePointByte(
    at i: Int, errorStart: Bool = true
  ) -> Self {
    Self(.init(.surrogateCodePointByte, at: i), errorStart: errorStart)
  }

  /// Expected `.invalidNonSurrogateCodePointByte` at byte offset `i`.
  static func invalidNonSurrogateCodePointByte(
    at i: Int, errorStart: Bool = true
  ) -> Self {
    Self(
      .init(.invalidNonSurrogateCodePointByte, at: i),
      errorStart: errorStart)
  }

  /// Expected `.overlongEncodingByte` at byte offset `i`.
  static func overlongEncodingByte(
    at i: Int, errorStart: Bool = true
  ) -> Self {
    Self(.init(.overlongEncodingByte, at: i), errorStart: errorStart)
  }

  /// Expected `.truncatedScalar` covering the byte offsets in `range`.
  static func truncatedScalar(
    _ range: Range<Int>, errorStart: Bool = true
  ) -> Self {
    Self(.init(.truncatedScalar, range), errorStart: errorStart)
  }
}
|
|
|
|
|
|
/// One table-driven validation case: an input byte sequence together with
/// the errors that validating it is expected to produce, in order.
@available(SwiftStdlib 6.2, *)
private struct ValidationTestCase {
  /// The bytes to validate.
  var bytes: [UInt8]

  /// The expected errors, in the order they are reported. Each entry's
  /// `errorStart` flag decides which kind is expected once the input has
  /// been sliced (see `fetchError(at:wasSliced:)`).
  var errors: [ValidationError]

  /// Location of the code that created this case; used as the base of the
  /// stack trace handed to the StdlibUnittest expectation functions.
  var loc: SourceLocStack

  init(
    _ bytes: [UInt8],
    file: String = #file,
    line: UInt = #line,
    _ errors: [ValidationError]
  ) {
    self.bytes = bytes
    self.errors = errors
    self.loc = SourceLocStack(SourceLoc(file, line))
  }

  /// Returns the error expected for entry `i`.
  ///
  /// - Parameters:
  ///   - i: Index into `errors`.
  ///   - wasSliced: Whether validation was restarted on a slice of the input.
  ///     In that case an entry with `errorStart == false` is expected to be
  ///     reported as `.unexpectedContinuationByte` at the same byte offsets.
  func fetchError(
    at i: Int, wasSliced: Bool
  ) -> UTF8.ValidationError {
    let expected = errors[i]
    guard wasSliced, !expected.errorStart else {
      return expected.error
    }
    return UTF8.ValidationError(
      .unexpectedContinuationByte, expected.error.byteOffsets)
  }

  /// `expectEqual` with this case's source location on the stack trace.
  func expect<T: Equatable>(
    _ lhs: T,
    _ rhs: T,
    file: String = #file,
    line: UInt = #line
  ) {
    let trace = loc.withCurrentLoc(file: file, line: line)
    expectEqual(lhs, rhs, stackTrace: trace)
  }

  /// Records an unconditional failure with this case's source location on
  /// the stack trace.
  func fail(
    _ message: String,
    file: String = #file,
    line: UInt = #line
  ) {
    let trace = loc.with(SourceLoc(file, line))
    expectationFailure(message, trace: "", stackTrace: trace)
  }

  /// Test UTF8._checkAllErrors(), which matches directly against
  /// the provided expected-errors.
  func testAllErrors() {
    let caughtErrors = Array(UTF8._checkAllErrors(bytes))

    // Compare pairwise over the common prefix; a length mismatch is
    // reported separately by the count check below.
    for (i, caught) in zip(errors.indices, caughtErrors) {
      expect(fetchError(at: i, wasSliced: false), caught)
    }
    expect(caughtErrors.count, errors.count)
  }

  /// Test UTF8Span validation. Surface subsequent errors by slicing the
  /// input (which will convert the error-kind to .unexpectedContinuationByte)
  func testSpanSlicedErrors() {
    bytes.withSpan { span in
      guard !errors.isEmpty else {
        // No errors expected: the whole input must validate.
        do throws(UTF8.ValidationError) {
          _ = try UTF8Span(validating: span)
        } catch {
          fail("Unexpected error: \(error)")
        }
        return
      }

      // Walk the input error by error. After each reported error, validation
      // restarts on the slice that begins just past that error's bytes, so
      // continuation bytes of a multi-byte error are seen without their lead
      // byte.
      var sliceStart = 0
      var nextExpected = 0
      scan: while true {
        do throws(UTF8.ValidationError) {
          _ = try UTF8Span(validating: span._extracting(sliceStart...))

          // The remainder validated cleanly, so every expected error must
          // already have been consumed.
          if nextExpected != errors.endIndex {
            fail("Expected a thrown UTF-8 encoding error")
          }
          break scan
        } catch {
          guard nextExpected < errors.endIndex else {
            fail("Found unexpected subsequent error \(error)")
            break scan
          }

          let expectedError = fetchError(at: nextExpected, wasSliced: true)

          // Offsets in the thrown error are relative to the slice; shift
          // them back so they are relative to the whole input.
          let adjustedErr = UTF8.ValidationError(
            error.kind,
            error.byteOffsets._offset(by: sliceStart)
          )
          expect(expectedError, adjustedErr)

          sliceStart = adjustedErr.byteOffsets.upperBound
          nextExpected += 1
        }
      }

      // Rest of input should be error-free
      guard let tailStart = errors.last?.error.byteOffsets.upperBound,
            tailStart < bytes.count
      else {
        return
      }
      do throws(UTF8.ValidationError) {
        _ = try UTF8Span(validating: span._extracting(tailStart...))
      } catch {
        fail("Found subsequent error \(error)")
      }
    }
  }

  /// Runs both checks for this case: sliced span validation first, then the
  /// all-errors comparison.
  func run() {
    testSpanSlicedErrors()
    testAllErrors()
  }
}
|
|
|
|
if #available(SwiftStdlib 6.2, *) {
  suite.test("UTF8Span/encoding errors") {
    /// Wraps `bytes` and the expected `errors` in a `ValidationTestCase`
    /// (recording the caller's file and line) and runs it.
    func test(
      _ bytes: [UInt8],
      _ file: String = #file, line: UInt = #line,
      _ errors: ValidationError...
    ) {
      let testCase = ValidationTestCase(
        bytes, file: file, line: line, errors)
      testCase.run()
    }

    // NOTE(review): every case below is commented out, so this test
    // currently registers without exercising anything. As written, the cases
    // pass the expected errors as an array literal and use closed ranges
    // (`4...6`), whereas `test` declares a variadic `errors` parameter and
    // `truncatedScalar` takes a `Range<Int>` -- presumably they need
    // adjusting before being re-enabled; confirm against the FIXME at the
    // top of the file.

    // Valid string
    // test(Array("abcde\u{301}f😀🇺🇸🧟♀️🧟♀️".utf8), [])

    // Bad URL
    // test(
    //   Array("http://servername/scripts/..".utf8)
    //   + [0xC0, 0xAF]
    //   + Array("../winnt/system32/cmd.exe".utf8),
    //   [.overlongEncodingByte(at: 28), // C0
    //    .overlongEncodingByte(at: 29, errorStart: false), // AF
    //   ])

    // test(
    //   [0xC0, 0xAF, 0xE0, 0x80, 0xBF, 0xF0, 0x81, 0x82, 0x41],
    //   [.overlongEncodingByte(at: 0), // C0
    //    .overlongEncodingByte(at: 1, errorStart: false), // AF
    //    .overlongEncodingByte(at: 2), // E0
    //    .overlongEncodingByte(at: 3, errorStart: false), // 80
    //    .overlongEncodingByte(at: 4, errorStart: false), // BF
    //    .overlongEncodingByte(at: 5), // F0
    //    .overlongEncodingByte(at: 6, errorStart: false), // 81
    //    .overlongEncodingByte(at: 7, errorStart: false), // 82
    //   ])
    // test(
    //   [0x41, 0xC0, 0xAF, 0x41, 0xF4, 0x80, 0x80, 0x41],
    //   [.overlongEncodingByte(at: 1), // C0
    //    .overlongEncodingByte(at: 2, errorStart: false), // AF
    //    .truncatedScalar(4...6), // F4 80 80
    //   ])
    // test(
    //   [0xED, 0xAF, 0x41],
    //   [.surrogateCodePointByte(at: 0), // ED
    //    .surrogateCodePointByte(at: 1, errorStart: false), // AF
    //   ])
    // test(
    //   [0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF, 0xED, 0xAF, 0x41],
    //   [.surrogateCodePointByte(at: 0), // ED
    //    .surrogateCodePointByte(at: 1, errorStart: false), // A0
    //    .surrogateCodePointByte(at: 2, errorStart: false), // 80
    //    .surrogateCodePointByte(at: 3), // ED
    //    .surrogateCodePointByte(at: 4, errorStart: false), // BF
    //    .surrogateCodePointByte(at: 5, errorStart: false), // BF
    //    .surrogateCodePointByte(at: 6), // ED
    //    .surrogateCodePointByte(at: 7, errorStart: false), // AF
    //   ])
    // test(
    //   [0xF4, 0x91, 0x92, 0x93, 0xFF, 0x41, 0x80, 0xBF, 0x42],
    //   [.invalidNonSurrogateCodePointByte(at: 0), // F4
    //    .invalidNonSurrogateCodePointByte(at: 1, errorStart: false), // 91
    //    .invalidNonSurrogateCodePointByte(at: 2, errorStart: false), // 92
    //    .invalidNonSurrogateCodePointByte(at: 3, errorStart: false), // 93
    //    .invalidNonSurrogateCodePointByte(at: 4), // FF
    //    .unexpectedContinuationByte(at: 6), // 80
    //    .unexpectedContinuationByte(at: 7), // BF
    //   ])
    // test(
    //   [0xE1, 0x80, 0xE2, 0xF0, 0x91, 0x92, 0xF1, 0xBF, 0x41],
    //   [.truncatedScalar(0...1), // E1 80
    //    .truncatedScalar(2...2), // E2
    //    .truncatedScalar(3...5), // F0 91 92
    //    .truncatedScalar(6...7), // F1 BF
    //   ])
    // test(
    //   [0xE0, 0x81, 0x80],
    //   [.overlongEncodingByte(at: 0), // E0
    //    .overlongEncodingByte(at: 1, errorStart: false), // 81
    //    .overlongEncodingByte(at: 2, errorStart: false), // 80
    //   ])
  }
}
|