Files
swift-mirror/test/stdlib/UTF8EncodingErrorTests.swift
Michael Ilseman e6e4bd6056 UTF8Span (#78531)
Add support for UTF8Span

Also, refactor validation and grapheme breaking
2025-04-11 16:11:11 -06:00

296 lines
8.9 KiB
Swift

// RUN: %target-run-stdlib-swift %S/Inputs/
// REQUIRES: executable_test
// FIXME: this test is currently broken
import Swift
import StdlibUnittest
// Suite that the tests in this file are registered on.
var suite = TestSuite("UTF8.ValidationError")
// Deferred so the tests run only once the rest of the top-level code has
// finished registering them.
defer { runAllTests() }
@available(SwiftStdlib 6.2, *)
extension Array {
  /// Calls `f` with a `Span` over this array's elements and returns the
  /// closure's result.
  ///
  /// The span is created from the buffer pointer handed out by
  /// `withUnsafeBufferPointer`, so it is only meaningful while `f` runs.
  ///
  /// - Parameter f: A closure that receives the span.
  /// - Returns: The value returned by `f`.
  /// - Throws: Rethrows whatever `f` throws.
  func withSpan<R>(_ f: (Span<Element>) throws -> R) rethrows -> R {
    try withUnsafeBufferPointer { buffer in
      try f(Span(_unsafeElements: buffer))
    }
  }
}
extension Range<Int> {
  /// Returns this range with both bounds shifted by `start`.
  ///
  /// Used to translate offsets that are relative to a slice back into
  /// offsets relative to the whole input.
  ///
  /// - Parameter start: The amount added to each bound.
  /// - Returns: `(lowerBound + start)..<(upperBound + start)`.
  func _offset(by start: Int) -> Range<Int> {
    let shiftedLower = start + lowerBound
    let shiftedUpper = start + upperBound
    return shiftedLower..<shiftedUpper
  }
}
/// An expected validation error, paired with a flag saying whether it marks
/// the first byte of an error.
@available(SwiftStdlib 6.2, *)
private struct ValidationError {
  /// The error expected when all errors are fetched at once.
  var error: UTF8.ValidationError

  /// `true` when this entry is the start of an error.
  ///
  /// When fetching all errors, the error kind given in `error` is reported
  /// as-is. When the input is sliced to find the next error (e.g. via
  /// `UTF8Span.init(validating:)`), entries that are not an error start are
  /// expected to be reported as `.unexpectedContinuationByte` instead.
  var errorStart: Bool

  /// Wraps `error` together with its `errorStart` flag.
  init(
    _ error: UTF8.ValidationError,
    errorStart: Bool
  ) {
    self.errorStart = errorStart
    self.error = error
  }

  /// Expected `.unexpectedContinuationByte` error at byte offset `i`.
  public static func unexpectedContinuationByte(
    at i: Int, errorStart: Bool = true
  ) -> Self {
    let underlying = UTF8.ValidationError(.unexpectedContinuationByte, at: i)
    return Self(underlying, errorStart: errorStart)
  }

  /// Expected `.surrogateCodePointByte` error at byte offset `i`.
  public static func surrogateCodePointByte(
    at i: Int, errorStart: Bool = true
  ) -> Self {
    let underlying = UTF8.ValidationError(.surrogateCodePointByte, at: i)
    return Self(underlying, errorStart: errorStart)
  }

  /// Expected `.invalidNonSurrogateCodePointByte` error at byte offset `i`.
  public static func invalidNonSurrogateCodePointByte(
    at i: Int, errorStart: Bool = true
  ) -> Self {
    let underlying = UTF8.ValidationError(
      .invalidNonSurrogateCodePointByte, at: i)
    return Self(underlying, errorStart: errorStart)
  }

  /// Expected `.overlongEncodingByte` error at byte offset `i`.
  public static func overlongEncodingByte(
    at i: Int, errorStart: Bool = true
  ) -> Self {
    let underlying = UTF8.ValidationError(.overlongEncodingByte, at: i)
    return Self(underlying, errorStart: errorStart)
  }

  /// Expected `.truncatedScalar` error covering the byte offsets in `range`.
  public static func truncatedScalar(
    _ range: Range<Int>, errorStart: Bool = true
  ) -> Self {
    let underlying = UTF8.ValidationError(.truncatedScalar, range)
    return Self(underlying, errorStart: errorStart)
  }
}
/// A byte sequence together with the validation errors it is expected to
/// produce, plus the helpers that check those expectations.
@available(SwiftStdlib 6.2, *)
private struct ValidationTestCase {
  /// The input bytes to validate.
  var bytes: [UInt8]

  /// The expected errors, compared index-by-index against what validation
  /// reports. An empty array means the input is expected to be valid.
  var errors: [ValidationError]

  /// Source location of the test case, included in failure stack traces.
  var loc: SourceLocStack

  /// Creates a test case, recording the caller's `file` and `line`.
  init(
    _ bytes: [UInt8],
    file: String = #file,
    line: UInt = #line,
    _ errors: [ValidationError]
  ) {
    self.loc = SourceLocStack(SourceLoc(file, line))
    self.errors = errors
    self.bytes = bytes
  }

  /// Returns the error expected at index `i`.
  ///
  /// When the input was sliced and the entry is not an error start, the
  /// expected kind becomes `.unexpectedContinuationByte` over the same byte
  /// offsets; otherwise the stored error is returned unchanged.
  func fetchError(
    at i: Int, wasSliced: Bool
  ) -> UTF8.ValidationError {
    let expected = errors[i]
    guard wasSliced, !expected.errorStart else {
      return expected.error
    }
    return UTF8.ValidationError(
      .unexpectedContinuationByte, expected.error.byteOffsets)
  }

  /// Asserts `lhs == rhs`, reporting both the test case's location and the
  /// location of this call on failure.
  func expect<T: Equatable>(
    _ lhs: T,
    _ rhs: T,
    file: String = #file,
    line: UInt = #line
  ) {
    let trace = loc.withCurrentLoc(file: file, line: line)
    expectEqual(lhs, rhs, stackTrace: trace)
  }

  /// Records an unconditional failure with `message`, reporting both the
  /// test case's location and the location of this call.
  func fail(
    _ message: String,
    file: String = #file,
    line: UInt = #line
  ) {
    let failureSite = SourceLoc(file, line)
    expectationFailure(
      message,
      trace: "",
      stackTrace: loc.with(failureSite))
  }

  /// Test UTF8._checkAllErrors(), whose results are matched directly against
  /// the provided expected errors (no slicing adjustment).
  func testAllErrors() {
    let found = Array(UTF8._checkAllErrors(bytes))
    // `zip` stops at the shorter side; a count mismatch is reported below.
    for (index, actual) in zip(errors.indices, found) {
      let expected = fetchError(at: index, wasSliced: false)
      expect(expected, actual)
    }
    expect(found.count, errors.count)
  }

  /// Test UTF8Span validation. Subsequent errors are surfaced by slicing the
  /// input past the previous error (which converts the error kind of
  /// non-start entries to .unexpectedContinuationByte).
  func testSpanSlicedErrors() {
    bytes.withSpan { span in
      guard !errors.isEmpty else {
        // No errors expected: the whole input must validate.
        do throws(UTF8.ValidationError) {
          _ = try UTF8Span(validating: span)
        } catch {
          fail("Unexpected error: \(error)")
        }
        return
      }

      // Walk the input one error at a time. Each thrown error is relative to
      // the slice, so it is rebased onto the full input before comparing.
      var sliceStart = 0
      var nextExpected = 0
      validation: while true {
        do throws(UTF8.ValidationError) {
          _ = try UTF8Span(validating: span._extracting(sliceStart...))
          if nextExpected != errors.endIndex {
            fail("Expected a thrown UTF-8 encoding error")
          }
          break validation
        } catch {
          if nextExpected >= errors.endIndex {
            fail("Found unexpected subsequent error \(error)")
            break validation
          }
          let expectedError = fetchError(at: nextExpected, wasSliced: true)
          let rebased = UTF8.ValidationError(
            error.kind,
            error.byteOffsets._offset(by: sliceStart)
          )
          expect(expectedError, rebased)
          // Resume validation just past the error that was reported.
          sliceStart = rebased.byteOffsets.upperBound
          nextExpected += 1
        }
      }

      // Rest of input should be error-free
      guard let tailStart = errors.last?.error.byteOffsets.upperBound,
            tailStart < bytes.count
      else {
        return
      }
      do throws(UTF8.ValidationError) {
        _ = try UTF8Span(validating: span._extracting(tailStart...))
      } catch {
        fail("Found subsequent error \(error)")
      }
    }
  }

  /// Runs the sliced-span check followed by the all-errors check.
  func run() {
    testSpanSlicedErrors()
    testAllErrors()
  }
}
// Registers the "UTF8Span/encoding errors" test. Every case in the body is
// currently commented out, so the test performs no validation checks; the
// FIXME near the top of the file notes that this test is currently broken.
if #available(SwiftStdlib 6.2, *) {
suite.test("UTF8Span/encoding errors") {
// Builds a `ValidationTestCase` from `bytes` and the expected `errors`,
// tagged with the caller's source location, and runs it.
//
// NOTE(review): the disabled calls below pass the expected errors as an array
// literal and write some ranges with `...`, whereas this helper takes variadic
// `ValidationError` values and `truncatedScalar` takes a `Range<Int>`. They
// presumably need updating before they can be re-enabled — confirm.
func test(
_ bytes: Array<UInt8>,
_ file: String = #file, line: UInt = #line,
_ errors: ValidationError...
) {
ValidationTestCase(
bytes, file: file, line: line, errors
).run()
}
// Valid string
// test(Array("abcde\u{301}f😀🇺🇸🧟‍♀️🧟‍♀️".utf8), [])
// Bad URL
// test(
// Array("http://servername/scripts/..".utf8)
// + [0xC0, 0xAF]
// + Array("../winnt/system32/cmd.exe".utf8),
// [.overlongEncodingByte(at: 28), // C0
// .overlongEncodingByte(at: 29, errorStart: false), // AF
// ])
// test(
// [0xC0, 0xAF, 0xE0, 0x80, 0xBF, 0xF0, 0x81, 0x82, 0x41],
// [.overlongEncodingByte(at: 0), // C0
// .overlongEncodingByte(at: 1, errorStart: false), // AF
// .overlongEncodingByte(at: 2), // E0
// .overlongEncodingByte(at: 3, errorStart: false), // 80
// .overlongEncodingByte(at: 4, errorStart: false), // BF
// .overlongEncodingByte(at: 5), // F0
// .overlongEncodingByte(at: 6, errorStart: false), // 81
// .overlongEncodingByte(at: 7, errorStart: false), // 82
// ])
// test(
// [0x41, 0xC0, 0xAF, 0x41, 0xF4, 0x80, 0x80, 0x41],
// [.overlongEncodingByte(at: 1), // C0
// .overlongEncodingByte(at: 2, errorStart: false), // AF
// .truncatedScalar(4...6), // F4 80 80
// ])
// test(
// [0xED, 0xAF, 0x41],
// [.surrogateCodePointByte(at: 0), // ED
// .surrogateCodePointByte(at: 1, errorStart: false), // AF
// ])
// test(
// [0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF, 0xED, 0xAF, 0x41],
// [.surrogateCodePointByte(at: 0), // ED
// .surrogateCodePointByte(at: 1, errorStart: false), // A0
// .surrogateCodePointByte(at: 2, errorStart: false), // 80
// .surrogateCodePointByte(at: 3), // ED
// .surrogateCodePointByte(at: 4, errorStart: false), // BF
// .surrogateCodePointByte(at: 5, errorStart: false), // BF
// .surrogateCodePointByte(at: 6), // ED
// .surrogateCodePointByte(at: 7, errorStart: false), // AF
// ])
// test(
// [0xF4, 0x91, 0x92, 0x93, 0xFF, 0x41, 0x80, 0xBF, 0x42],
// [.invalidNonSurrogateCodePointByte(at: 0), // F4
// .invalidNonSurrogateCodePointByte(at: 1, errorStart: false), // 91
// .invalidNonSurrogateCodePointByte(at: 2, errorStart: false), // 92
// .invalidNonSurrogateCodePointByte(at: 3, errorStart: false), // 93
// .invalidNonSurrogateCodePointByte(at: 4), // FF
// .unexpectedContinuationByte(at: 6), // 80
// .unexpectedContinuationByte(at: 7), // BF
// ])
// test(
// [0xE1, 0x80, 0xE2, 0xF0, 0x91, 0x92, 0xF1, 0xBF, 0x41],
// [.truncatedScalar(0...1), // E1 80
// .truncatedScalar(2...2), // E2
// .truncatedScalar(3...5), // F0 91 92
// .truncatedScalar(6...7), // F1 BF
// ])
// test(
// [0xE0, 0x81, 0x80],
// [.overlongEncodingByte(at: 0), // E0
// .overlongEncodingByte(at: 1, errorStart: false), // 81
// .overlongEncodingByte(at: 2, errorStart: false), // 80
// ])
}
}