mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
In theory there could be a "fixed-layout" enum that's not exhaustive but promises not to add any more cases with payloads, but we don't need that distinction today. (Note that @objc enums are still "fixed-layout" in the actual sense of "having a compile-time known layout". There's just no special way to spell that.)
187 lines
5.4 KiB
Swift
187 lines
5.4 KiB
Swift
//===--- UnicodeParser.swift ----------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
extension Unicode {
|
|
/// The result of attempting to parse a `T` from some input.
|
|
@_frozen // FIXME(sil-serialize-all)
|
|
public enum ParseResult<T> {
|
|
/// A `T` was parsed successfully
|
|
case valid(T)
|
|
|
|
/// The input was entirely consumed.
|
|
case emptyInput
|
|
|
|
/// An encoding error was detected.
|
|
///
|
|
/// `length` is the number of underlying code units consumed by this
|
|
/// error (the length of the longest prefix of a valid encoding
|
|
/// sequence that could be recognized).
|
|
case error(length: Int)
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned
|
|
internal var _valid: T? {
|
|
if case .valid(let result) = self { return result }
|
|
return nil
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned
|
|
internal var _error: Int? {
|
|
if case .error(let result) = self { return result }
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Types that separate streams of code units into encoded Unicode
|
|
/// scalar values.
|
|
public protocol _UnicodeParser {
|
|
/// The encoding with which this parser is associated
|
|
associatedtype Encoding : _UnicodeEncoding
|
|
|
|
/// Constructs an instance that can be used to begin parsing `CodeUnit`s at
|
|
/// any Unicode scalar boundary.
|
|
init()
|
|
|
|
/// Parses a single Unicode scalar value from `input`.
|
|
mutating func parseScalar<I : IteratorProtocol>(
|
|
from input: inout I
|
|
) -> Unicode.ParseResult<Encoding.EncodedScalar>
|
|
where I.Element == Encoding.CodeUnit
|
|
}
|
|
|
|
extension _UnicodeParser {
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned
|
|
@inline(__always)
|
|
@discardableResult
|
|
internal static func _parse<I: IteratorProtocol>(
|
|
_ input: inout I,
|
|
repairingIllFormedSequences makeRepairs: Bool = true,
|
|
into output: (Encoding.EncodedScalar)->Void
|
|
) -> Int
|
|
where I.Element == Encoding.CodeUnit
|
|
{
|
|
var errorCount = 0
|
|
var d = Self()
|
|
while true {
|
|
switch d.parseScalar(from: &input) {
|
|
case let .valid(scalarContent):
|
|
output(scalarContent)
|
|
case .error:
|
|
if _slowPath(!makeRepairs) { return 1 }
|
|
errorCount += 1
|
|
output(Encoding.encodedReplacementCharacter)
|
|
case .emptyInput:
|
|
return errorCount
|
|
}
|
|
}
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@inline(__always)
|
|
@discardableResult
|
|
public static func _decode<I: IteratorProtocol>(
|
|
_ input: inout I,
|
|
repairingIllFormedSequences makeRepairs: Bool,
|
|
into output: (Unicode.Scalar)->Void
|
|
) -> Int
|
|
where I.Element == Encoding.CodeUnit
|
|
{
|
|
return _parse(&input, repairingIllFormedSequences: makeRepairs) {
|
|
output(Encoding.decode($0))
|
|
}
|
|
}
|
|
}
|
|
|
|
extension Unicode {
|
|
public typealias Parser = _UnicodeParser
|
|
}
|
|
|
|
extension Unicode {
|
|
@_fixed_layout
|
|
public // @testable
|
|
struct _ParsingIterator<
|
|
CodeUnitIterator : IteratorProtocol,
|
|
Parser: Unicode.Parser
|
|
> where Parser.Encoding.CodeUnit == CodeUnitIterator.Element {
|
|
@inline(__always)
|
|
@_inlineable
|
|
public init(codeUnits: CodeUnitIterator, parser: Parser) {
|
|
self.codeUnits = codeUnits
|
|
self.parser = parser
|
|
}
|
|
public var codeUnits: CodeUnitIterator
|
|
public var parser: Parser
|
|
}
|
|
}
|
|
|
|
extension Unicode._ParsingIterator : IteratorProtocol, Sequence {
|
|
@inline(__always)
|
|
@_inlineable
|
|
public mutating func next() -> Parser.Encoding.EncodedScalar? {
|
|
switch parser.parseScalar(from: &codeUnits) {
|
|
case let .valid(scalarContent): return scalarContent
|
|
case .error: return Parser.Encoding.encodedReplacementCharacter
|
|
case .emptyInput: return nil
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
extension Unicode {
|
|
@_fixed_layout
|
|
@_versioned
|
|
internal struct _TranscodingIterator<
|
|
SourceCodeUnits : IteratorProtocol,
|
|
Parser : Unicode.Parser,
|
|
TargetEncoding : Unicode.Encoding
|
|
> where Parser.Encoding.CodeUnit == SourceCodeUnits.Element {
|
|
|
|
@inline(__always)
|
|
@_inlineable
|
|
public init(source: SourceCodeUnits, parser: Parser) {
|
|
_scalars = _ParsingIterator(codeUnits: source, parser: parser)
|
|
let firstScalar_ = _scalars.next()
|
|
if _fastPath(firstScalar_ != nil), let firstScalar = firstScalar_ {
|
|
_codeUnits = TargetEncoding._transcode(
|
|
firstScalar, from: Parser.Encoding.self).makeIterator()
|
|
}
|
|
else {
|
|
_codeUnits = TargetEncoding._encode(
|
|
UnicodeScalar(_unchecked: 0)).makeIterator()
|
|
while _codeUnits.next() != nil { }
|
|
return
|
|
}
|
|
}
|
|
|
|
internal var _scalars: _ParsingIterator<SourceCodeUnits, Parser>
|
|
internal var _codeUnits: TargetEncoding.EncodedScalar.Iterator
|
|
}
|
|
}
|
|
|
|
|
|
extension Unicode._TranscodingIterator : IteratorProtocol, Sequence {
|
|
@inline(__always)
|
|
@_inlineable
|
|
mutating public func next() -> TargetEncoding.CodeUnit? {
|
|
if let x = _codeUnits.next() { return x }
|
|
let nextScalar_ = _scalars.next()
|
|
if _fastPath(nextScalar_ != nil), let nextScalar = nextScalar_ {
|
|
_codeUnits = TargetEncoding._transcode(
|
|
nextScalar, from: Parser.Encoding.self).makeIterator()
|
|
}
|
|
return _codeUnits.next()
|
|
}
|
|
}
|
|
*/
|