[stdlib] Add conversions to String.UnicodeScalarIndex

Also expanded the tests to check that end indices translate correctly
between views.

Swift SVN r24067
This commit is contained in:
Dave Abrahams
2014-12-22 01:13:46 +00:00
parent 8175e8b709
commit a9d7577b5f
4 changed files with 141 additions and 20 deletions

View File

@@ -226,14 +226,10 @@ extension String.UTF16View.Index {
_ sourceIndex: String.UTF8Index, within utf16: String.UTF16View
) {
let core = utf16._core
let sourceView = String.UTF8View(core)
_precondition(
sourceIndex._coreIndex >= 0 && (
sourceIndex._coreIndex < core.endIndex
|| sourceIndex._coreIndex == core.endIndex
&& sourceIndex._isOnUnicodeScalarBoundary
), "Invalid String.UTF8Index for this UTF-16 view")
sourceIndex._coreIndex >= 0 && sourceIndex._coreIndex <= core.endIndex,
"Invalid String.UTF8Index for this UTF-16 view")
// Detect positions that have no corresponding index.
if !sourceIndex._isOnUnicodeScalarBoundary {
@@ -256,4 +252,10 @@ extension String.UTF16View.Index {
) -> String.UTF8View.Index? {
return String.UTF8View.Index(self, within: otherView)
}
/// Return the position in `otherView` that corresponds exactly to
/// `self`, or `nil` if no such position exists.
public func samePositionIn(
  otherView: String.UnicodeScalarView
) -> String.UnicodeScalarIndex? {
  // The failable converting initializer does all the work; its `nil`
  // result is forwarded unchanged.
  let converted: String.UnicodeScalarIndex? =
    String.UnicodeScalarIndex(self, within: otherView)
  return converted
}
}

View File

@@ -146,7 +146,8 @@ extension String {
/// True iff the index is at the end of its view
internal var _isAtEnd : Bool {
return _coreIndex == _core.endIndex
return _buffer == Index._emptyBuffer
&& _coreIndex == _core.endIndex
}
/// The value of the buffer when it is empty
@@ -323,4 +324,9 @@ extension String.UTF8View.Index {
) -> String.UTF16View.Index? {
return String.UTF16View.Index(self, within: otherView)
}
/// Return the position in `otherView` that corresponds exactly to
/// `self`, or `nil` if no such position exists.
public func samePositionIn(
  otherView: String.UnicodeScalarView
) -> String.UnicodeScalarIndex? {
  // The failable converting initializer does all the work; its `nil`
  // result is forwarded unchanged.
  let converted: String.UnicodeScalarIndex? =
    String.UnicodeScalarIndex(self, within: otherView)
  return converted
}
}

View File

@@ -339,7 +339,56 @@ extension String.UnicodeScalarView : RangeReplaceableCollectionType {
}
// Index conversions
extension String.UnicodeScalarView.Index {
extension String.UnicodeScalarIndex {
/// Construct the position in `unicodeScalars` that corresponds exactly
/// to `sourceIndex`. The result is `nil` when `sourceIndex` falls
/// between the two halves of a surrogate pair.
///
/// Requires: `sourceIndex` lies between the start and end index
/// (inclusive) of the UTF-16 view built over the same core.
public init?(
  _ sourceIndex: String.UTF16Index,
  within unicodeScalars: String.UnicodeScalarView
) {
  let utf16View = String.UTF16View(unicodeScalars._core)

  // The start and end positions are accepted as-is; only other
  // positions are range-checked and inspected.
  let isAtEitherEnd =
    sourceIndex == utf16View.startIndex || sourceIndex == utf16View.endIndex

  if !isAtEitherEnd {
    _precondition(
      sourceIndex >= utf16View.startIndex
      && sourceIndex <= utf16View.endIndex,
      "Invalid String.UTF16Index for this UnicodeScalar view")

    // A position has no scalar counterpart only when it sits between
    // a lead surrogate and its trail surrogate.  Both neighbours must
    // be examined: an unpaired surrogate is decoded as a single
    // replacement character, so the position next to it is valid.
    if UTF16.isTrailSurrogate(utf16View[sourceIndex]) {
      if UTF16.isLeadSurrogate(utf16View[sourceIndex.predecessor()]) {
        return nil
      }
    }
  }

  self.init(sourceIndex._offset, unicodeScalars._core)
}
/// Construct the position in `unicodeScalars` that corresponds exactly
/// to `sourceIndex`. The result is `nil` when `sourceIndex` is not on
/// a unicode scalar boundary.
///
/// Requires: the core offset of `sourceIndex` is in `0...endIndex` of
/// the core underlying `unicodeScalars`.
public init?(
  _ sourceIndex: String.UTF8Index,
  within unicodeScalars: String.UnicodeScalarView
) {
  let stringCore = unicodeScalars._core

  // Reject indices that cannot belong to this view's storage.
  _precondition(
    sourceIndex._coreIndex >= 0
    && sourceIndex._coreIndex <= stringCore.endIndex,
    "Invalid String.UTF8Index for this UnicodeScalar view")

  // A UTF-8 position that is not on a scalar boundary has no
  // counterpart in the scalar view.
  if !sourceIndex._isOnUnicodeScalarBoundary {
    return nil
  }

  self.init(sourceIndex._coreIndex, stringCore)
}
/// Construct the position in `unicodeScalars` that corresponds to
/// `sourceIndex`.
///
/// Unlike the conversions from UTF-8 and UTF-16 indices, this one is
/// not failable.
public init(
  _ sourceIndex: String.Index,
  within unicodeScalars: String.UnicodeScalarView
) {
  // Reuse the position stored in the index's `_base`.
  let basePosition = sourceIndex._base._position
  self.init(basePosition, unicodeScalars._core)
}
public func samePositionIn(
otherView: String.UTF8View
) -> String.UTF8View.Index {

View File

@@ -22,6 +22,8 @@ println("testing...")
// grapheme clusters composed of multiple
let winter = "🏂☃❅❆❄︎⛄️❄️"
let summer = "school's out!"
// The code units expected from `summer.utf8` ("school's out!" is pure
// ASCII, one byte per character); shared by several expectations.
let summerBytes: [UInt8] = [
0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75, 0x74, 0x21]
func printHexSequence<
S:SequenceType where S.Generator.Element : IntegerType
@@ -53,14 +55,12 @@ tests.test("decoding") {
)
expectEqualSequence(
[0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75,
0x74, 0x21],
summerBytes,
summer.utf8
)
expectEqualSequence(
[0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75,
0x74, 0x21],
summerBytes.map {UTF16.CodeUnit($0)},
summer.utf16
)
}
@@ -81,7 +81,7 @@ func utf8UTF16Indices(s: String) -> [String.UTF8Index?] {
// [f0 9f 8f 82] [e2 98 83] [e2 9d 85] [e2 9d 86] [e2 9d 84 | ef b8 8e]
// [e2 9b 84 | ef b8 8f] [e2 9d 84 | ef b8 8f]
tests.test("index mapping") {
tests.test("index-mapping/character-to-utf8") {
// the first four utf8 code units at the start of each grapheme
// cluster
expectEqualSequence(
@@ -98,12 +98,17 @@ tests.test("index mapping") {
i in (0..<4).map { winter.utf8[advance(i, $0)] }
}, ==)
expectEqual(winter.utf8.endIndex, winter.endIndex.samePositionIn(winter.utf8))
expectEqualSequence(
[0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75,
0x74, 0x21],
summerBytes,
utf8GraphemeClusterIndices(summer).map { summer.utf8[$0] }
)
expectEqual(summer.utf8.endIndex, summer.endIndex.samePositionIn(summer.utf8))
}
tests.test("index-mapping/unicode-scalar-to-utf8") {
// the first three utf8 code units at the start of each unicode
// scalar
expectEqualSequence(
@@ -124,12 +129,21 @@ tests.test("index mapping") {
i in (0..<3).map { winter.utf8[advance(i, $0)] }
}, ==)
expectEqual(
winter.utf8.endIndex,
winter.unicodeScalars.endIndex.samePositionIn(winter.utf8))
expectEqualSequence(
[0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75,
0x74, 0x21],
summerBytes,
utf8UnicodeScalarIndices(summer).map { summer.utf8[$0] }
)
expectEqual(
summer.utf8.endIndex,
summer.unicodeScalars.endIndex.samePositionIn(summer.utf8))
}
tests.test("index-mapping/utf16-to-utf8") {
// check the first three utf8 code units at the start of each utf16
// code unit
expectEqualSequence(
@@ -152,13 +166,63 @@ tests.test("index mapping") {
} ?? []
}, ==)
expectNotEmpty(winter.utf16.endIndex.samePositionIn(winter.utf8))
expectEqual(
winter.utf8.endIndex,
winter.utf16.endIndex.samePositionIn(winter.utf8)!)
expectEqualSequence(
[0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75,
0x74, 0x21],
summerBytes,
utf8UTF16Indices(summer).map { summer.utf8[$0!] }
)
expectNotEmpty(summer.utf16.endIndex.samePositionIn(summer.utf8))
expectEqual(
summer.utf8.endIndex,
summer.utf16.endIndex.samePositionIn(summer.utf8)!)
}
// Checks conversion of UTF-8 indices into unicode-scalar indices via
// `samePositionIn`, for every index of the UTF-8 view and for its end
// index.
tests.test("index-mapping/utf8-to-unicode-scalar") {
// Define expectation separately to help the type-checker, which
// otherwise runs out of time solving.
//
// One entry per UTF-8 code unit of `winter`: the scalar found at the
// converted index, or nil where the conversion is expected to yield
// no index.
let winterUtf8UnicodeScalars: [UnicodeScalar?] = [
UnicodeScalar(0x1f3c2), nil, nil, nil,
UnicodeScalar(0x2603), nil, nil,
UnicodeScalar(0x2745), nil, nil,
UnicodeScalar(0x2746), nil, nil,
UnicodeScalar(0x2744), nil, nil, UnicodeScalar(0xfe0e), nil, nil,
UnicodeScalar(0x26c4), nil, nil, UnicodeScalar(0xfe0f), nil, nil,
UnicodeScalar(0x2744), nil, nil, UnicodeScalar(0xfe0f), nil, nil
]
// Convert each UTF-8 index and, when the conversion succeeds, read
// the scalar at the resulting position.
expectEqualSequence(
winterUtf8UnicodeScalars,
map(indices(winter.utf8)) {
i in i.samePositionIn(winter.unicodeScalars).map {
winter.unicodeScalars[$0]
}
}, ==
)
// The UTF-8 end index must convert, and must land on the scalar
// view's end index.
expectNotEmpty(winter.utf8.endIndex.samePositionIn(winter.unicodeScalars))
expectEqual(
winter.unicodeScalars.endIndex,
winter.utf8.endIndex.samePositionIn(winter.unicodeScalars)!)
// For `summer`, every UTF-8 code unit is expected to map to a scalar
// with the same numeric value as the corresponding byte.
expectEqualSequence(
map(summerBytes) { UnicodeScalar($0) as UnicodeScalar? },
map(indices(summer.utf8)) {
i in i.samePositionIn(summer.unicodeScalars).map {
summer.unicodeScalars[$0]
}
}, ==
)
// Same end-index checks as above, for `summer`.
expectNotEmpty(summer.utf8.endIndex.samePositionIn(summer.unicodeScalars))
expectEqual(
summer.unicodeScalars.endIndex,
summer.utf8.endIndex.samePositionIn(summer.unicodeScalars)!)
}
func expectEquality<T: Equatable>(x: T, y: T, expected: Bool) {
let actual = x == y