[stdlib] conversions to String.UnicodeScalarIndex

Also expanded testing to include checking translation of end indices
between views.

Swift SVN r24067
This commit is contained in:
Dave Abrahams
2014-12-22 01:13:46 +00:00
parent 8175e8b709
commit a9d7577b5f
4 changed files with 141 additions and 20 deletions

View File

@@ -226,14 +226,10 @@ extension String.UTF16View.Index {
_ sourceIndex: String.UTF8Index, within utf16: String.UTF16View _ sourceIndex: String.UTF8Index, within utf16: String.UTF16View
) { ) {
let core = utf16._core let core = utf16._core
let sourceView = String.UTF8View(core)
_precondition( _precondition(
sourceIndex._coreIndex >= 0 && ( sourceIndex._coreIndex >= 0 && sourceIndex._coreIndex <= core.endIndex,
sourceIndex._coreIndex < core.endIndex "Invalid String.UTF8Index for this UTF-16 view")
|| sourceIndex._coreIndex == core.endIndex
&& sourceIndex._isOnUnicodeScalarBoundary
), "Invalid String.UTF8Index for this UTF-16 view")
// Detect positions that have no corresponding index. // Detect positions that have no corresponding index.
if !sourceIndex._isOnUnicodeScalarBoundary { if !sourceIndex._isOnUnicodeScalarBoundary {
@@ -256,4 +252,10 @@ extension String.UTF16View.Index {
) -> String.UTF8View.Index? { ) -> String.UTF8View.Index? {
return String.UTF8View.Index(self, within: otherView) return String.UTF8View.Index(self, within: otherView)
} }
/// Return the position in `otherView` that corresponds exactly to
/// `self`, or `nil` when the failable `String.UnicodeScalarIndex`
/// conversion initializer reports that no such position exists.
public func samePositionIn(
  otherView: String.UnicodeScalarView
) -> String.UnicodeScalarIndex? {
  let scalarPosition: String.UnicodeScalarIndex? =
    String.UnicodeScalarIndex(self, within: otherView)
  return scalarPosition
}
} }

View File

@@ -146,7 +146,8 @@ extension String {
/// True iff the index is at the end of its view /// True iff the index is at the end of its view
internal var _isAtEnd : Bool { internal var _isAtEnd : Bool {
return _coreIndex == _core.endIndex return _buffer == Index._emptyBuffer
&& _coreIndex == _core.endIndex
} }
/// The value of the buffer when it is empty /// The value of the buffer when it is empty
@@ -323,4 +324,9 @@ extension String.UTF8View.Index {
) -> String.UTF16View.Index? { ) -> String.UTF16View.Index? {
return String.UTF16View.Index(self, within: otherView) return String.UTF16View.Index(self, within: otherView)
} }
/// Return the position in `otherView` that corresponds exactly to
/// `self`, or `nil` when the failable `String.UnicodeScalarIndex`
/// conversion initializer reports that no such position exists.
public func samePositionIn(
  otherView: String.UnicodeScalarView
) -> String.UnicodeScalarIndex? {
  let scalarPosition: String.UnicodeScalarIndex? =
    String.UnicodeScalarIndex(self, within: otherView)
  return scalarPosition
}
} }

View File

@@ -339,7 +339,56 @@ extension String.UnicodeScalarView : RangeReplaceableCollectionType {
} }
// Index conversions // Index conversions
extension String.UnicodeScalarView.Index { extension String.UnicodeScalarIndex {
/// Construct the position in `unicodeScalars` that corresponds exactly
/// to `sourceIndex`.
///
/// The start and end positions of the UTF-16 view always convert.
/// Any other position is first range-checked and then yields `nil`
/// when it points at a trail surrogate that immediately follows a
/// lead surrogate.
public init?(
  _ sourceIndex: String.UTF16Index,
  within unicodeScalars: String.UnicodeScalarView
) {
  let utf16 = String.UTF16View(unicodeScalars._core)

  // Only interior positions can be subscripted and have a predecessor,
  // so the boundary positions skip the checks below entirely.
  let isInterior =
    sourceIndex != utf16.startIndex && sourceIndex != utf16.endIndex

  if isInterior {
    _precondition(
      sourceIndex >= utf16.startIndex && sourceIndex <= utf16.endIndex,
      "Invalid String.UTF16Index for this UnicodeScalar view")

    // Detect positions that have no corresponding index.  Both the
    // code unit at the position and the one before it are examined,
    // because an unpaired surrogate is decoded as a single
    // replacement character, which makes its position valid.
    let splitsSurrogatePair =
      UTF16.isTrailSurrogate(utf16[sourceIndex])
      && UTF16.isLeadSurrogate(utf16[sourceIndex.predecessor()])

    if splitsSurrogatePair {
      return nil
    }
  }
  self.init(sourceIndex._offset, unicodeScalars._core)
}
/// Construct the position in `unicodeScalars` that corresponds exactly
/// to `sourceIndex`.
///
/// The core offset of `sourceIndex` must lie between zero and the end
/// of the view's core, inclusive.  The result is `nil` when
/// `sourceIndex` is not on a unicode scalar boundary.
public init?(
  _ sourceIndex: String.UTF8Index,
  within unicodeScalars: String.UnicodeScalarView
) {
  let scalarCore = unicodeScalars._core
  let coreOffset = sourceIndex._coreIndex

  _precondition(
    coreOffset >= 0 && coreOffset <= scalarCore.endIndex,
    "Invalid String.UTF8Index for this UnicodeScalar view")

  // A position in the middle of a scalar's encoding has no
  // corresponding unicode scalar index.
  let onScalarBoundary = sourceIndex._isOnUnicodeScalarBoundary
  if !onScalarBoundary {
    return nil
  }
  self.init(coreOffset, scalarCore)
}
/// Construct the position in `unicodeScalars` that corresponds to
/// `sourceIndex`.
///
/// This conversion is not failable: the result is built directly from
/// the `_position` of the index's `_base`.
public init(
  _ sourceIndex: String.Index,
  within unicodeScalars: String.UnicodeScalarView
) {
  let scalarPosition = sourceIndex._base._position
  self.init(scalarPosition, unicodeScalars._core)
}
public func samePositionIn( public func samePositionIn(
otherView: String.UTF8View otherView: String.UTF8View
) -> String.UTF8View.Index { ) -> String.UTF8View.Index {

View File

@@ -22,6 +22,8 @@ println("testing...")
// grapheme clusters composed of multiple // grapheme clusters composed of multiple
let winter = "🏂☃❅❆❄︎⛄️❄️" let winter = "🏂☃❅❆❄︎⛄️❄️"
let summer = "school's out!" let summer = "school's out!"
// The code units the tests expect for `summer` ("school's out!"),
// one byte per character.
let summerBytes: [UInt8] = [
  0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c,  // "school"
  0x27, 0x73, 0x20,                    // "'s "
  0x6f, 0x75, 0x74, 0x21               // "out!"
]
func printHexSequence< func printHexSequence<
S:SequenceType where S.Generator.Element : IntegerType S:SequenceType where S.Generator.Element : IntegerType
@@ -53,14 +55,12 @@ tests.test("decoding") {
) )
expectEqualSequence( expectEqualSequence(
[0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75, summerBytes,
0x74, 0x21],
summer.utf8 summer.utf8
) )
expectEqualSequence( expectEqualSequence(
[0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75, summerBytes.map {UTF16.CodeUnit($0)},
0x74, 0x21],
summer.utf16 summer.utf16
) )
} }
@@ -81,7 +81,7 @@ func utf8UTF16Indices(s: String) -> [String.UTF8Index?] {
// [f0 9f 8f 82] [e2 98 83] [e2 9d 85] [e2 9d 86] [e2 9d 84 | ef b8 8e] // [f0 9f 8f 82] [e2 98 83] [e2 9d 85] [e2 9d 86] [e2 9d 84 | ef b8 8e]
// [e2 9b 84 | ef b8 8f] [e2 9d 84 | ef b8 8f] // [e2 9b 84 | ef b8 8f] [e2 9d 84 | ef b8 8f]
tests.test("index mapping") { tests.test("index-mapping/character-to-utf8") {
// the first four utf8 code units at the start of each grapheme // the first four utf8 code units at the start of each grapheme
// cluster // cluster
expectEqualSequence( expectEqualSequence(
@@ -98,12 +98,17 @@ tests.test("index mapping") {
i in (0..<4).map { winter.utf8[advance(i, $0)] } i in (0..<4).map { winter.utf8[advance(i, $0)] }
}, ==) }, ==)
expectEqual(winter.utf8.endIndex, winter.endIndex.samePositionIn(winter.utf8))
expectEqualSequence( expectEqualSequence(
[0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75, summerBytes,
0x74, 0x21],
utf8GraphemeClusterIndices(summer).map { summer.utf8[$0] } utf8GraphemeClusterIndices(summer).map { summer.utf8[$0] }
) )
expectEqual(summer.utf8.endIndex, summer.endIndex.samePositionIn(summer.utf8))
}
tests.test("index-mapping/unicode-scalar-to-utf8") {
// the first three utf8 code units at the start of each unicode // the first three utf8 code units at the start of each unicode
// scalar // scalar
expectEqualSequence( expectEqualSequence(
@@ -124,12 +129,21 @@ tests.test("index mapping") {
i in (0..<3).map { winter.utf8[advance(i, $0)] } i in (0..<3).map { winter.utf8[advance(i, $0)] }
}, ==) }, ==)
expectEqual(
winter.utf8.endIndex,
winter.unicodeScalars.endIndex.samePositionIn(winter.utf8))
expectEqualSequence( expectEqualSequence(
[0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75, summerBytes,
0x74, 0x21],
utf8UnicodeScalarIndices(summer).map { summer.utf8[$0] } utf8UnicodeScalarIndices(summer).map { summer.utf8[$0] }
) )
expectEqual(
summer.utf8.endIndex,
summer.unicodeScalars.endIndex.samePositionIn(summer.utf8))
}
tests.test("index-mapping/utf16-to-utf8") {
// check the first three utf8 code units at the start of each utf16 // check the first three utf8 code units at the start of each utf16
// code unit // code unit
expectEqualSequence( expectEqualSequence(
@@ -151,14 +165,64 @@ tests.test("index mapping") {
i8 in (0..<3).map { winter.utf8[advance(i8, $0)] } i8 in (0..<3).map { winter.utf8[advance(i8, $0)] }
} ?? [] } ?? []
}, ==) }, ==)
expectNotEmpty(winter.utf16.endIndex.samePositionIn(winter.utf8))
expectEqual(
winter.utf8.endIndex,
winter.utf16.endIndex.samePositionIn(winter.utf8)!)
expectEqualSequence( expectEqualSequence(
[0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75, summerBytes,
0x74, 0x21],
utf8UTF16Indices(summer).map { summer.utf8[$0!] } utf8UTF16Indices(summer).map { summer.utf8[$0!] }
) )
}
expectNotEmpty(summer.utf16.endIndex.samePositionIn(summer.utf8))
expectEqual(
summer.utf8.endIndex,
summer.utf16.endIndex.samePositionIn(summer.utf8)!)
}
// Checks conversion of UTF-8 indices to unicode scalar indices for
// `winter` and `summer`, including conversion of the UTF-8 end index.
tests.test("index-mapping/utf8-to-unicode-scalar") {
// Define expectation separately to help the type-checker, which
// otherwise runs out of time solving.
//
// One entry per UTF-8 index of `winter`: the scalar expected at the
// converted position, or nil where the conversion is expected to
// fail.
let winterUtf8UnicodeScalars: [UnicodeScalar?] = [
UnicodeScalar(0x1f3c2), nil, nil, nil,
UnicodeScalar(0x2603), nil, nil,
UnicodeScalar(0x2745), nil, nil,
UnicodeScalar(0x2746), nil, nil,
UnicodeScalar(0x2744), nil, nil, UnicodeScalar(0xfe0e), nil, nil,
UnicodeScalar(0x26c4), nil, nil, UnicodeScalar(0xfe0f), nil, nil,
UnicodeScalar(0x2744), nil, nil, UnicodeScalar(0xfe0f), nil, nil
]
// Convert every UTF-8 index and read the scalar at each position
// that converted successfully.
expectEqualSequence(
winterUtf8UnicodeScalars,
map(indices(winter.utf8)) {
i in i.samePositionIn(winter.unicodeScalars).map {
winter.unicodeScalars[$0]
}
}, ==
)
// The UTF-8 end index must convert, and must land on the end of the
// unicode scalar view.
expectNotEmpty(winter.utf8.endIndex.samePositionIn(winter.unicodeScalars))
expectEqual(
winter.unicodeScalars.endIndex,
winter.utf8.endIndex.samePositionIn(winter.unicodeScalars)!)
// For `summer`, every UTF-8 index is expected to convert, and the
// scalar found there is expected to equal the corresponding entry of
// `summerBytes`.
expectEqualSequence(
map(summerBytes) { UnicodeScalar($0) as UnicodeScalar? },
map(indices(summer.utf8)) {
i in i.samePositionIn(summer.unicodeScalars).map {
summer.unicodeScalars[$0]
}
}, ==
)
// Same end-index checks as above, for `summer`.
expectNotEmpty(summer.utf8.endIndex.samePositionIn(summer.unicodeScalars))
expectEqual(
summer.unicodeScalars.endIndex,
summer.utf8.endIndex.samePositionIn(summer.unicodeScalars)!)
}
func expectEquality<T: Equatable>(x: T, y: T, expected: Bool) { func expectEquality<T: Equatable>(x: T, y: T, expected: Bool) {
let actual = x == y let actual = x == y