[stdlib] Speed up Character construction from CharacterView.subscript (#9252)

This adds a fast path for single-code-unit Character
construction. Rather than using the general-purpose, String-based
initializer (which repeats grapheme breaking to ensure a trap,
among other inefficiencies), construct the Character directly from
the single Unicode scalar value.

This also speeds up simple iteration of BMP strings when the optimizer
is unable to eliminate the subscript. The speedup is around 2x for
ASCII and around 20% for BMP UTF-16.
This commit is contained in:
Michael Ilseman
2017-05-04 06:59:30 -07:00
committed by Ben Cohen
parent 8610e7a096
commit 47d0247476

View File

@@ -439,6 +439,22 @@ extension String.CharacterView : BidirectionalCollection {
/// - Parameter position: A valid index of the character view. `position`
/// must be less than the view's end index.
public subscript(i: Index) -> Character {
  // Fast path: when the grapheme at `i` spans exactly one UTF-16 code unit,
  // try to build the Character straight from that unit's scalar value
  // instead of going through the String-based initializer.
  singleUnit: if i._countUTF16 == 1 {
    // Offset used to index the core's storage below.
    let offsetInCore = i._base._position - _coreOffset

    if _core.isASCII {
      // ASCII storage: the element at the offset converts directly to a
      // scalar.
      let asciiUnits = _core.asciiBuffer._unsafelyUnwrappedUnchecked
      return Character(UnicodeScalar(asciiUnits[offsetInCore]))
    }

    // Without a base address the contiguous read is not attempted; leave the
    // fast path and use the general construction at the bottom.
    guard _core._baseAddress != nil else { break singleUnit }

    let codeUnit = _core._nthContiguous(offsetInCore)

    // Only a sub-surrogate code unit (below 0xD800) is turned into a scalar
    // here; anything else is left to the general path.
    guard codeUnit < 0xd800 else { break singleUnit }
    return Character(UnicodeScalar(codeUnit)._unsafelyUnwrappedUnchecked)
  }

  // General path: build a String from the grapheme's unicode scalars and
  // construct the Character from it.
  return Character(String(unicodeScalars[i._base..<i._endBase]))
}
}