[stdlib] Speed up Character construction from CharacterView.subscript (#9252)

This adds a fast path for single-code-unit Character construction. Rather than using the general-purpose String-based initializer (which then repeats grapheme breaking to ensure a trap, amongst other inefficiencies), just make the Character directly from the single Unicode scalar value. This also speeds up simple iteration of BMP strings when the optimizer is unable to eliminate the subscript: around 2x for ASCII, and around 20% for BMP UTF-16.
2025-12-14 20:36:38 +01:00 · 2017-05-04 06:59:30 -07:00
parent 8610e7a096
commit 47d0247476
1 changed files with 16 additions and 0 deletions
--- a/stdlib/public/core/StringCharacterView.swift
+++ b/stdlib/public/core/StringCharacterView.swift
@@ -439,6 +439,22 @@ extension String.CharacterView : BidirectionalCollection {
  /// - Parameter position: A valid index of the character view. `position`
  ///   must be less than the view's end index.
    public subscript(i: Index) -> Character {
+    if i._countUTF16 == 1 {
+      // Fast path: a grapheme made of exactly one UTF-16 code unit can become
+      // a Character straight from its unicode scalar (if below the surrogates).
+      let relativeOffset = i._base._position - _coreOffset
+      if _core.isASCII {
+        let asciiBuffer = _core.asciiBuffer._unsafelyUnwrappedUnchecked
+        return Character(UnicodeScalar(asciiBuffer[relativeOffset]))
+      } else if _core._baseAddress != nil {
+        let cu = _core._nthContiguous(relativeOffset)
+        // Below 0xD800 the code unit is not a surrogate, so it is a scalar
+        if (cu < 0xd800) {
+          return Character(UnicodeScalar(cu)._unsafelyUnwrappedUnchecked)
+        }
+      }
+    }
+
    return Character(String(unicodeScalars[i._base..<i._endBase]))
  }
 }