//===--- UnicodeTrie.swift.gyb --------------------------------*- swift -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// A custom trie implementation to quickly retrieve Unicode property values.
//
//===----------------------------------------------------------------------===//

%{

# Note: keep these constants synchronized with the data that is actually
# generated.  There is a runtime check for this, but it is only performed in
# builds with INTERNAL_CHECKS_ENABLED.

BMPFirstLevelIndexBits = 8
BMPDataOffsetBits = 8
SuppFirstLevelIndexBits = 5
SuppSecondLevelIndexBits = 8
SuppDataOffsetBits = 8

BMPLookupBytesPerEntry = 1
BMPDataBytesPerEntry = 1
SuppLookup1BytesPerEntry = 1
SuppLookup2BytesPerEntry = 1
SuppDataBytesPerEntry = 1

TrieSize = 18961

BMPLookupBytesOffset = 0
BMPDataBytesOffset = 256
SuppLookup1BytesOffset = 12032
SuppLookup2BytesOffset = 12049
SuppDataBytesOffset = 12817

}%

import SwiftShims

/// The possible values of the `Grapheme_Cluster_Break` property of a code
/// point.
///
/// The explicit `Int` raw values are the numeric encoding of each case; a raw
/// value is turned back into a case with `init(rawValue:)`.
// These case names must be kept in sync with the 'GraphemeClusterBreakProperty'
// enum in C++ and with the names in the GYBUnicodeDataUtils script.
public // @testable
enum _GraphemeClusterBreakPropertyValue : Int {
  case Other = 0
  case CR = 1
  case LF = 2
  case Control = 3
  case Extend = 4
  case Regional_Indicator = 5
  case Prepend = 6
  case SpacingMark = 7
  case L = 8
  case V = 9
  case T = 10
  case LV = 11
  case LVT = 12
}

// It is expensive to convert a raw enum value to an enum, so we use this type
// safe wrapper around the raw property value to avoid paying the conversion
// cost in hot code paths.
/// A raw `Grapheme_Cluster_Break` property value, stored as the unsigned
/// integer that is read out of the trie data.
struct _GraphemeClusterBreakPropertyRawValue {
  /// Wraps `rawValue` without validating or converting it.
  init(_ rawValue: UInt${BMPDataBytesPerEntry * 8}) {
    self.rawValue = rawValue
  }

  /// The numeric property value.
  var rawValue: UInt${BMPDataBytesPerEntry * 8}

  // Use with care: this operation is expensive (even with optimization
  // turned on the compiler generates code for a switch).
  /// The enum case corresponding to `rawValue`.
  ///
  /// The conversion is force-unwrapped, so a `rawValue` that does not match
  /// any `_GraphemeClusterBreakPropertyValue` case traps.
  var cookedValue: _GraphemeClusterBreakPropertyValue {
    return _GraphemeClusterBreakPropertyValue(rawValue: Int(rawValue))!
  }
}

/// A lookup structure that maps a code point to its `Grapheme_Cluster_Break`
/// property value.
///
/// All tables live in one contiguous byte buffer (`_trieData`); each table is
/// addressed by a fixed byte offset substituted into this file by gyb.  Code
/// points up to U+FFFF use a two-level lookup (`_bmpLookup` -> `_bmpData`);
/// code points above that use a three-level lookup
/// (`_suppLookup1` -> `_suppLookup2` -> `_suppData`).
public // @testable
struct _UnicodeGraphemeClusterBreakPropertyTrie {
  /// Checks that the constants substituted into this file by gyb match the
  /// metadata that accompanies the trie data.
  ///
  /// The checks use `_sanityCheck`, so they are only performed in builds with
  /// internal checks enabled.
  static func _checkParameters() {
    let metadata = _swift_stdlib_GraphemeClusterBreakPropertyTrieMetadata

    _sanityCheck(metadata.BMPFirstLevelIndexBits == ${BMPFirstLevelIndexBits})
    _sanityCheck(metadata.BMPDataOffsetBits == ${BMPDataOffsetBits})
    _sanityCheck(metadata.SuppFirstLevelIndexBits == ${SuppFirstLevelIndexBits})
    _sanityCheck(metadata.SuppSecondLevelIndexBits == ${SuppSecondLevelIndexBits})
    _sanityCheck(metadata.SuppDataOffsetBits == ${SuppDataOffsetBits})

    _sanityCheck(metadata.BMPLookupBytesPerEntry == ${BMPLookupBytesPerEntry})
    _sanityCheck(metadata.BMPDataBytesPerEntry == ${BMPDataBytesPerEntry})
    _sanityCheck(metadata.SuppLookup1BytesPerEntry == ${SuppLookup1BytesPerEntry})
    _sanityCheck(metadata.SuppLookup2BytesPerEntry == ${SuppLookup2BytesPerEntry})
    _sanityCheck(metadata.SuppDataBytesPerEntry == ${SuppDataBytesPerEntry})

    _sanityCheck(metadata.TrieSize == ${TrieSize})

    _sanityCheck(metadata.BMPLookupBytesOffset == ${BMPLookupBytesOffset})
    _sanityCheck(metadata.BMPDataBytesOffset == ${BMPDataBytesOffset})
    _sanityCheck(metadata.SuppLookup1BytesOffset == ${SuppLookup1BytesOffset})
    _sanityCheck(metadata.SuppLookup2BytesOffset == ${SuppLookup2BytesOffset})
    _sanityCheck(metadata.SuppDataBytesOffset == ${SuppDataBytesOffset})
  }

  /// Base address of the trie; every table below is a byte offset from it.
  let _trieData: UnsafePointer<UInt8>

  // Each accessor is only emitted when its table uses one byte per entry,
  // which is why the element type is spelled `UInt8`.
% if BMPLookupBytesPerEntry == 1:
  /// First-level table for code points up to U+FFFF.
  @_transparent var _bmpLookup: UnsafePointer<UInt8> {
    return _trieData + ${BMPLookupBytesOffset}
  }
% end

% if BMPDataBytesPerEntry == 1:
  /// Data blocks for code points up to U+FFFF.
  @_transparent var _bmpData: UnsafePointer<UInt8> {
    return _trieData + ${BMPDataBytesOffset}
  }
% end

% if SuppLookup1BytesPerEntry == 1:
  /// First-level table for code points above U+FFFF.
  @_transparent var _suppLookup1: UnsafePointer<UInt8> {
    return _trieData + ${SuppLookup1BytesOffset}
  }
% end

% if SuppLookup2BytesPerEntry == 1:
  /// Second-level table for code points above U+FFFF.
  @_transparent var _suppLookup2: UnsafePointer<UInt8> {
    return _trieData + ${SuppLookup2BytesOffset}
  }
% end

% if SuppDataBytesPerEntry == 1:
  /// Data blocks for code points above U+FFFF.
  @_transparent var _suppData: UnsafePointer<UInt8> {
    return _trieData + ${SuppDataBytesOffset}
  }
% end

  /// Creates a trie backed by `_swift_stdlib_GraphemeClusterBreakPropertyTrie`,
  /// after running `_checkParameters()`.
  public // @testable
  init() {
    _UnicodeGraphemeClusterBreakPropertyTrie._checkParameters()
    _trieData = _swift_stdlib_GraphemeClusterBreakPropertyTrie
  }

  /// Returns the index into `_bmpLookup` for `cp`: the bits of `cp` above the
  /// low ${BMPDataOffsetBits} data-offset bits.
  @_transparent
  func _getBMPFirstLevelIndex(_ cp: UInt32) -> Int {
    // The first-level index is what remains once the data-offset bits (see
    // `_getBMPDataOffset`) are dropped, so the shift amount is the data-offset
    // width.  This mirrors `_getSuppFirstLevelIndex` and stays correct even if
    // the two BMP bit widths are ever regenerated with different values.
    return Int(cp >> ${BMPDataOffsetBits})
  }

  /// Returns the offset of `cp` within its BMP data block: the low
  /// ${BMPDataOffsetBits} bits of `cp`.
  @_transparent
  func _getBMPDataOffset(_ cp: UInt32) -> Int {
    return Int(cp & ((1 << ${BMPDataOffsetBits}) - 1))
  }

  /// Returns the index into `_suppLookup1` for `cp`: the bits of `cp` above
  /// the second-level index and data-offset bits.
  @_transparent
  func _getSuppFirstLevelIndex(_ cp: UInt32) -> Int {
    return Int(cp >> (${SuppSecondLevelIndexBits} + ${SuppDataOffsetBits}))
  }

  /// Returns the index of `cp` within its second-level block: the
  /// ${SuppSecondLevelIndexBits} bits directly above the data-offset bits.
  @_transparent
  func _getSuppSecondLevelIndex(_ cp: UInt32) -> Int {
    return Int((cp >> ${SuppDataOffsetBits}) &
        ((1 << ${SuppSecondLevelIndexBits}) - 1))
  }

  /// Returns the offset of `cp` within its supplementary data block: the low
  /// ${SuppDataOffsetBits} bits of `cp`.
  @_transparent
  func _getSuppDataOffset(_ cp: UInt32) -> Int {
    return Int(cp & ((1 << ${SuppDataOffsetBits}) - 1))
  }

  /// Returns the raw `Grapheme_Cluster_Break` property value of `codePoint`.
  ///
  /// - Parameter codePoint: The code point to look up.  Values above
  ///   `0x10ffff` fail a `_precondition`.
  /// - Returns: The value stored in the trie, without conversion to
  ///   `_GraphemeClusterBreakPropertyValue`.
  func getPropertyRawValue(
    _ codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyRawValue {
    // Note: for optimization, the code below uses '&+' instead of '+' to avoid
    // a few branches.  There is no possibility of overflow here.
    //
    // The optimizer could figure this out, but right now it keeps extra checks
    // if '+' is used.

    if _fastPath(codePoint <= 0xffff) {
      // Two-level lookup: block index, then entry within the block.
      let dataBlockIndex = Int(_bmpLookup[_getBMPFirstLevelIndex(codePoint)])
      return _GraphemeClusterBreakPropertyRawValue(
        _bmpData[
          (dataBlockIndex << ${BMPDataOffsetBits}) &+
            _getBMPDataOffset(codePoint)])
    } else {
      _precondition(codePoint <= 0x10ffff)
      // Three-level lookup: second-level block index, then data block index,
      // then entry within the data block.
      let secondLookupIndex =
        Int(_suppLookup1[_getSuppFirstLevelIndex(codePoint)])
      let dataBlockIndex = Int(_suppLookup2[
        (secondLookupIndex << ${SuppSecondLevelIndexBits}) &+
          _getSuppSecondLevelIndex(codePoint)])
      return _GraphemeClusterBreakPropertyRawValue(
        _suppData[
          (dataBlockIndex << ${SuppDataOffsetBits}) &+
            _getSuppDataOffset(codePoint)])
    }
  }

  /// Returns the `Grapheme_Cluster_Break` property value of `codePoint` as an
  /// enum case.
  ///
  /// This goes through `cookedValue`, which is the expensive conversion; use
  /// `getPropertyRawValue(_:)` in hot code paths.
  public // @testable
  func getPropertyValue(
    _ codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyValue {
    return getPropertyRawValue(codePoint).cookedValue
  }
}

// FIXME(ABI)#74 : don't mark this type versioned, or any of its APIs inlineable.
// Grapheme cluster segmentation uses a completely different algorithm in
// Unicode 9.0.
/// Answers grapheme cluster boundary questions from a table indexed by
/// `Grapheme_Cluster_Break` property values.
internal struct _UnicodeExtendedGraphemeClusterSegmenter {
  /// One row per first property value.  A set bit at position `n` in a row
  /// means there is no boundary before a code point whose raw property value
  /// is `n`.
  let _noBoundaryRulesMatrix: UnsafePointer<UInt16>

  /// Creates a segmenter backed by
  /// `_swift_stdlib_ExtendedGraphemeClusterNoBoundaryRulesMatrix`.
  init() {
    _noBoundaryRulesMatrix =
      _swift_stdlib_ExtendedGraphemeClusterNoBoundaryRulesMatrix
  }

  /// Returns `true` if there is always a grapheme cluster break after a code
  /// point with a given `Grapheme_Cluster_Break` property value.
  func isBoundaryAfter(_ gcb: _GraphemeClusterBreakPropertyRawValue) -> Bool {
    // An all-zero row has no "no boundary" rule for any following value.
    let ruleRow = _noBoundaryRulesMatrix[Int(gcb.rawValue)]
    return ruleRow == 0
  }

  /// Returns `true` if there is a grapheme cluster break between code points
  /// with given `Grapheme_Cluster_Break` property values.
  func isBoundary(
    _ gcb1: _GraphemeClusterBreakPropertyRawValue,
    _ gcb2: _GraphemeClusterBreakPropertyRawValue
  ) -> Bool {
    // Test the bit for `gcb2` in the row for `gcb1`; a clear bit is a boundary.
    let ruleRow = _noBoundaryRulesMatrix[Int(gcb1.rawValue)]
    return (ruleRow & (1 << UInt16(gcb2.rawValue))) == 0
  }
}

// ${'Local Variables'}:
// eval: (read-only-mode 1)
// End: