mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
241 lines
7.7 KiB
Swift
241 lines
7.7 KiB
Swift
//===--- UnicodeTrie.swift.gyb --------------------------------*- swift -*-===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// A custom trie implementation to quickly retrieve Unicode property values.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
%{
|
|
|
|
# Note: keep these constants synchronized with the data that it is actually
|
|
# generated. There is a runtime check for this, but it is only performed in
|
|
# builds with INTERNAL_CHECKS_ENABLED.
|
|
|
|
BMPFirstLevelIndexBits = 8
|
|
BMPDataOffsetBits = 8
|
|
SuppFirstLevelIndexBits = 5
|
|
SuppSecondLevelIndexBits = 8
|
|
SuppDataOffsetBits = 8
|
|
|
|
BMPLookupBytesPerEntry = 1
|
|
BMPDataBytesPerEntry = 1
|
|
SuppLookup1BytesPerEntry = 1
|
|
SuppLookup2BytesPerEntry = 1
|
|
SuppDataBytesPerEntry = 1
|
|
|
|
TrieSize = 18961
|
|
|
|
BMPLookupBytesOffset = 0
|
|
BMPDataBytesOffset = 256
|
|
SuppLookup1BytesOffset = 12032
|
|
SuppLookup2BytesOffset = 12049
|
|
SuppDataBytesOffset = 12817
|
|
|
|
}%
|
|
|
|
import SwiftShims
|
|
|
|
/// A value of the `Grapheme_Cluster_Break` property.
///
/// The case names must be kept in sync with the
/// 'GraphemeClusterBreakProperty' enum in C++ and with the names in the
/// GYBUnicodeDataUtils script.  Raw values are spelled out explicitly so that
/// reordering the cases cannot silently change them.
public // @testable
enum _GraphemeClusterBreakPropertyValue : Int {
  case Other = 0, CR = 1, LF = 2, Control = 3
  case Extend = 4, Regional_Indicator = 5, Prepend = 6, SpacingMark = 7
  case L = 8, V = 9, T = 10, LV = 11, LVT = 12
}
|
|
|
|
// Turning a raw enum value into the enum itself is expensive, so hot code
// paths pass the raw property value around inside this type-safe wrapper and
// only pay for the conversion when the enum is really needed.
struct _GraphemeClusterBreakPropertyRawValue {
  /// The wrapped raw property value.
  var rawValue: UInt${BMPDataBytesPerEntry * 8}

  /// Wraps `rawValue` without validating or converting it.
  init(_ rawValue: UInt${BMPDataBytesPerEntry * 8}) {
    self.rawValue = rawValue
  }

  // Use with care: this operation is expensive (even with optimization
  // turned on the compiler generates code for a switch).
  //
  // Traps if `rawValue` does not correspond to any enum case.
  var cookedValue: _GraphemeClusterBreakPropertyValue {
    let widened = Int(rawValue)
    return _GraphemeClusterBreakPropertyValue(rawValue: widened)!
  }
}
|
|
|
|
/// Lookup structure over the generated `Grapheme_Cluster_Break` trie data.
///
/// Code points up to U+FFFF are resolved through one lookup table followed by
/// a data table.  Code points above that are resolved through two lookup
/// tables followed by a data table.  All tables live in a single byte buffer
/// at the offsets recorded in the template constants at the top of the file.
public // @testable
struct _UnicodeGraphemeClusterBreakPropertyTrie {
  /// Checks that the constants baked into this template agree with the
  /// metadata that accompanies the trie data.
  ///
  /// The checks use `_sanityCheck`, so they are only performed in builds with
  /// INTERNAL_CHECKS_ENABLED.
  static func _checkParameters() {
    let metadata = _swift_stdlib_GraphemeClusterBreakPropertyTrieMetadata

    // Bit widths of the code point fields.
    _sanityCheck(metadata.BMPFirstLevelIndexBits == ${BMPFirstLevelIndexBits})
    _sanityCheck(metadata.BMPDataOffsetBits == ${BMPDataOffsetBits})
    _sanityCheck(metadata.SuppFirstLevelIndexBits == ${SuppFirstLevelIndexBits})
    _sanityCheck(metadata.SuppSecondLevelIndexBits == ${SuppSecondLevelIndexBits})
    _sanityCheck(metadata.SuppDataOffsetBits == ${SuppDataOffsetBits})

    // Entry sizes of the individual tables.
    _sanityCheck(metadata.BMPLookupBytesPerEntry == ${BMPLookupBytesPerEntry})
    _sanityCheck(metadata.BMPDataBytesPerEntry == ${BMPDataBytesPerEntry})
    _sanityCheck(metadata.SuppLookup1BytesPerEntry == ${SuppLookup1BytesPerEntry})
    _sanityCheck(metadata.SuppLookup2BytesPerEntry == ${SuppLookup2BytesPerEntry})
    _sanityCheck(metadata.SuppDataBytesPerEntry == ${SuppDataBytesPerEntry})

    _sanityCheck(metadata.TrieSize == ${TrieSize})

    // Offsets of the individual tables inside the trie data.
    _sanityCheck(metadata.BMPLookupBytesOffset == ${BMPLookupBytesOffset})
    _sanityCheck(metadata.BMPDataBytesOffset == ${BMPDataBytesOffset})
    _sanityCheck(metadata.SuppLookup1BytesOffset == ${SuppLookup1BytesOffset})
    _sanityCheck(metadata.SuppLookup2BytesOffset == ${SuppLookup2BytesOffset})
    _sanityCheck(metadata.SuppDataBytesOffset == ${SuppDataBytesOffset})
  }

  /// Start of the trie data; every table below is addressed relative to it.
  let _trieData: UnsafePointer<UInt8>

  // The table accessors are only generated for tables with one-byte entries.

% if BMPLookupBytesPerEntry == 1:
  /// First-level lookup table for code points up to U+FFFF.
  @_transparent var _bmpLookup: UnsafePointer<UInt8> {
    return _trieData + ${BMPLookupBytesOffset}
  }
% end

% if BMPDataBytesPerEntry == 1:
  /// Data blocks holding property values for code points up to U+FFFF.
  @_transparent var _bmpData: UnsafePointer<UInt8> {
    return _trieData + ${BMPDataBytesOffset}
  }
% end

% if SuppLookup1BytesPerEntry == 1:
  /// First-level lookup table for code points above U+FFFF.
  @_transparent var _suppLookup1: UnsafePointer<UInt8> {
    return _trieData + ${SuppLookup1BytesOffset}
  }
% end

% if SuppLookup2BytesPerEntry == 1:
  /// Second-level lookup table for code points above U+FFFF.
  @_transparent var _suppLookup2: UnsafePointer<UInt8> {
    return _trieData + ${SuppLookup2BytesOffset}
  }
% end

% if SuppDataBytesPerEntry == 1:
  /// Data blocks holding property values for code points above U+FFFF.
  @_transparent var _suppData: UnsafePointer<UInt8> {
    return _trieData + ${SuppDataBytesOffset}
  }
% end

  /// Creates a trie backed by the trie data exported through SwiftShims,
  /// after running the parameter checks.
  public // @testable
  init() {
    _UnicodeGraphemeClusterBreakPropertyTrie._checkParameters()
    _trieData = _swift_stdlib_GraphemeClusterBreakPropertyTrie
  }

  /// Returns the index into `_bmpLookup` for `cp`.
  @_transparent
  func _getBMPFirstLevelIndex(_ cp: UInt32) -> Int {
    // The first-level index is the part of the code point that sits above the
    // data-offset bits, so the shift amount is the data-offset width (the same
    // scheme `_getSuppFirstLevelIndex` uses).  Shifting by the first-level
    // width only happened to work while both widths were equal.
    return Int(cp >> ${BMPDataOffsetBits})
  }

  /// Returns the offset of `cp` inside its BMP data block.
  @_transparent
  func _getBMPDataOffset(_ cp: UInt32) -> Int {
    return Int(cp & ((1 << ${BMPDataOffsetBits}) - 1))
  }

  /// Returns the index into `_suppLookup1` for `cp`.
  @_transparent
  func _getSuppFirstLevelIndex(_ cp: UInt32) -> Int {
    return Int(cp >> (${SuppSecondLevelIndexBits} + ${SuppDataOffsetBits}))
  }

  /// Returns the index of `cp` inside its second-level lookup block.
  @_transparent
  func _getSuppSecondLevelIndex(_ cp: UInt32) -> Int {
    return Int((cp >> ${SuppDataOffsetBits}) &
      ((1 << ${SuppSecondLevelIndexBits}) - 1))
  }

  /// Returns the offset of `cp` inside its supplementary data block.
  @_transparent
  func _getSuppDataOffset(_ cp: UInt32) -> Int {
    return Int(cp & ((1 << ${SuppDataOffsetBits}) - 1))
  }

  /// Returns the raw `Grapheme_Cluster_Break` property value stored in the
  /// trie for `codePoint`.
  ///
  /// - Precondition: `codePoint <= 0x10ffff`.
  func getPropertyRawValue(
    _ codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyRawValue {
    // Note: for optimization, the code below uses '&+' instead of '+' to avoid
    // a few branches.  There is no possibility of overflow here.
    //
    // The optimizer could figure this out, but right now it keeps extra checks
    // if '+' is used.

    if _fastPath(codePoint <= 0xffff) {
      // One lookup selects the data block, then the low bits select the entry.
      let dataBlockIndex = Int(_bmpLookup[_getBMPFirstLevelIndex(codePoint)])
      return _GraphemeClusterBreakPropertyRawValue(
        _bmpData[
          (dataBlockIndex << ${BMPDataOffsetBits}) &+
          _getBMPDataOffset(codePoint)])
    } else {
      _precondition(codePoint <= 0x10ffff)
      // Two lookups select the data block, then the low bits select the entry.
      let secondLookupIndex = Int(_suppLookup1[_getSuppFirstLevelIndex(codePoint)])
      let dataBlockIndex = Int(_suppLookup2[
        (secondLookupIndex << ${SuppSecondLevelIndexBits}) &+
        _getSuppSecondLevelIndex(codePoint)])
      return _GraphemeClusterBreakPropertyRawValue(
        _suppData[
          (dataBlockIndex << ${SuppDataOffsetBits}) &+
          _getSuppDataOffset(codePoint)])
    }
  }

  /// Returns the `Grapheme_Cluster_Break` property value for `codePoint` as
  /// an enum.
  ///
  /// This goes through `cookedValue`, which is expensive; hot paths should use
  /// `getPropertyRawValue` instead.
  ///
  /// - Precondition: `codePoint <= 0x10ffff`.
  public // @testable
  func getPropertyValue(
    _ codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyValue {
    return getPropertyRawValue(codePoint).cookedValue
  }
}
|
|
|
|
// FIXME(ABI)#74 : don't mark this type versioned, or any of its APIs inlineable.
// Grapheme cluster segmentation uses a completely different algorithm in
// Unicode 9.0.
internal struct _UnicodeExtendedGraphemeClusterSegmenter {
  /// Rule table with one 16-bit row per raw `Grapheme_Cluster_Break` value of
  /// the first code point.  Bit `n` of a row is set when there is no boundary
  /// before a code point whose raw property value is `n`.
  let _noBoundaryRulesMatrix: UnsafePointer<UInt16>

  /// Creates a segmenter backed by the rule table exported through SwiftShims.
  init() {
    _noBoundaryRulesMatrix =
      _swift_stdlib_ExtendedGraphemeClusterNoBoundaryRulesMatrix
  }

  /// Returns `true` if there is always a grapheme cluster break after a code
  /// point with a given `Grapheme_Cluster_Break` property value.
  func isBoundaryAfter(_ gcb: _GraphemeClusterBreakPropertyRawValue) -> Bool {
    // An all-zero row means no following property value suppresses the break.
    return _noBoundaryRulesMatrix[Int(gcb.rawValue)] == 0
  }

  /// Returns `true` if there is a grapheme cluster break between code points
  /// with given `Grapheme_Cluster_Break` property values.
  func isBoundary(
    _ gcb1: _GraphemeClusterBreakPropertyRawValue,
    _ gcb2: _GraphemeClusterBreakPropertyRawValue
  ) -> Bool {
    let noBoundaryBits = _noBoundaryRulesMatrix[Int(gcb1.rawValue)]
    let followerBit: UInt16 = 1 << UInt16(gcb2.rawValue)
    return (noBoundaryBits & followerBit) == 0
  }
}
|
|
|
|
// ${'Local Variables'}:
|
|
// eval: (read-only-mode 1)
|
|
// End:
|