mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
trie parameters and fix a few bugs The bugs did not affect correctness of the particular instance of trie created for grapheme cluster property, because trie parameters that were confused with each other happened to be equal. Also, fix a trie size bug: we were creating a trie large enough to store information for 0x200000 code points, but there are only 0x10ffff. It saved only 15 bytes in the grapheme cluster tree, because that extra information was compressed with some supplementary planes that also had default values. This also improved trie generation time by almost 2x. Swift SVN r19457
263 lines
7.9 KiB
Swift
263 lines
7.9 KiB
Swift
//===--- UnicodeTrie.swift.gyb --------------------------------*- swift -*-===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See http://swift.org/LICENSE.txt for license information
|
|
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// A custom trie implementation to quickly retrieve Unicode property values.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
%{
|
|
|
|
# Note: keep these constants synchronized with the data that is actually
|
|
# generated. There is a runtime check for this, but it is only performed in
|
|
# builds with INTERNAL_CHECKS_ENABLED.
|
|
|
|
# Bit widths used to split a code point into per-level trie indices and the
# offset inside a data block (used as shift amounts and masks by the lookup
# helpers further down in this file).
BMPFirstLevelIndexBits = 8
|
|
BMPDataOffsetBits = 8
|
|
SuppFirstLevelIndexBits = 5
|
|
SuppSecondLevelIndexBits = 8
|
|
SuppDataOffsetBits = 8
|
|
|
|
# Width, in bytes, of a single entry in each table. The Swift accessors below
# are only generated for tables whose entries are one byte wide.
BMPLookupBytesPerEntry = 1
|
|
BMPDataBytesPerEntry = 1
|
|
SuppLookup1BytesPerEntry = 1
|
|
SuppLookup2BytesPerEntry = 1
|
|
SuppDataBytesPerEntry = 1
|
|
|
|
# Total size of the generated trie data (presumably in bytes -- confirm
# against the generator).
TrieSize = 15889
|
|
|
|
# Offset of each table from the start of the trie data; these are added to a
# byte pointer to the trie by the accessors below.
BMPLookupBytesOffset = 0
|
|
BMPDataBytesOffset = 256
|
|
SuppLookup1BytesOffset = 12032
|
|
SuppLookup2BytesOffset = 12049
|
|
SuppDataBytesOffset = 12817
|
|
|
|
}%
|
|
|
|
import SwiftShims
|
|
|
|
/// Values of the Unicode `Grapheme_Cluster_Break` property.
///
/// The integer raw values are the numbers that the property trie stores for
/// each code point, so they must not be renumbered independently of the
/// generated data.
@internal enum _GraphemeClusterBreakPropertyValue : Int, Printable {
  case Other = 0
  case CR = 1
  case LF = 2
  case Control = 3
  case Extend = 4
  case Regional_Indicator = 5
  case Prepend = 6
  case SpacingMark = 7
  case L = 8
  case V = 9
  case T = 10
  case LV = 11
  case LVT = 12

  /// The name of the property value, spelled exactly like the enum case.
  var description: String {
    switch self {
    case .Other: return "Other"
    case .CR: return "CR"
    case .LF: return "LF"
    case .Control: return "Control"
    case .Extend: return "Extend"
    case .Regional_Indicator: return "Regional_Indicator"
    case .Prepend: return "Prepend"
    case .SpacingMark: return "SpacingMark"
    case .L: return "L"
    case .V: return "V"
    case .T: return "T"
    case .LV: return "LV"
    case .LVT: return "LVT"
    }
  }
}
|
|
|
|
/// A type-safe wrapper around a raw `Grapheme_Cluster_Break` property value.
///
/// Turning a raw value into `_GraphemeClusterBreakPropertyValue` is
/// expensive, so hot code paths carry this wrapper around instead and only
/// pay for the conversion when `cookedValue` is requested explicitly.
struct _GraphemeClusterBreakPropertyRawValue {
  /// The property value exactly as it was read from the trie.
  var rawValue: UInt${BMPDataBytesPerEntry * 8}

  init(_ rawValue: UInt${BMPDataBytesPerEntry * 8}) {
    self.rawValue = rawValue
  }

  /// The enum case that corresponds to `rawValue`.
  ///
  /// Use with care: this operation is expensive (even with optimization
  /// turned on the compiler generates code for a switch).  The result is
  /// force-unwrapped, so a raw value without a matching case traps.
  var cookedValue: _GraphemeClusterBreakPropertyValue {
    let converted = _GraphemeClusterBreakPropertyValue.fromRaw(Int(rawValue))
    return converted!
  }
}
|
|
|
|
/// Read-only view over the generated trie that maps a Unicode code point to
/// its `Grapheme_Cluster_Break` property value.
///
/// Code points up to 0xffff are resolved with a two-stage lookup
/// (`_BMPLookup` -> `_BMPData`).  Larger code points use a three-stage lookup
/// (`_SuppLookup1` -> `_SuppLookup2` -> `_SuppData`).
@internal struct _UnicodeGraphemeClusterBreakPropertyTrie {
  /// Checks that the constants baked into this file by gyb agree with the
  /// metadata that accompanies the generated trie data.
  static func _checkParameters() {
    let metadata = _swift_stdlib_GraphemeClusterBreakPropertyTrieMetadata

    _sanityCheck(metadata.BMPFirstLevelIndexBits == ${BMPFirstLevelIndexBits})
    _sanityCheck(metadata.BMPDataOffsetBits == ${BMPDataOffsetBits})
    _sanityCheck(metadata.SuppFirstLevelIndexBits == ${SuppFirstLevelIndexBits})
    _sanityCheck(metadata.SuppSecondLevelIndexBits == ${SuppSecondLevelIndexBits})
    _sanityCheck(metadata.SuppDataOffsetBits == ${SuppDataOffsetBits})

    _sanityCheck(metadata.BMPLookupBytesPerEntry == ${BMPLookupBytesPerEntry})
    _sanityCheck(metadata.BMPDataBytesPerEntry == ${BMPDataBytesPerEntry})
    _sanityCheck(metadata.SuppLookup1BytesPerEntry == ${SuppLookup1BytesPerEntry})
    _sanityCheck(metadata.SuppLookup2BytesPerEntry == ${SuppLookup2BytesPerEntry})
    _sanityCheck(metadata.SuppDataBytesPerEntry == ${SuppDataBytesPerEntry})

    _sanityCheck(metadata.TrieSize == ${TrieSize})

    _sanityCheck(metadata.BMPLookupBytesOffset == ${BMPLookupBytesOffset})
    _sanityCheck(metadata.BMPDataBytesOffset == ${BMPDataBytesOffset})
    _sanityCheck(metadata.SuppLookup1BytesOffset == ${SuppLookup1BytesOffset})
    _sanityCheck(metadata.SuppLookup2BytesOffset == ${SuppLookup2BytesOffset})
    _sanityCheck(metadata.SuppDataBytesOffset == ${SuppDataBytesOffset})
  }

  /// Start of the generated trie data; every table below is addressed as a
  /// fixed byte offset from this pointer.
  let _trieData: ConstUnsafePointer<UInt8>

  // Each accessor is only generated when the corresponding table has
  // one-byte entries, because it exposes the table as `UInt8` elements.

% if BMPLookupBytesPerEntry == 1:
  /// First-stage table for code points up to 0xffff.
  @transparent var _BMPLookup: ConstUnsafePointer<UInt8> {
    return _trieData + ${BMPLookupBytesOffset}
  }
% end

% if BMPDataBytesPerEntry == 1:
  /// Data blocks for code points up to 0xffff.
  @transparent var _BMPData: ConstUnsafePointer<UInt8> {
    return _trieData + ${BMPDataBytesOffset}
  }
% end

% if SuppLookup1BytesPerEntry == 1:
  /// First-stage table for code points above 0xffff.
  @transparent var _SuppLookup1: ConstUnsafePointer<UInt8> {
    return _trieData + ${SuppLookup1BytesOffset}
  }
% end

% if SuppLookup2BytesPerEntry == 1:
  /// Second-stage table for code points above 0xffff.
  @transparent var _SuppLookup2: ConstUnsafePointer<UInt8> {
    return _trieData + ${SuppLookup2BytesOffset}
  }
% end

% if SuppDataBytesPerEntry == 1:
  /// Data blocks for code points above 0xffff.
  @transparent var _SuppData: ConstUnsafePointer<UInt8> {
    return _trieData + ${SuppDataBytesOffset}
  }
% end

  /// Validates the gyb constants against the trie metadata and binds the
  /// generated trie data.
  init() {
    _UnicodeGraphemeClusterBreakPropertyTrie._checkParameters()
    _trieData = _swift_stdlib_GraphemeClusterBreakPropertyTrie
  }

  /// Returns the index into `_BMPLookup` for `cp`: the bits of the code point
  /// that lie above the data offset.
  ///
  /// The shift amount is the width of the *data offset*, not the width of
  /// the first-level index.  The two are interchangeable only when they
  /// happen to be equal, so using the index width here would silently break
  /// the lookup as soon as the trie parameters are changed.
  @transparent
  func _getBMPFirstLevelIndex(cp: UInt32) -> Int {
    return Int(cp >> ${BMPDataOffsetBits})
  }

  /// Returns the offset of `cp` inside its BMP data block (the low
  /// data-offset bits of the code point).
  @transparent
  func _getBMPDataOffset(cp: UInt32) -> Int {
    return Int(cp & ((1 << ${BMPDataOffsetBits}) - 1))
  }

  /// Returns the index into `_SuppLookup1` for `cp`: the bits above both the
  /// second-level index and the data offset.
  @transparent
  func _getSuppFirstLevelIndex(cp: UInt32) -> Int {
    return Int(cp >> (${SuppSecondLevelIndexBits} + ${SuppDataOffsetBits}))
  }

  /// Returns the position of `cp` inside its second-level lookup block (the
  /// bits between the first-level index and the data offset).
  @transparent
  func _getSuppSecondLevelIndex(cp: UInt32) -> Int {
    return Int((cp >> ${SuppDataOffsetBits}) &
      ((1 << ${SuppSecondLevelIndexBits}) - 1))
  }

  /// Returns the offset of `cp` inside its supplementary data block (the low
  /// data-offset bits of the code point).
  @transparent
  func _getSuppDataOffset(cp: UInt32) -> Int {
    return Int(cp & ((1 << ${SuppDataOffsetBits}) - 1))
  }

  /// Looks up the raw `Grapheme_Cluster_Break` property value of `codePoint`.
  ///
  /// `codePoint` must not exceed 0x10ffff; this is enforced with
  /// `_precondition` on the path for code points above 0xffff.
  func getPropertyRawValue(
    codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyRawValue {
    // Note: for optimization, the code below uses '&+' instead of '+' to avoid
    // a few branches.  There is no possibility of overflow here.
    //
    // The optimizer could figure this out, but right now it keeps extra checks
    // if '+' is used.

    if _fastPath(codePoint <= 0xffff) {
      let dataBlockIndex = Int(_BMPLookup[_getBMPFirstLevelIndex(codePoint)])
      return _GraphemeClusterBreakPropertyRawValue(
        _BMPData[
          (dataBlockIndex << ${BMPDataOffsetBits}) &+
          _getBMPDataOffset(codePoint)])
    } else {
      _precondition(codePoint <= 0x10ffff)
      let secondLookupIndex = Int(_SuppLookup1[_getSuppFirstLevelIndex(codePoint)])
      let dataBlockIndex = Int(_SuppLookup2[
        (secondLookupIndex << ${SuppSecondLevelIndexBits}) &+
        _getSuppSecondLevelIndex(codePoint)])
      return _GraphemeClusterBreakPropertyRawValue(
        _SuppData[
          (dataBlockIndex << ${SuppDataOffsetBits}) &+
          _getSuppDataOffset(codePoint)])
    }
  }

  /// Looks up the `Grapheme_Cluster_Break` property value of `codePoint` as
  /// an enum case.
  ///
  /// This pays for the raw-to-enum conversion; code that only needs the raw
  /// value can call `getPropertyRawValue` instead.
  func getPropertyValue(
    codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyValue {
    return getPropertyRawValue(codePoint).cookedValue
  }
}
|
|
|
|
/// Answers extended grapheme cluster boundary queries from a precomputed
/// table of "no boundary" rules.
///
/// The table is indexed by the raw `Grapheme_Cluster_Break` value of the
/// first code point.  Each `UInt16` row is tested bit by bit, using the raw
/// value of the following code point as the bit position.
@internal struct _UnicodeExtendedGraphemeClusterSegmenter {
  let _noBoundaryRulesMatrix: ConstUnsafePointer<UInt16>

  init() {
    self._noBoundaryRulesMatrix =
      _swift_stdlib_ExtendedGraphemeClusterNoBoundaryRulesMatrix
  }

  /// Returns `true` if there is always a grapheme cluster break after a code
  /// point with a given `Grapheme_Cluster_Break` property value.
  func isBoundaryAfter(gcb: _GraphemeClusterBreakPropertyRawValue) -> Bool {
    // An all-zero row has no bit set for any following value.
    return _noBoundaryRulesMatrix[Int(gcb.rawValue)] == 0
  }

  /// Returns `true` if there is a grapheme cluster break between code points
  /// with given `Grapheme_Cluster_Break` property values.
  func isBoundary(
    gcb1: _GraphemeClusterBreakPropertyRawValue,
    _ gcb2: _GraphemeClusterBreakPropertyRawValue
  ) -> Bool {
    let noBoundaryMask = _noBoundaryRulesMatrix[Int(gcb1.rawValue)]
    let followerBit: UInt16 = 1 << UInt16(gcb2.rawValue)
    // A clear bit means no rule suppresses the break between the two values.
    return (noBoundaryMask & followerBit) == 0
  }
}
|
|
|
|
// ${'Local Variables'}:
|
|
// eval: (read-only-mode 1)
|
|
// End:
|