Files
swift-mirror/stdlib/core/UnicodeTrie.swift.gyb
Dmitri Hrybenko 3f2006a00b stdlib/UnicodeTrie: add more comments, replace a _precondition()/#if
pair with _sanityCheck

Swift SVN r19395
2014-07-01 13:18:20 +00:00

263 lines
7.9 KiB
Swift

//===--- UnicodeTrie.swift.gyb --------------------------------*- swift -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// A custom trie implementation to quickly retrieve Unicode property values.
//
//===----------------------------------------------------------------------===//
%{
# Layout parameters of the grapheme cluster break property trie. They are
# substituted into the Swift code generated from this template.
#
# Note: keep these constants synchronized with the data that is actually
# generated. There is a runtime check for this, but it is only performed in
# builds with INTERNAL_CHECKS_ENABLED.
#
# Bit widths used to split a code point into lookup-table indices and an
# offset inside a data block.
BMPFirstLevelIndexBits = 8
BMPDataOffsetBits = 8
SuppFirstLevelIndexBits = 5
SuppSecondLevelIndexBits = 8
SuppDataOffsetBits = 8
# Size in bytes of one entry in each table. The table accessors in this
# template are only emitted when the corresponding value is 1, and
# BMPDataBytesPerEntry also determines the width of the raw value integer type.
BMPLookupBytesPerEntry = 1
BMPDataBytesPerEntry = 1
SuppLookup1BytesPerEntry = 1
SuppLookup2BytesPerEntry = 1
SuppDataBytesPerEntry = 1
# Compared against the metadata's TrieSize at runtime; presumably the total
# size of the trie data in bytes -- confirm against the data generator.
TrieSize = 15904
# Offsets added to the base trie data pointer to locate each table.
BMPLookupBytesOffset = 0
BMPDataBytesOffset = 256
SuppLookup1BytesOffset = 12032
SuppLookup2BytesOffset = 12064
SuppDataBytesOffset = 12832
}%
import SwiftShims
/// The possible values of the Unicode `Grapheme_Cluster_Break` property.
///
/// Each case carries an explicit integer raw value, and `description` yields
/// the case name as a string.
@internal enum _GraphemeClusterBreakPropertyValue : Int, Printable {
  case Other = 0
  case CR = 1
  case LF = 2
  case Control = 3
  case Extend = 4
  case Regional_Indicator = 5
  case Prepend = 6
  case SpacingMark = 7
  case L = 8
  case V = 9
  case T = 10
  case LV = 11
  case LVT = 12

  /// The name of the property value, spelled exactly like the case name.
  var description: String {
    switch self {
    case .Other: return "Other"
    case .CR: return "CR"
    case .LF: return "LF"
    case .Control: return "Control"
    case .Extend: return "Extend"
    case .Regional_Indicator: return "Regional_Indicator"
    case .Prepend: return "Prepend"
    case .SpacingMark: return "SpacingMark"
    case .L: return "L"
    case .V: return "V"
    case .T: return "T"
    case .LV: return "LV"
    case .LVT: return "LVT"
    }
  }
}
// Turning a raw enum value into the enum itself is expensive.  Hot code paths
// therefore pass around this type-safe wrapper of the raw property value and
// skip the conversion unless the enum is really needed.
struct _GraphemeClusterBreakPropertyRawValue {
  // The unconverted property value.  Its integer width is derived from the
  // size of a trie data entry.
  var rawValue: UInt${BMPDataBytesPerEntry * 8}

  init(_ rawValue: UInt${BMPDataBytesPerEntry * 8}) {
    self.rawValue = rawValue
  }

  // The wrapped value converted to the enum.
  //
  // Use with care: this operation is expensive (even with optimization turned
  // on the compiler generates code for a switch).  The conversion is
  // force-unwrapped, so a raw value without a matching enum case traps.
  var cookedValue: _GraphemeClusterBreakPropertyValue {
    let widenedValue = Int(rawValue)
    return _GraphemeClusterBreakPropertyValue.fromRaw(widenedValue)!
  }
}
/// A lookup structure that maps a Unicode code point to its raw
/// `Grapheme_Cluster_Break` property value.
///
/// Code points up to 0xffff are resolved in two steps (`_BMPLookup`, then
/// `_BMPData`).  Larger code points, up to 0x10ffff, are resolved in three
/// steps (`_SuppLookup1`, `_SuppLookup2`, then `_SuppData`).
@internal struct _UnicodeGraphemeClusterBreakPropertyTrie {
  /// Checks that the layout constants this file was generated with agree with
  /// the metadata that accompanies the trie data.
  static func _checkParameters() {
    let meta = _swift_stdlib_GraphemeClusterBreakPropertyTrieMetadata

    // Bit widths used to split a code point.
    _sanityCheck(meta.BMPFirstLevelIndexBits == ${BMPFirstLevelIndexBits})
    _sanityCheck(meta.BMPDataOffsetBits == ${BMPDataOffsetBits})
    _sanityCheck(meta.SuppFirstLevelIndexBits == ${SuppFirstLevelIndexBits})
    _sanityCheck(meta.SuppSecondLevelIndexBits == ${SuppSecondLevelIndexBits})
    _sanityCheck(meta.SuppDataOffsetBits == ${SuppDataOffsetBits})

    // Entry sizes of the individual tables.
    _sanityCheck(meta.BMPLookupBytesPerEntry == ${BMPLookupBytesPerEntry})
    _sanityCheck(meta.BMPDataBytesPerEntry == ${BMPDataBytesPerEntry})
    _sanityCheck(meta.SuppLookup1BytesPerEntry == ${SuppLookup1BytesPerEntry})
    _sanityCheck(meta.SuppLookup2BytesPerEntry == ${SuppLookup2BytesPerEntry})
    _sanityCheck(meta.SuppDataBytesPerEntry == ${SuppDataBytesPerEntry})

    // Overall size and the offset of each table.
    _sanityCheck(meta.TrieSize == ${TrieSize})
    _sanityCheck(meta.BMPLookupBytesOffset == ${BMPLookupBytesOffset})
    _sanityCheck(meta.BMPDataBytesOffset == ${BMPDataBytesOffset})
    _sanityCheck(meta.SuppLookup1BytesOffset == ${SuppLookup1BytesOffset})
    _sanityCheck(meta.SuppLookup2BytesOffset == ${SuppLookup2BytesOffset})
    _sanityCheck(meta.SuppDataBytesOffset == ${SuppDataBytesOffset})
  }

  /// Base pointer of the trie data; every table below is located at a fixed
  /// offset from it.
  let _trieData: ConstUnsafePointer<UInt8>

  // Each table accessor is only generated when the entries of that table are
  // one byte wide.
% if BMPLookupBytesPerEntry == 1:
  @transparent var _BMPLookup: ConstUnsafePointer<UInt8> {
    return _trieData + ${BMPLookupBytesOffset}
  }
% end

% if BMPDataBytesPerEntry == 1:
  @transparent var _BMPData: ConstUnsafePointer<UInt8> {
    return _trieData + ${BMPDataBytesOffset}
  }
% end

% if SuppLookup1BytesPerEntry == 1:
  @transparent var _SuppLookup1: ConstUnsafePointer<UInt8> {
    return _trieData + ${SuppLookup1BytesOffset}
  }
% end

% if SuppLookup2BytesPerEntry == 1:
  @transparent var _SuppLookup2: ConstUnsafePointer<UInt8> {
    return _trieData + ${SuppLookup2BytesOffset}
  }
% end

% if SuppDataBytesPerEntry == 1:
  @transparent var _SuppData: ConstUnsafePointer<UInt8> {
    return _trieData + ${SuppDataBytesOffset}
  }
% end

  /// Runs the parameter checks and then binds the trie to its data.
  init() {
    _UnicodeGraphemeClusterBreakPropertyTrie._checkParameters()
    self._trieData = _swift_stdlib_GraphemeClusterBreakPropertyTrie
  }

  /// Index into `_BMPLookup`: the bits of `cp` above the data offset bits.
  @transparent
  func _getBMPFirstLevelIndex(cp: UInt32) -> Int {
    let highBits = cp >> ${BMPFirstLevelIndexBits}
    return Int(highBits)
  }

  /// Offset of `cp` inside its BMP data block: the low bits of `cp`.
  @transparent
  func _getBMPDataOffset(cp: UInt32) -> Int {
    let offsetMask: UInt32 = (1 << ${BMPDataOffsetBits}) - 1
    return Int(cp & offsetMask)
  }

  /// Index into `_SuppLookup1`: the bits of `cp` above the second-level index
  /// and data offset bits.
  @transparent
  func _getSuppFirstLevelIndex(cp: UInt32) -> Int {
    let highBits = cp >> (${SuppSecondLevelIndexBits} + ${SuppDataOffsetBits})
    return Int(highBits)
  }

  /// Index of `cp` inside its `_SuppLookup2` block: the bits of `cp` between
  /// the first-level index and the data offset.
  @transparent
  func _getSuppSecondLevelIndex(cp: UInt32) -> Int {
    let indexMask: UInt32 = (1 << ${SuppSecondLevelIndexBits}) - 1
    return Int((cp >> ${SuppDataOffsetBits}) & indexMask)
  }

  /// Offset of `cp` inside its supplementary data block: the low bits of `cp`.
  @transparent
  func _getSuppDataOffset(cp: UInt32) -> Int {
    let offsetMask: UInt32 = (1 << ${SuppDataOffsetBits}) - 1
    return Int(cp & offsetMask)
  }

  /// Returns the raw `Grapheme_Cluster_Break` property value of `codePoint`.
  ///
  /// `codePoint` must not exceed 0x10ffff; this is enforced with a
  /// precondition on the path taken by code points above 0xffff.
  func getPropertyRawValue(
    codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyRawValue {
    // Note: the index arithmetic below deliberately uses '&+' rather than '+'
    // to avoid a few branches.  Overflow is not possible here.
    //
    // The optimizer could figure this out, but right now it keeps extra checks
    // if '+' is used.
    if _fastPath(codePoint <= 0xffff) {
      let blockIndex = Int(_BMPLookup[_getBMPFirstLevelIndex(codePoint)])
      let entryIndex =
        (blockIndex << ${BMPDataOffsetBits}) &+ _getBMPDataOffset(codePoint)
      return _GraphemeClusterBreakPropertyRawValue(_BMPData[entryIndex])
    }

    // Code points above 0xffff go through one additional lookup level.
    _precondition(codePoint <= 0x10ffff)
    let lookup2Block = Int(_SuppLookup1[_getSuppFirstLevelIndex(codePoint)])
    let lookup2Index =
      (lookup2Block << ${SuppSecondLevelIndexBits}) &+
      _getSuppSecondLevelIndex(codePoint)
    let blockIndex = Int(_SuppLookup2[lookup2Index])
    let entryIndex =
      (blockIndex << ${SuppDataOffsetBits}) &+ _getSuppDataOffset(codePoint)
    return _GraphemeClusterBreakPropertyRawValue(_SuppData[entryIndex])
  }

  /// Returns the `Grapheme_Cluster_Break` property value of `codePoint` as an
  /// enum, by converting the result of `getPropertyRawValue`.
  func getPropertyValue(
    codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyValue {
    let rawProperty = getPropertyRawValue(codePoint)
    return rawProperty.cookedValue
  }
}
/// Answers grapheme cluster boundary questions from a table of "no boundary"
/// rules.
///
/// The table holds one `UInt16` row per `Grapheme_Cluster_Break` raw value of
/// the first code point.  Within a row, a set bit at position `n` means there
/// is no boundary before a following code point whose raw value is `n`.
@internal struct _UnicodeExtendedGraphemeClusterSegmenter {
  let _noBoundaryRulesMatrix: ConstUnsafePointer<UInt16>

  init() {
    self._noBoundaryRulesMatrix =
      _swift_stdlib_ExtendedGraphemeClusterNoBoundaryRulesMatrix
  }

  /// Returns `true` if there is always a grapheme cluster break after a code
  /// point with a given `Grapheme_Cluster_Break` property value.
  func isBoundaryAfter(gcb: _GraphemeClusterBreakPropertyRawValue) -> Bool {
    // A row with no bits set has no "no boundary" rule for any successor.
    return _noBoundaryRulesMatrix[Int(gcb.rawValue)] == 0
  }

  /// Returns `true` if there is a grapheme cluster break between code points
  /// with given `Grapheme_Cluster_Break` property values.
  func isBoundary(
    gcb1: _GraphemeClusterBreakPropertyRawValue,
    _ gcb2: _GraphemeClusterBreakPropertyRawValue
  ) -> Bool {
    let rulesForFirst = _noBoundaryRulesMatrix[Int(gcb1.rawValue)]
    let secondBit: UInt16 = 1 << UInt16(gcb2.rawValue)
    return (rulesForFirst & secondBit) == 0
  }
}
// ${'Local Variables'}:
// eval: (read-only-mode 1)
// End: