Files
swift-mirror/stdlib/core/UnicodeTrie.swift.gyb
Dmitri Hrybenko 313cfcaaa7 Unicode trie generator: add tests for the generator itself that use non-default
trie parameters and fix a few bugs

The bugs did not affect correctness of the particular instance of trie created
for grapheme cluster property, because trie parameters that were confused with
each other happened to be equal.

Also, fix a trie size bug: we were creating a trie large enough to store
information for 0x200000 code points, but the largest code point is 0x10ffff.
The fix saved only 15 bytes in the grapheme cluster trie, because that extra
information was compressed with some supplementary planes that also had
default values.  This also improved trie generation time by almost 2x.


Swift SVN r19457
2014-07-02 10:29:52 +00:00

263 lines
7.9 KiB
Swift

//===--- UnicodeTrie.swift.gyb --------------------------------*- swift -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// A custom trie implementation to quickly retrieve Unicode property values.
//
//===----------------------------------------------------------------------===//
%{
# Note: keep these constants synchronized with the data that is actually
# generated. There is a runtime check for this, but it is only performed in
# builds with INTERNAL_CHECKS_ENABLED.

# Bit widths used by the Swift code below to split a code point into table
# indices.  The "BMP" values are used for code points <= 0xffff; the "Supp"
# values are used for everything above that.
BMPFirstLevelIndexBits = 8
BMPDataOffsetBits = 8
SuppFirstLevelIndexBits = 5
SuppSecondLevelIndexBits = 8
SuppDataOffsetBits = 8
# Size in bytes of one entry in each table.  The table accessors below are
# only generated for tables whose entry size is 1.
BMPLookupBytesPerEntry = 1
BMPDataBytesPerEntry = 1
SuppLookup1BytesPerEntry = 1
SuppLookup2BytesPerEntry = 1
SuppDataBytesPerEntry = 1
# Total size of the trie data as reported by the trie metadata (presumably in
# bytes -- confirm against the generator).
TrieSize = 15889
# Offset of each table from the start of the trie data.  The trie data is
# addressed through a UInt8 pointer, so these are byte offsets.
BMPLookupBytesOffset = 0
BMPDataBytesOffset = 256
SuppLookup1BytesOffset = 12032
SuppLookup2BytesOffset = 12049
SuppDataBytesOffset = 12817
}%
import SwiftShims
/// The possible values of the `Grapheme_Cluster_Break` property.
///
/// The integer raw values are explicit because raw property values read from
/// the trie are converted to this enum by raw value.
@internal enum _GraphemeClusterBreakPropertyValue : Int, Printable {
  case Other = 0
  case CR = 1
  case LF = 2
  case Control = 3
  case Extend = 4
  case Regional_Indicator = 5
  case Prepend = 6
  case SpacingMark = 7
  case L = 8
  case V = 9
  case T = 10
  case LV = 11
  case LVT = 12

  /// The name of the case, spelled exactly as it is declared.
  var description: String {
    switch self {
    case .Other: return "Other"
    case .CR: return "CR"
    case .LF: return "LF"
    case .Control: return "Control"
    case .Extend: return "Extend"
    case .Regional_Indicator: return "Regional_Indicator"
    case .Prepend: return "Prepend"
    case .SpacingMark: return "SpacingMark"
    case .L: return "L"
    case .V: return "V"
    case .T: return "T"
    case .LV: return "LV"
    case .LVT: return "LVT"
    }
  }
}
// Turning a raw enum value into the enum itself is costly.  Hot code paths
// therefore pass the raw property value around inside this type safe wrapper
// and only pay for the conversion when they really need the enum.
struct _GraphemeClusterBreakPropertyRawValue {
  // The property value exactly as it is stored in the trie data.
  var rawValue: UInt${BMPDataBytesPerEntry * 8}

  init(_ value: UInt${BMPDataBytesPerEntry * 8}) {
    rawValue = value
  }

  // Use with care: this operation is expensive (even with optimization
  // turned on the compiler generates code for a switch).  The force unwrap
  // traps if `rawValue` does not correspond to any enum case.
  var cookedValue: _GraphemeClusterBreakPropertyValue {
    let widened = Int(rawValue)
    return _GraphemeClusterBreakPropertyValue.fromRaw(widened)!
  }
}
/// A read-only view over the generated trie that maps a code point to the raw
/// value of its `Grapheme_Cluster_Break` property.
///
/// Code points up to 0xffff are resolved through two tables (`_BMPLookup`,
/// then `_BMPData`).  Larger code points are resolved through three tables
/// (`_SuppLookup1`, `_SuppLookup2`, then `_SuppData`).  All tables live in one
/// contiguous blob at the fixed offsets baked into this template.
@internal struct _UnicodeGraphemeClusterBreakPropertyTrie {
  /// Checks that every constant baked into this template matches the metadata
  /// that accompanies the generated trie data.
  ///
  /// The checks use `_sanityCheck`, so they are only performed in builds with
  /// internal checks enabled.
  static func _checkParameters() {
    let metadata = _swift_stdlib_GraphemeClusterBreakPropertyTrieMetadata

    // Bit widths.
    _sanityCheck(metadata.BMPFirstLevelIndexBits == ${BMPFirstLevelIndexBits})
    _sanityCheck(metadata.BMPDataOffsetBits == ${BMPDataOffsetBits})
    _sanityCheck(metadata.SuppFirstLevelIndexBits == ${SuppFirstLevelIndexBits})
    _sanityCheck(metadata.SuppSecondLevelIndexBits == ${SuppSecondLevelIndexBits})
    _sanityCheck(metadata.SuppDataOffsetBits == ${SuppDataOffsetBits})

    // Entry sizes.
    _sanityCheck(metadata.BMPLookupBytesPerEntry == ${BMPLookupBytesPerEntry})
    _sanityCheck(metadata.BMPDataBytesPerEntry == ${BMPDataBytesPerEntry})
    _sanityCheck(metadata.SuppLookup1BytesPerEntry == ${SuppLookup1BytesPerEntry})
    _sanityCheck(metadata.SuppLookup2BytesPerEntry == ${SuppLookup2BytesPerEntry})
    _sanityCheck(metadata.SuppDataBytesPerEntry == ${SuppDataBytesPerEntry})

    // Overall size and table offsets.
    _sanityCheck(metadata.TrieSize == ${TrieSize})
    _sanityCheck(metadata.BMPLookupBytesOffset == ${BMPLookupBytesOffset})
    _sanityCheck(metadata.BMPDataBytesOffset == ${BMPDataBytesOffset})
    _sanityCheck(metadata.SuppLookup1BytesOffset == ${SuppLookup1BytesOffset})
    _sanityCheck(metadata.SuppLookup2BytesOffset == ${SuppLookup2BytesOffset})
    _sanityCheck(metadata.SuppDataBytesOffset == ${SuppDataBytesOffset})
  }

  /// Start of the trie blob.  Each table accessor below adds a fixed byte
  /// offset to this pointer.
  let _trieData: ConstUnsafePointer<UInt8>

  // Note: a table accessor is only generated when that table's entries are
  // one byte wide.

% if BMPLookupBytesPerEntry == 1:
  /// First-level table for code points <= 0xffff.
  @transparent var _BMPLookup: ConstUnsafePointer<UInt8> {
    return _trieData + ${BMPLookupBytesOffset}
  }
% end

% if BMPDataBytesPerEntry == 1:
  /// Data blocks for code points <= 0xffff.
  @transparent var _BMPData: ConstUnsafePointer<UInt8> {
    return _trieData + ${BMPDataBytesOffset}
  }
% end

% if SuppLookup1BytesPerEntry == 1:
  /// First-level table for code points > 0xffff.
  @transparent var _SuppLookup1: ConstUnsafePointer<UInt8> {
    return _trieData + ${SuppLookup1BytesOffset}
  }
% end

% if SuppLookup2BytesPerEntry == 1:
  /// Second-level table for code points > 0xffff.
  @transparent var _SuppLookup2: ConstUnsafePointer<UInt8> {
    return _trieData + ${SuppLookup2BytesOffset}
  }
% end

% if SuppDataBytesPerEntry == 1:
  /// Data blocks for code points > 0xffff.
  @transparent var _SuppData: ConstUnsafePointer<UInt8> {
    return _trieData + ${SuppDataBytesOffset}
  }
% end

  /// Validates the template parameters (when internal checks are enabled) and
  /// binds the trie to the generated data.
  init() {
    _UnicodeGraphemeClusterBreakPropertyTrie._checkParameters()
    _trieData = _swift_stdlib_GraphemeClusterBreakPropertyTrie
  }

  /// Index into `_BMPLookup` for `cp`.
  @transparent
  func _getBMPFirstLevelIndex(cp: UInt32) -> Int {
    // The first-level index is what remains after the low
    // `BMPDataOffsetBits` bits (the offset inside a data block) are dropped,
    // so the shift amount is the data offset width, not the index width.
    // The two widths are equal in the default configuration, which is why
    // shifting by the index width used to produce the same result.
    return Int(cp >> ${BMPDataOffsetBits})
  }

  /// Offset of `cp` inside its BMP data block.
  @transparent
  func _getBMPDataOffset(cp: UInt32) -> Int {
    return Int(cp & ((1 << ${BMPDataOffsetBits}) - 1))
  }

  /// Index into `_SuppLookup1` for `cp`.
  @transparent
  func _getSuppFirstLevelIndex(cp: UInt32) -> Int {
    return Int(cp >> (${SuppSecondLevelIndexBits} + ${SuppDataOffsetBits}))
  }

  /// Index of `cp` inside its second-level lookup block.
  @transparent
  func _getSuppSecondLevelIndex(cp: UInt32) -> Int {
    return Int((cp >> ${SuppDataOffsetBits}) &
      ((1 << ${SuppSecondLevelIndexBits}) - 1))
  }

  /// Offset of `cp` inside its supplementary data block.
  @transparent
  func _getSuppDataOffset(cp: UInt32) -> Int {
    return Int(cp & ((1 << ${SuppDataOffsetBits}) - 1))
  }

  /// Returns the raw `Grapheme_Cluster_Break` property value stored in the
  /// trie for `codePoint`.
  ///
  /// Requires: `codePoint <= 0x10ffff`.
  func getPropertyRawValue(
    codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyRawValue {
    // Note: for optimization, the code below uses '&+' instead of '+' to avoid
    // a few branches.  There is no possibility of overflow here.
    //
    // The optimizer could figure this out, but right now it keeps extra checks
    // if '+' is used.
    if _fastPath(codePoint <= 0xffff) {
      let dataBlockIndex = Int(_BMPLookup[_getBMPFirstLevelIndex(codePoint)])
      return _GraphemeClusterBreakPropertyRawValue(
        _BMPData[
          (dataBlockIndex << ${BMPDataOffsetBits}) &+
          _getBMPDataOffset(codePoint)])
    } else {
      _precondition(codePoint <= 0x10ffff)
      let secondLookupIndex = Int(_SuppLookup1[_getSuppFirstLevelIndex(codePoint)])
      let dataBlockIndex = Int(_SuppLookup2[
        (secondLookupIndex << ${SuppSecondLevelIndexBits}) &+
        _getSuppSecondLevelIndex(codePoint)])
      return _GraphemeClusterBreakPropertyRawValue(
        _SuppData[
          (dataBlockIndex << ${SuppDataOffsetBits}) &+
          _getSuppDataOffset(codePoint)])
    }
  }

  /// Returns the `Grapheme_Cluster_Break` property value for `codePoint` as
  /// an enum.
  ///
  /// This goes through `cookedValue`, which is expensive; hot paths should
  /// use `getPropertyRawValue` instead.
  func getPropertyValue(
    codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyValue {
    return getPropertyRawValue(codePoint).cookedValue
  }
}
/// Answers extended grapheme cluster boundary queries by consulting a
/// precomputed matrix of "no boundary" rules.
@internal struct _UnicodeExtendedGraphemeClusterSegmenter {
  /// One `UInt16` row per raw property value of the first code point.  Bit
  /// `n` of a row is set when there is no boundary before a following code
  /// point whose raw property value is `n`.
  let _noBoundaryRulesMatrix: ConstUnsafePointer<UInt16>

  init() {
    _noBoundaryRulesMatrix =
      _swift_stdlib_ExtendedGraphemeClusterNoBoundaryRulesMatrix
  }

  /// Returns `true` if there is always a grapheme cluster break after a code
  /// point with a given `Grapheme_Cluster_Break` property value.
  func isBoundaryAfter(gcb: _GraphemeClusterBreakPropertyRawValue) -> Bool {
    // An all-zero row means no following value suppresses the break.
    return _noBoundaryRulesMatrix[Int(gcb.rawValue)] == 0
  }

  /// Returns `true` if there is a grapheme cluster break between code points
  /// with given `Grapheme_Cluster_Break` property values.
  func isBoundary(
    gcb1: _GraphemeClusterBreakPropertyRawValue,
    _ gcb2: _GraphemeClusterBreakPropertyRawValue
  ) -> Bool {
    let noBreakMask = _noBoundaryRulesMatrix[Int(gcb1.rawValue)]
    let followerBit: UInt16 = 1 << UInt16(gcb2.rawValue)
    return (noBreakMask & followerBit) == 0
  }
}
// ${'Local Variables'}:
// eval: (read-only-mode 1)
// End: