Files
2025-01-14 14:16:29 -08:00

156 lines
3.7 KiB
Swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
import GenUtils
func emitNormData(
_ data: [(ClosedRange<UInt32>, UInt16)],
into result: inout String
) {
let uniqueData = Array(Set(data.map { $0.1 }))
// 64 bit arrays * 8 bytes = .512 KB
var bitArrays: [BitArray] = .init(repeating: .init(size: 64), count: 64)
let chunkSize = 0x110000 / 64 / 64
var chunks: [Int] = []
for i in 0 ..< 64 * 64 {
let lower = i * chunkSize
let upper = lower + chunkSize - 1
let idx = i / 64
let bit = i % 64
for scalar in lower ... upper {
if data.contains(where: { $0.0.contains(UInt32(scalar)) }) {
chunks.append(i)
bitArrays[idx][bit] = true
break
}
}
}
// Remove the trailing 0s. Currently this reduces quick look size down to
// 96 bytes from 512 bytes.
var reducedBA = Array(bitArrays.reversed())
reducedBA = Array(reducedBA.drop {
$0.words == [0x0]
})
bitArrays = reducedBA.reversed()
// Keep a record of every rank for all the bitarrays.
var ranks: [UInt16] = []
// Record our quick look ranks.
var lastRank: UInt16 = 0
for (i, _) in bitArrays.enumerated() {
guard i != 0 else {
ranks.append(0)
continue
}
var rank = UInt16(bitArrays[i - 1].words[0].nonzeroBitCount)
rank += lastRank
ranks.append(rank)
lastRank = rank
}
// Insert our quick look size at the beginning.
var size = BitArray(size: 64)
size.words = [UInt64(bitArrays.count)]
bitArrays.insert(size, at: 0)
var dataIndices: [UInt8] = []
for chunk in chunks {
var chunkBA = BitArray(size: chunkSize)
let lower = chunk * chunkSize
let upper = lower + chunkSize
let chunkDataIdx = UInt64(dataIndices.endIndex)
// Insert our chunk's data index in the upper bits of the last word of our
// bit array.
chunkBA.words[chunkBA.words.endIndex - 1] |= chunkDataIdx << 16
for scalar in lower ..< upper {
if data.contains(where: { $0.0.contains(UInt32(scalar)) }) {
chunkBA[scalar % chunkSize] = true
let data = data[data.firstIndex {
$0.0.contains(UInt32(scalar))
}!].1
let dataIdx = uniqueData.firstIndex(of: data)!
dataIndices.append(UInt8(dataIdx))
}
}
// Append our chunk bit array's rank.
var lastRank: UInt16 = 0
for (i, _) in chunkBA.words.enumerated() {
guard i != 0 else {
ranks.append(0)
continue
}
var rank = UInt16(chunkBA.words[i - 1].nonzeroBitCount)
rank += lastRank
ranks.append(rank)
lastRank = rank
}
bitArrays += chunkBA.words.map {
var ba = BitArray(size: 64)
ba.words = [$0]
return ba
}
}
emitCollection(
uniqueData,
name: "_swift_stdlib_normData_data",
into: &result
)
emitCollection(
dataIndices,
name: "_swift_stdlib_normData_data_indices",
into: &result
)
emitCollection(
ranks,
name: "_swift_stdlib_normData_ranks",
into: &result
)
emitCollection(
bitArrays,
name: "_swift_stdlib_normData",
type: "__swift_uint64_t",
into: &result
) {
assert($0.words.count == 1)
return "0x\(String($0.words[0], radix: 16, uppercase: true))"
}
}