Match the batch size for multi-file indexing to the driver's batch size

Until we have better measurements that would motivate a different batching strategy, copying the driver’s batch size seems like the most reasonable thing to do.
This commit is contained in:
Alex Hoppen
2025-09-22 09:42:52 +02:00
parent 078a996dac
commit 05c04decf2
2 changed files with 8 additions and 7 deletions

View File

@@ -45,7 +45,7 @@ package actor BuildSettingsLogger {
if let uri = uris.only {
header = "Build settings for \(uri.forLogging)"
} else if let firstUri = uris.first {
header = "Build settings for \(firstUri.forLogging) and \(firstUri) and \(uris.count - 1) others"
header = "Build settings for \(firstUri.forLogging) and \(uris.count - 1) others"
} else {
header = "Build settings for empty list"
}

View File

@@ -701,16 +701,17 @@ package struct UpdateIndexStoreTaskDescription: IndexTaskDescription {
let languageAndTarget = TargetAndLanguage(target: fileIndexInfo.target, language: fileIndexInfo.language)
fileIndexInfosToBatch[languageAndTarget, default: []].append(fileIndexInfo)
}
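// Create one partition per processor core but limit the partition size to 25 primary files. This matches the
// driver's behavior in `numberOfBatchPartitions`
// https://github.com/swiftlang/swift-driver/blob/df3d0796ed5e533d82accd7baac43d15e97b5671/Sources/SwiftDriver/Jobs/Planning.swift#L917-L1022
// NOTE(review): `max(…, 25)` below makes 25 the minimum partition size rather than an upper limit, and
// `fileIndexInfosToBatch.count` is the number of target/language groups, not the number of files. Confirm that this
// matches the intended "at most 25 files per partition" behavior.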
let partitionSize = max(fileIndexInfosToBatch.count / ProcessInfo.processInfo.activeProcessorCount, 25)
let batchedPartitions =
fileIndexInfosToBatch
.sorted { $0.key < $1.key } // Ensure we get a deterministic partition order
.flatMap { targetAndLanguage, files in
// The batch size of 5 was chosen without too many significant performance measurements because most projects
// currently indexed by SourceKit-LSP are limited by preparation time instead of indexing time and it's thus
// hard to quantify the performance characteristics of different batch sizes. 5 seems like a good trade-off to
// share work between files within the same target without overloading a single job with too many files and
// thus losing parallelism.
files.partition(intoBatchesOfSize: 5).map { (targetAndLanguage.target, targetAndLanguage.language, $0) }
files.partition(intoBatchesOfSize: partitionSize).map {
(targetAndLanguage.target, targetAndLanguage.language, $0)
}
}
return partitions + batchedPartitions
}