Match the batch size for multi-file indexing to the driver's batch size

Until we have better measurements that would motivate a different batching strategy, copying the driver’s batch size seems like the most reasonable thing to do.
2026-03-02 18:23:24 +01:00 · 2025-09-22 09:42:52 +02:00
parent 078a996dac
commit 05c04decf2
2 changed files with 8 additions and 7 deletions
--- a/Sources/BuildServerIntegration/BuildSettingsLogger.swift
+++ b/Sources/BuildServerIntegration/BuildSettingsLogger.swift
@@ -45,7 +45,7 @@ package actor BuildSettingsLogger {
    if let uri = uris.only {
      header = "Build settings for \(uri.forLogging)"
    } else if let firstUri = uris.first {
-      header = "Build settings for \(firstUri.forLogging) and \(firstUri) and \(uris.count - 1) others"
+      header = "Build settings for \(firstUri.forLogging) and \(uris.count - 1) others"
    } else {
      header = "Build settings for empty list"
    }
--- a/Sources/SemanticIndex/UpdateIndexStoreTaskDescription.swift
+++ b/Sources/SemanticIndex/UpdateIndexStoreTaskDescription.swift
@@ -701,16 +701,17 @@ package struct UpdateIndexStoreTaskDescription: IndexTaskDescription {
      let languageAndTarget = TargetAndLanguage(target: fileIndexInfo.target, language: fileIndexInfo.language)
      fileIndexInfosToBatch[languageAndTarget, default: []].append(fileIndexInfo)
    }
+    // Create one partition per processor core but limit the partition size to 25 primary files, like the driver's
+    // `numberOfBatchPartitions`. NOTE(review): `max(..., 25)` makes 25 a minimum, not a limit; confirm vs. `min`.
+    // https://github.com/swiftlang/swift-driver/blob/df3d0796ed5e533d82accd7baac43d15e97b5671/Sources/SwiftDriver/Jobs/Planning.swift#L917-L1022
+    let partitionSize = max(fileIndexInfosToBatch.count / ProcessInfo.processInfo.activeProcessorCount, 25)
    let batchedPartitions =
      fileIndexInfosToBatch
      .sorted { $0.key < $1.key }  // Ensure we get a deterministic partition order
      .flatMap { targetAndLanguage, files in
-        // The batch size of 5 was chosen without too many significant performance measurements because most projects
-        // currently indexed by SourceKit-LSP are limited by preparation time instead of indexing time and it's thus
-        // hard to quanify the performance characteristics of different batch sizes. 5 seems like a good trade-off to
-        // share work between files within the same target without overloading a single job with too many files and
-        // thus losing parallelism.
-        files.partition(intoBatchesOfSize: 5).map { (targetAndLanguage.target, targetAndLanguage.language, $0) }
+        files.partition(intoBatchesOfSize: partitionSize).map {
+          (targetAndLanguage.target, targetAndLanguage.language, $0)
+        }
      }
    return partitions + batchedPartitions
  }