mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
[benchmark] Discard oversampled quantile values
When num_samples is less than quantile + 1, some of the measurements are repeated in the report summary. Parsed samples should strive to be a true reflection of the measured distribution, so we’ll correct this by discarding the repeated artifacts from quantile estimation. This avoids introducing a bias from this oversampling into the empirical distribution obtained from merging independent samples. See also: https://en.wikipedia.org/wiki/Oversampling_and_undersampling_in_data_analysis
This commit is contained in:
@@ -241,9 +241,18 @@ class PerformanceTestResult(object):
|
||||
if quantiles: # Variable number of columns representing quantiles
|
||||
runtimes = csv_row[3:-1] if memory else csv_row[3:]
|
||||
if delta:
|
||||
runtimes = map(lambda x: int(x) if x else 0, runtimes)
|
||||
runtimes = reduce(lambda l, x: l.append(l[-1] + x) or
|
||||
l if l else [x], runtimes, None)
|
||||
runtimes = [int(x) if x else 0 for x in runtimes]
|
||||
runtimes = reduce(lambda l, x: l.append(l[-1] + x) or # runnin
|
||||
l if l else [x], runtimes, None) # total
|
||||
num_values = len(runtimes)
|
||||
if self.num_samples < num_values: # remove repeated samples
|
||||
quantile = num_values - 1
|
||||
qs = [float(i) / float(quantile) for i in range(0, num_values)]
|
||||
indices = [max(0, int(ceil(self.num_samples * float(q))) - 1)
|
||||
for q in qs]
|
||||
runtimes = [runtimes[indices.index(i)]
|
||||
for i in range(0, self.num_samples)]
|
||||
|
||||
self.samples = PerformanceTestSamples(
|
||||
self.name,
|
||||
[Sample(None, None, int(runtime)) for runtime in runtimes])
|
||||
|
||||
Reference in New Issue
Block a user