Fix compare_perf_tests.py for running locally.

The script defaulted to a mode that no one uses, without checking
whether the input was compatible with that mode.

This is the script used for run-to-run comparison of benchmark
results. The in-tree benchmarks happened to work with the script only
because of a fragile string comparison buried deep within it.
Out-of-tree benchmark scripts that generate results were silently
broken when using this script for comparison.
Author: Andrew Trick
Date:   2022-05-11 10:02:38 -07:00
parent e8194155f0
commit f09cc8cc8b
3 changed files with 35 additions and 24 deletions
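The fragile comparison in question is the format-detection line in LogParser._configure_format (see compare_perf_tests.py below). A minimal sketch of the failure mode; the "#,TEST,SAMPLES,ITERS" header here is hypothetical:

    header = "#,TEST,SAMPLES,ITERS"    # out-of-tree header without a MEAN column
    quantiles = "MEAN" not in header   # old check: True, row misparsed as quantiles
    quantiles = "QMIN" in header       # new check: False, falls through correctly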

compare_perf_tests.py

@@ -229,8 +229,8 @@ class PerformanceTestResult(object):
     statistics for normal distribution (MEAN, SD):
         #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)
     And new quantiles format with variable number of columns:
-        #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs)
-        #,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
+        #,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
+        #,TEST,SAMPLES,QMIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
     The number of columns between MIN and MAX depends on the test driver's
     `--quantile`parameter. In both cases, the last column, MAX_RSS is optional.
     """
@@ -244,9 +244,10 @@ class PerformanceTestResult(object):
         self.name = csv_row[1]  # Name of the performance test
         self.num_samples = int(csv_row[2])  # Number of measurements taken
+        mem_index = (-1 if memory else 0) + (-3 if meta else 0)
         if quantiles:  # Variable number of columns representing quantiles
-            mem_index = (-1 if memory else 0) + (-3 if meta else 0)
             runtimes = csv_row[3:mem_index] if memory or meta else csv_row[3:]
+            last_runtime_index = mem_index - 1
             if delta:
                 runtimes = [int(x) if x else 0 for x in runtimes]
                 runtimes = functools.reduce(
@@ -277,20 +278,21 @@ class PerformanceTestResult(object):
                 sams.mean,
                 sams.sd,
             )
-            self.max_rss = (  # Maximum Resident Set Size (B)
-                int(csv_row[mem_index]) if memory else None
-            )
         else:  # Legacy format with statistics for normal distribution.
             self.min = int(csv_row[3])  # Minimum runtime (μs)
             self.max = int(csv_row[4])  # Maximum runtime (μs)
             self.mean = float(csv_row[5])  # Mean (average) runtime (μs)
             self.sd = float(csv_row[6])  # Standard Deviation (μs)
             self.median = int(csv_row[7])  # Median runtime (μs)
-            self.max_rss = (  # Maximum Resident Set Size (B)
-                int(csv_row[8]) if len(csv_row) > 8 else None
-            )
+            last_runtime_index = 7
             self.samples = None
+        self.max_rss = (  # Maximum Resident Set Size (B)
+            int(csv_row[mem_index]) if (
+                memory and len(csv_row) > (last_runtime_index + 1)
+            ) else None
+        )
         # Optional measurement metadata. The number of:
         # memory pages used, involuntary context switches and voluntary yields
         self.mem_pages, self.involuntary_cs, self.yield_count = (
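The index arithmetic can be checked by hand against a row from the tests below, with the flags LogParser would set for that header:

    row = "0,B,1,5,5,32768,8,28,15".split(",")  # QMIN,MAX,MAX_RSS,PAGES,ICS,YIELD
    memory, meta = True, True
    mem_index = (-1 if memory else 0) + (-3 if meta else 0)  # -4
    runtimes = row[3:mem_index]                              # ['5', '5']
    max_rss = int(row[mem_index])                            # 32768
    # Legacy rows keep MAX_RSS right after MEDIAN (index 7), hence
    # last_runtime_index = 7 and the len(csv_row) > 8 guard.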
@@ -427,7 +429,7 @@ class LogParser(object):
         self.mem_pages = int(mem_pages)

     def _configure_format(self, header):
-        self.quantiles = "MEAN" not in header
+        self.quantiles = "QMIN" in header
         self.memory = "MAX_RSS" in header
         self.meta = "PAGES" in header
         self.delta = "𝚫" in header
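Applied to a delta-quantile header from the tests below, the new detection derives every flag from the header alone; a hedged walk-through:

    header = "#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX"
    quantiles = "QMIN" in header     # True
    memory = "MAX_RSS" in header     # False
    meta = "PAGES" in header         # False
    delta = "𝚫" in header            # True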
@@ -453,7 +455,7 @@ class LogParser(object):
                 Yield(len(self.samples), int(since_last_yield))
             )
         ),
-        re.compile(r"( *#[, \t]+TEST[, \t]+SAMPLES[, \t]+MIN.*)"): _configure_format,
+        re.compile(r"( *#[, \t]+TEST[, \t]+SAMPLES[, \t].*)"): _configure_format,
         # Environmental statistics: memory usage and context switches
         re.compile(
            r"\s+MAX_RSS \d+ - \d+ = (\d+) \((\d+) pages\)"

test_compare_perf_tests.py

@@ -205,7 +205,7 @@ class TestPerformanceTestResult(unittest.TestCase):
         self.assertEqual(r.samples, None)

         log_line = "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336"
-        r = PerformanceTestResult(log_line.split(","))
+        r = PerformanceTestResult(log_line.split(","), memory=True)
         self.assertEqual(r.max_rss, 10510336)

     def test_init_quantiles(self):
@@ -379,7 +379,11 @@ class TestPerformanceTestResult(unittest.TestCase):
         )[
             1:
         ]
-        results = list(map(PerformanceTestResult, [line.split(",") for line in tests]))
+
+        def makeResult(csv_row):
+            return PerformanceTestResult(csv_row, memory=True)
+
+        results = list(map(makeResult, [line.split(",") for line in tests]))
         results[2].setup = 9
         results[3].setup = 7
@@ -489,11 +493,14 @@ class OldAndNewLog(unittest.TestCase):
 3,Array2D,20,335831,400221,346622,0,346622
 1,AngryPhonebook,20,10458,12714,11000,0,11000"""

+    def makeResult(csv_row):
+        return PerformanceTestResult(csv_row, memory=True)
+
     old_results = dict(
         [
             (r.name, r)
             for r in map(
-                PerformanceTestResult,
+                makeResult,
                 [line.split(",") for line in old_log_content.splitlines()],
             )
         ]
@@ -503,7 +510,7 @@ class OldAndNewLog(unittest.TestCase):
         [
             (r.name, r)
             for r in map(
-                PerformanceTestResult,
+                makeResult,
                 [line.split(",") for line in new_log_content.splitlines()],
             )
         ]
@@ -557,14 +564,14 @@ Total performance tests executed: 1
     def test_parse_quantiles(self):
         """Gathers samples from reported quantiles. Handles optional memory."""
         r = LogParser.results_from_string(
-            """#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs)
+            """#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
1,Ackermann,3,54383,54512,54601"""
         )["Ackermann"]
         self.assertEqual(
             [s.runtime for s in r.samples.all_samples], [54383, 54512, 54601]
         )
         r = LogParser.results_from_string(
-            """#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)
+            """#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)
1,Ackermann,3,54529,54760,55807,266240"""
         )["Ackermann"]
         self.assertEqual(
@@ -574,21 +581,21 @@ Total performance tests executed: 1
     def test_parse_delta_quantiles(self):
         r = LogParser.results_from_string(  # 2-quantile aka. median
-            "#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,,"
+            "#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,,"
         )["B"]
         self.assertEqual(
             (r.num_samples, r.min, r.median, r.max, r.samples.count),
             (1, 101, 101, 101, 1),
         )
         r = LogParser.results_from_string(
-            "#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1"
+            "#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1"
         )["B"]
         self.assertEqual(
             (r.num_samples, r.min, r.median, r.max, r.samples.count),
             (2, 101, 101, 102, 2),
         )
         r = LogParser.results_from_string(  # 20-quantiles aka. ventiles
-            "#,TEST,SAMPLES,MIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8,"
+            "#,TEST,SAMPLES,QMIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8,"
             + "𝚫V9,𝚫VA,𝚫VB,𝚫VC,𝚫VD,𝚫VE,𝚫VF,𝚫VG,𝚫VH,𝚫VI,𝚫VJ,𝚫MAX\n"
             + "202,DropWhileArray,200,214,,,,,,,,,,,,1,,,,,,2,16,464"
         )["DropWhileArray"]
@@ -617,13 +624,13 @@ Total performance tests executed: 1
             (3, 9, 50, 15, 36864),
         )
         r = LogParser.results_from_string(
-            "#,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD\n" + "0,B,1,4,4,8,31,15"
+            "#,TEST,SAMPLES,QMIN(μs),MAX(μs),PAGES,ICS,YIELD\n" + "0,B,1,4,4,8,31,15"
         )["B"]
         self.assertEqual(
             (r.min, r.mem_pages, r.involuntary_cs, r.yield_count), (4, 8, 31, 15)
         )
         r = LogParser.results_from_string(
-            "#,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n"
+            "#,TEST,SAMPLES,QMIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n"
             + "0,B,1,5,5,32768,8,28,15"
         )["B"]
         self.assertEqual(
@@ -831,7 +838,8 @@ class TestReportFormatter(OldAndNewLog):
         self.assertEqual(
             ReportFormatter.values(
                 PerformanceTestResult(
-                    "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336".split(",")
+                    "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336".split(","),
+                    memory=True
                 )
             ),
             ("AngryPhonebook", "12045", "12045", "12045", "10510336"),

DriverUtils.swift

@@ -634,7 +634,8 @@ final class TestRunner {
     let index: (Int) -> String =
       { q == 2 ? "" : q <= 20 ? base20[$0] : String($0) }
     let tail = (1..<q).map { prefix + index($0) } + ["MAX"]
-    return [withUnit("MIN")] + tail.map(c.delta ? withDelta : withUnit)
+    // QMIN identifies the quantile format, distinct from formats using "MIN"
+    return [withUnit("QMIN")] + tail.map(c.delta ? withDelta : withUnit)
   }
   return (
     ["#", "TEST", "SAMPLES"] +