diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver
index bb9d47968dc..57592a7a39e 100755
--- a/benchmark/scripts/Benchmark_Driver
+++ b/benchmark/scripts/Benchmark_Driver
@@ -21,14 +21,25 @@ import subprocess
 import sys
 import time
 
+from compare_perf_tests import LogParser
+
 DRIVER_DIR = os.path.dirname(os.path.realpath(__file__))
 
 
 class BenchmarkDriver(object):
-    def __init__(self, args, tests=None, _subprocess=None):
+    """Executes tests from Swift Benchmark Suite."""
+
+    def __init__(self, args, tests=None, _subprocess=None, parser=None):
+        """Initialized with command line arguments.
+
+        Optional parameters for injecting dependencies; used for testing.
+        """
         self.args = args
         self._subprocess = _subprocess or subprocess
+        self.all_tests = []
         self.tests = tests or self._get_tests()
+        self.parser = parser or LogParser()
+        self.results = {}
 
     def _invoke(self, cmd):
         return self._subprocess.check_output(
@@ -36,8 +47,10 @@ class BenchmarkDriver(object):
 
     @property
     def test_harness(self):
-        return os.path.join(self.args.tests,
-                            "Benchmark_" + self.args.optimization)
+        """Full path to test harness binary."""
+        suffix = (self.args.optimization if hasattr(self.args, 'optimization')
+                  else 'O')
+        return os.path.join(self.args.tests, "Benchmark_" + suffix)
 
     @property
     def _cmd_list_benchmarks(self):
@@ -76,6 +89,30 @@ class BenchmarkDriver(object):
         return sorted(list(
             benchmarks.intersection(set(self.all_tests)).union(indexed_names)))
 
+    def run(self, test, num_samples=None, num_iters=None,
+            verbose=None, measure_memory=False):
+        """Execute benchmark and gather results."""
+        num_samples = num_samples or 1
+        num_iters = num_iters or 0  # automatically determine N to run for 1s
+
+        cmd = self._cmd_run(
+            test, num_samples, num_iters, verbose, measure_memory)
+        output = self._invoke(cmd)
+        result = self.parser.results_from_string(output).items()[0][1]
+        return result
+
+    def _cmd_run(self, test, num_samples, num_iters, verbose, measure_memory):
+        cmd = [self.test_harness, test]
+        if num_samples > 1:
+            cmd.append('--num-samples={0}'.format(num_samples))
+        if num_iters > 0:
+            cmd.append('--num-iters={0}'.format(num_iters))
+        if verbose:
+            cmd.append('--verbose')
+        if measure_memory:
+            cmd.append('--memory')
+        return cmd
+
 
 def instrument_test(driver_path, test, num_samples):
     """Run a test and instrument its peak memory use"""
diff --git a/benchmark/scripts/CMakeLists.txt b/benchmark/scripts/CMakeLists.txt
index 0fa44ab38d7..4922f6dd840 100644
--- a/benchmark/scripts/CMakeLists.txt
+++ b/benchmark/scripts/CMakeLists.txt
@@ -42,3 +42,8 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/Benchmark_Driver
      DESTINATION "${swift-bin-dir}"
      FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ
      GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
+
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/compare_perf_tests.py
+     DESTINATION "${swift-bin-dir}"
+     FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ
+     GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py
index a8af2b5bf23..3755da95d05 100644
--- a/benchmark/scripts/test_Benchmark_Driver.py
+++ b/benchmark/scripts/test_Benchmark_Driver.py
@@ -19,6 +19,7 @@
 import unittest
 from imp import load_source
 from test_utils import Mock, captured_output
+from compare_perf_tests import PerformanceTestResult
 
 # import Benchmark_Driver  # doesn't work because it misses '.py' extension
 Benchmark_Driver = load_source(
@@ -162,6 +163,8 @@ class TestBenchmarkDriverInitialization(unittest.TestCase):
             self.args, _subprocess=self.subprocess_mock)
         self.subprocess_mock.assert_called_all_expected()
         self.assertEquals(driver.tests, ['Benchmark1', 'Benchmark3'])
+        self.assertEquals(driver.all_tests,
+                          ['Benchmark1', 'Benchmark2', 'Benchmark3'])
 
     def test_filters_benchmarks_by_pattern(self):
         self.args.filters = '-f .+3'.split()
@@ -170,6 +173,58 @@ class TestBenchmarkDriverInitialization(unittest.TestCase):
             self.args, _subprocess=self.subprocess_mock)
         self.subprocess_mock.assert_called_all_expected()
         self.assertEquals(driver.tests, ['Benchmark3'])
+        self.assertEquals(driver.all_tests,
+                          ['Benchmark1', 'Benchmark2', 'Benchmark3'])
+
+
+class LogParserStub(object):
+    results_from_string_called = False
+
+    @staticmethod
+    def results_from_string(log_contents):
+        LogParserStub.results_from_string_called = True
+        r = PerformanceTestResult('3,b1,1,123,123,123,0,123'.split(','))
+        return {'b1': r}
+
+
+class TestBenchmarkDriverRunningTests(unittest.TestCase):
+    def setUp(self):
+        self.args = ArgsStub()
+        self.parser_stub = LogParserStub()
+        self.subprocess_mock = SubprocessMock()
+        self.subprocess_mock.expect(
+            '/benchmarks/Benchmark_O --list --delim=\t'.split(' '),
+            '#\tTest\t[Tags]\n1\tb1\t[tag]\n')
+        self.driver = BenchmarkDriver(
+            self.args, _subprocess=self.subprocess_mock,
+            parser=self.parser_stub)
+
+    def test_run_benchmark_with_multiple_samples(self):
+        self.driver.run('b1')
+        self.subprocess_mock.assert_called_with(
+            ('/benchmarks/Benchmark_O', 'b1'))
+        self.driver.run('b2', num_samples=5)
+        self.subprocess_mock.assert_called_with(
+            ('/benchmarks/Benchmark_O', 'b2', '--num-samples=5'))
+
+    def test_run_benchmark_with_specified_number_of_iterations(self):
+        self.driver.run('b', num_iters=1)
+        self.subprocess_mock.assert_called_with(
+            ('/benchmarks/Benchmark_O', 'b', '--num-iters=1'))
+
+    def test_run_benchmark_in_verbose_mode(self):
+        self.driver.run('b', verbose=True)
+        self.subprocess_mock.assert_called_with(
+            ('/benchmarks/Benchmark_O', 'b', '--verbose'))
+
+    def test_parse_results_from_running_benchmarks(self):
+        self.driver.run('b')
+        self.assertTrue(self.parser_stub.results_from_string_called)
+
+    def test_measure_memory(self):
+        self.driver.run('b', measure_memory=True)
+        self.subprocess_mock.assert_called_with(
+            ('/benchmarks/Benchmark_O', 'b', '--memory'))
 
 
 if __name__ == '__main__':
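
For orientation, here is a hypothetical usage sketch (not part of the patch) showing how the new `BenchmarkDriver.run()` entry point could be driven from a script once the suite is built. The `/benchmarks` path, the `Ackermann` test name, and the `argparse.Namespace` fields are illustrative assumptions mirroring the stubs in the tests above; the `PerformanceTestResult` attribute names are assumed to match `compare_perf_tests.py`.

```python
#!/usr/bin/env python
# Hypothetical usage sketch only -- not part of the patch above.
# Assumes a built harness at /benchmarks/Benchmark_O (the path the tests stub
# out) and that this script sits next to the Benchmark_Driver script.
import os
from argparse import Namespace
from imp import load_source

# Benchmark_Driver has no '.py' extension, so load it the way the tests do.
driver_module = load_source(
    'Benchmark_Driver',
    os.path.join(os.path.dirname(os.path.abspath(__file__)),
                 'Benchmark_Driver'))

# Only the attributes read by test_harness are set here; passing an explicit
# test list to the constructor skips _get_tests(), so no --list call is made.
args = Namespace(tests='/benchmarks', optimization='O')
driver = driver_module.BenchmarkDriver(args, tests=['Ackermann'])

# Gather 5 samples, let the harness pick num-iters (0 = auto-scale to ~1s),
# and measure peak memory; run() returns a parsed PerformanceTestResult.
result = driver.run('Ackermann', num_samples=5, measure_memory=True)
print('{0}: min {1} max {2}'.format(result.name, result.min, result.max))
```

The constructor's `parser` and `_subprocess` keyword arguments exist purely for dependency injection, which is why the tests above can exercise `run()` against `SubprocessMock` and `LogParserStub` without launching the real harness.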