mtreinish · mtreinish · Jul 31, 2017 · Sep 9, 2019 · Oct 6, 2019 · Oct 6, 2019
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -38,6 +38,40 @@ jobs:
           .tox/py/bin/pip install gnureadline subunit2sql
           tox -epy
         if: runner.os == 'macOS'
+  test_dynamic:
+    name: tests-dynamic-scheduler-python${{ matrix.python-version }}-${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: [3.8, "3.14"]
+        os: ["macOS-latest", "ubuntu-latest"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Pip cache
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-${{ matrix.python-version }}-pip-tests-${{ hashFiles('pyproject.toml','requirements-dev.txt','constraints.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ matrix.python-version }}-pip-tests-
+            ${{ runner.os }}-${{ matrix.python-version }}-pip-
+            ${{ runner.os }}-${{ matrix.python-version }}
+      - name: Install Deps
+        run: python -m pip install -U tox setuptools virtualenv wheel
+      - name: Install and Run Tests
+        run: tox -e py -- --dynamic
+        if: runner.os != 'macOS'
+      - name: Install and run tests macOS
+        run: |
+          tox -epy --notest
+          .tox/py/bin/pip install gnureadline subunit2sql
+          tox -epy -- --dynamic
+        if: runner.os == 'macOS'
+
   lint:
     name: pep8
     runs-on: ubuntu-latest

diff --git a/doc/source/MANUAL.rst b/doc/source/MANUAL.rst
@@ -502,6 +502,31 @@ There is also an option on ``stestr run``, ``--random`` to randomize the
 order of tests as they are passed to the workers. This is useful in certain
 use cases, especially when you want to test isolation between test cases.
 
+Dynamic Scheduler
+-----------------
+
+There is a new under development dynamic scheduler mode that is being designed
+to hopefully replace the static scheduling that stestr currently uses by
+default. This new scheduler will sort all the test ids found during discovery
+based on timing data and alphabetical order just like the static scheduler,
+but this scheduler puts the list in a unified work queue. Then each individual
+test worker will query the queue for the next test to execute. This should
+result in a better worker balance than the static partitioning used by
+default which may translate to a better wall time.
+
+You can enable this new scheduler with the ``--dynamic`` flag to
+``stestr run``. This option should also be compatible with the grouping
+options described in the :ref:`group_regex` section. Using the two in
+combination will result in the scheduler working with the group of tests
+instead of individual tests.
+
+Do note that the dynamic scheduler is still experimental and hasn't been as
+thoroughly tested as the default scheduler. It may not work in every use
+case, if you encounter an issue with the scheduler please file a bug at:
+
+https://github.com/mtreinish/stestr/issues/new?template=bug_report.md
+
+This new scheduler does not currently work on Windows.
 
 User Config Files
 -----------------

diff --git a/stestr/commands/run.py b/stestr/commands/run.py
@@ -19,6 +19,7 @@
 import os.path
 import subprocess
 import sys
+import warnings
 
 from cliff import command
 import subunit
@@ -245,6 +246,19 @@ def get_parser(self, prog_name):
             help="If set, show non-text attachments. This is "
             "generally only useful for debug purposes.",
         )
+        parser.add_argument(
+            "--dynamic",
+            action="store_true",
+            default=False,
+            help="Enable the EXPERIMENTAL dynamic scheduler. This scheduler "
+            "is designed to to improve the worker balance. Each worker will "
+            "dynamically query the next test to run which can potentially "
+            "result in better wall time. This option is still experimental "
+            "and is not guaranteed to work in all cases. This option is also not "
+            "currently supported on Windows. If you encounter issues with this "
+            "option please file a bug at: "
+            "https://github.com/mtreinish/stestr/issues/new?template=bug_report.md",
+        )
         return parser
 
     def take_action(self, parsed_args):
@@ -336,6 +350,7 @@ def take_action(self, parsed_args):
             all_attachments=all_attachments,
             show_binary_attachments=args.show_binary_attachments,
             pdb=args.pdb,
+            dynamic=args.dynamic,
         )
 
         # Always output slowest test info if requested, regardless of other
@@ -397,6 +412,7 @@ def run_command(
     all_attachments=False,
     show_binary_attachments=True,
     pdb=False,
+    dynamic=False,
 ):
     """Function to execute the run command
 
@@ -461,6 +477,7 @@ def run_command(
     :param str pdb: Takes in a single test_id to bypasses test
         discover and just execute the test specified without launching any
         additional processes. A file name may be used in place of a test name.
+    :param bool dynamic: Enable dynamic scheduling
 
     :return return_code: The exit code for the command. 0 for success and > 0
         for failures.
@@ -502,6 +519,11 @@ def run_command(
         )
         stdout.write(msg)
         return 2
+    if dynamic:
+        warnings.warn(
+            "WARNING: The dynamic scheduler is still experimental. "
+            "You might encounter issues while using it"
+        )
     if combine:
         latest_id = repo.latest_id()
         combine_id = str(latest_id)
@@ -547,7 +569,8 @@ def run_tests():
                 (
                     "subunit",
                     output.ReturnCodeToSubunit(
-                        subprocess.Popen(run_cmd, shell=True, stdout=subprocess.PIPE)
+                        subprocess.Popen(run_cmd, shell=True, stdout=subprocess.PIPE),
+                        dynamic=False,
                     ),
                 )
             ]
@@ -654,6 +677,7 @@ def run_tests():
             top_dir=top_dir,
             test_path=test_path,
             randomize=random,
+            dynamic=dynamic,
         )
         if isolated:
             result = 0
@@ -693,6 +717,7 @@ def run_tests():
                     suppress_attachments=suppress_attachments,
                     all_attachments=all_attachments,
                     show_binary_attachments=show_binary_attachments,
+                    dynamic=dynamic,
                 )
                 if run_result > result:
                     result = run_result
@@ -711,6 +736,7 @@ def run_tests():
                 suppress_attachments=suppress_attachments,
                 all_attachments=all_attachments,
                 show_binary_attachments=show_binary_attachments,
+                dynamic=dynamic,
             )
     else:
         # Where do we source data about the cause of conflicts.
@@ -780,16 +806,28 @@ def _run_tests(
     suppress_attachments=False,
     all_attachments=False,
     show_binary_attachments=False,
+    dynamic=False,
 ):
     """Run the tests cmd was parameterised with."""
     cmd.setUp()
     try:
 
         def run_tests():
-            run_procs = [
-                ("subunit", output.ReturnCodeToSubunit(proc))
-                for proc in cmd.run_tests()
-            ]
+            if not dynamic or cmd.concurrency == 1:
+                run_procs = [
+                    ("subunit", output.ReturnCodeToSubunit(proc, dynamic=False))
+                    for proc in cmd.run_tests()
+                ]
+            else:
+                run_procs = [
+                    (
+                        "subunit",
+                        output.ReturnCodeToSubunit(
+                            os.fdopen(proc["stream"]), proc["proc"]
+                        ),
+                    )
+                    for proc in cmd.run_tests()
+                ]
             if not run_procs:
                 stdout.write("The specified regex doesn't match with anything")
                 return 1

diff --git a/stestr/config_file.py b/stestr/config_file.py
@@ -113,6 +113,7 @@ def get_run_command(
         exclude_regex=None,
         randomize=False,
         parallel_class=None,
+        dynamic=False,
     ):
         """Get a test_processor.TestProcessorFixture for this config file
 
@@ -158,6 +159,7 @@ def get_run_command(
             stestr scheduler by class. If both this and the corresponding
             config file option which includes `group-regex` are set, this value
             will be used.
+        :param bool dynamic: Enable dynamic scheduling
 
         :returns: a TestProcessorFixture object for the specified config file
             and any arguments passed into this function
@@ -236,4 +238,5 @@ def group_callback(test_id, regex=re.compile(group_regex)):
             exclude_regex=exclude_regex,
             include_list=include_list,
             randomize=randomize,
+            dynamic=dynamic,
         )
diff --git a/stestr/output.py b/stestr/output.py
@@ -164,21 +164,33 @@ class ReturnCodeToSubunit:
         generating subunit.
     """
 
-    def __init__(self, process):
+    def __init__(self, process, thread=None, dynamic=True):
         """Adapt a process to a readable stream."""
         self.proc = process
         self.done = False
-        self.source = self.proc.stdout
+        if dynamic:
+            self.source = process
+            self.proc = thread
+        else:
+            self.source = self.proc.stdout
+        self.dynamic = dynamic
         self.lastoutput = bytes((b"\n")[0])
 
     def __del__(self):
-        self.proc.wait()
+        if hasattr(self.proc, "wait"):
+            self.proc.wait()
+        else:
+            self.proc.join()
 
     def _append_return_code_as_test(self):
         if self.done is True:
             return
         self.source = io.BytesIO()
-        returncode = self.proc.wait()
+        if not self.dynamic:
+            returncode = self.proc.wait()
+        else:
+            self.proc.join()
+            returncode = self.proc.exitcode
         if returncode != 0:
             if self.lastoutput != bytes((b"\n")[0]):
                 # Subunit V1 is line orientated, it has to start on a fresh

diff --git a/stestr/scheduler.py b/stestr/scheduler.py
@@ -21,6 +21,74 @@
 from stestr import selection
 
 
+def get_dynamic_test_list(
+    test_ids, repository=None, group_callback=None, randomize=False
+):
+    dynamic_test_list = []
+    _group_callback = group_callback
+    time_data = {}
+    if randomize:
+        return random.shuffle(test_ids)
+    if repository:
+        time_data = repository.get_test_times(test_ids)
+        timed_tests = time_data["known"]
+        unknown_tests = time_data["unknown"]
+    else:
+        timed_tests = {}
+        unknown_tests = set(test_ids)
+    # Group tests: generate group_id -> test_ids.
+    group_ids = collections.defaultdict(list)
+    if _group_callback is None:
+
+        def group_callback(_):
+            return None
+
+    else:
+        group_callback = _group_callback
+    for test_id in test_ids:
+        group_id = group_callback(test_id) or test_id
+        group_ids[group_id].append(test_id)
+    # Time groups: generate three sets of groups:
+    # - fully timed dict(group_id -> time),
+    # - partially timed dict(group_id -> time) and
+    # - unknown (set of group_id)
+    # We may in future treat partially timed different for scheduling, but
+    # at least today we just schedule them after the fully timed groups.
+    timed = {}
+    partial = {}
+    unknown = []
+    for group_id, group_tests in group_ids.items():
+        untimed_ids = unknown_tests.intersection(group_tests)
+        group_time = sum(
+            [
+                timed_tests[test_id]
+                for test_id in untimed_ids.symmetric_difference(group_tests)
+            ]
+        )
+        if not untimed_ids:
+            timed[group_id] = group_time
+        elif group_time:
+            partial[group_id] = group_time
+        else:
+            unknown.append(group_id)
+
+    # Scheduling is NP complete in general, so we avoid aiming for
+    # perfection. A quick approximation that is sufficient for our general
+    # needs:
+    # sort the groups by time
+    # allocate to partitions by putting each group in to the partition with
+    # the current (lowest time, shortest length[in tests])
+    def consume_queue(groups):
+        queue = sorted(groups.items(), key=operator.itemgetter(1), reverse=True)
+        dynamic_test_list.extend([group[0] for group in queue])
+
+    consume_queue(timed)
+    consume_queue(partial)
+    dynamic_test_list.extend(unknown)
+
+    return dynamic_test_list
+
+
 def partition_tests(test_ids, concurrency, repository, group_callback, randomize=False):
     """Partition test_ids by concurrency.