Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ jobs:
echo "Running cram tests without coverage"
export AUGUR="${{ github.workspace }}/bin/augur"
fi
cram tests/
scripts/run-cram-parallel.py
- name: Upload coverage
if: env.COVERAGE_FILE
uses: actions/upload-artifact@v7
Expand Down
6 changes: 3 additions & 3 deletions augur/refine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys
from Bio import Phylo
from textwrap import dedent
from .argparse_ import ExtendOverwriteDefault
from .argparse_ import ExtendOverwriteDefault, SKIP_AUTO_DEFAULT_IN_HELP
from .dates import get_numerical_dates
from .dates.errors import InvalidYearBounds
from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, METADATA_DATE_COLUMN, InvalidDelimiter, Metadata, read_metadata
Expand Down Expand Up @@ -174,8 +174,8 @@ def register_parser(parent_subparsers):
help="delimiters to accept when reading a metadata file. Only one delimiter will be inferred.")
parser.add_argument('--metadata-id-columns', default=DEFAULT_ID_COLUMNS, nargs="+", action=ExtendOverwriteDefault,
help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.")
parser.add_argument('--output-tree', type=str, help='file name to write tree to')
parser.add_argument('--output-node-data', type=str, help='file name to write branch lengths as node data')
parser.add_argument('--output-tree', type=str, help='file name to write tree to. If not provided a file will be created using the alignment or tree input path with a "_tt.nwk" suffix.'+SKIP_AUTO_DEFAULT_IN_HELP)
parser.add_argument('--output-node-data', type=str, help='file name to write branch lengths as node data. If not provided a file will be created using the alignment or tree input path with a ".node_data.json" suffix.'+SKIP_AUTO_DEFAULT_IN_HELP)
parser.add_argument('--use-fft', action="store_true", help="produce timetree using FFT for convolutions")
parser.add_argument('--max-iter', default=2, type=int, help="maximal number of iterations TreeTime uses for timetree inference")
parser.add_argument('--timetree', action="store_true", help="produce timetree using treetime, requires tree where branch length is in units of average number of nucleotide or protein substitutions per site (and branch lengths do not exceed 4)")
Expand Down
10 changes: 9 additions & 1 deletion docs/contribute/DEV_DOCS.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,18 @@ For example, the following command only runs unit tests related to augur mask.
./run_tests.sh -k test_mask
```

To run a specific integration test with cram, you can use the following command:
You can run specific integration test(s) with `cram` directly or via our parallel wrapper, which uses all
available CPUs by default. For instance, to run `tests/functional/clades.t`, either of these commands will work:

```bash
cram tests/functional/clades.t
./scripts/run-cram-parallel.py tests/functional/clades.t
```

To run all tests in parallel, simply run:

```bash
./scripts/run-cram-parallel.py
```

To run cram tests locally and capture test coverage data, you can use this invocation:
Expand Down
4 changes: 2 additions & 2 deletions run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ python3 -m pytest $coverage_arg $filtered_args

# Only run functional tests if we are not running a subset of tests for pytest.
if [ "$partial_test" = 0 ]; then
echo "Running functional tests with cram"
cram tests/
echo "Running functional tests with cram in parallel via our run-cram-parallel.py runner"
./scripts/run-cram-parallel.py
else
echo "Skipping functional tests when running a subset of unit tests"
fi
Expand Down
133 changes: 133 additions & 0 deletions scripts/run-cram-parallel.py
Comment thread
jameshadfield marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/usr/bin/env python3
"""Run cram tests in parallel using a worker pool."""
import argparse
import os
import subprocess
import sys
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path

# Tests identified as particularly slow, see
# <https://github.com/nextstrain/augur/issues/1994>.
# Re-check this list over time as individual tests get faster.
#
# main() submits these to the worker pool before all other tests so that the
# longest-running jobs start first, which improves parallel efficiency.
# Entries are compared (as Path objects) against the discovered test paths, so
# they are written relative to the repository root, matching the default
# "tests/" search directory.
SLOW_TESTS = [
    "tests/functional/merge/cram/merge-metadata.t",
    "tests/functional/tree/cram/iqtree-more-threads.t",
    "tests/functional/subsample/cram/proximal-subsampling.t",
    "tests/functional/measurements_export.t",
    "tests/functional/curate/cram/metadata-input.t",
    "tests/functional/export_v2/cram/metadata-columns.t",
    "tests/functional/curate/cram/titlecase.t",
    "tests/functional/tree/cram/iqtree-override-args.t",
    "tests/functional/subsample/cram/proximal-subsampling-errors.t",
    "tests/functional/merge/cram/merge-metadata-and-sequences.t",
]


def run_test(test_file, cram_args):
    """Run ``cram`` on one test file and measure how long it takes.

    Parameters
    ----------
    test_file
        Path of the ``.t`` file to run; passed to ``cram`` as ``str(test_file)``.
    cram_args
        Extra command-line arguments placed between ``cram`` and the test path.

    Returns
    -------
    tuple
        ``(test_file, returncode, elapsed_seconds, stdout_bytes, stderr_bytes)``
        where the output streams are the captured, undecoded bytes.
    """
    began = time.monotonic()
    command = ["cram"]
    command.extend(cram_args)
    command.append(str(test_file))
    completed = subprocess.run(command, capture_output=True)
    duration = time.monotonic() - began
    return (
        test_file,
        completed.returncode,
        duration,
        completed.stdout,
        completed.stderr,
    )


def main():
parser = argparse.ArgumentParser(
description=__doc__,
usage="%(prog)s [OPTIONS] [TESTS] [-- CRAM_ARGS...]",
)
parser.add_argument(
"-j", "--jobs", type=int, default=os.cpu_count(),
help="number of parallel workers (default: all available (%(default)s))",
)
parser.add_argument(
"tests", nargs="*", default=["tests/"],
help="files or directories to find .t files in (default: tests/)",
)

argv = sys.argv[1:]
if "--" in argv:
split = argv.index("--")
args = parser.parse_args(argv[:split])
cram_args = argv[split + 1:]
else:
args = parser.parse_args(argv)
cram_args = []

all_tests = []
for path in map(Path, args.tests):
if path.is_file():
all_tests.append(path)
elif path.is_dir():
all_tests.extend(path.rglob("*.t"))
else:
parser.error(f"not a file or directory: {path}")
all_tests = sorted(set(all_tests))
if not all_tests:
parser.error(f"no .t files found in {' '.join(args.tests)}")

slow_set = [Path(p) for p in SLOW_TESTS]
slow_tests = [t for t in slow_set if t in all_tests]
rest_tests = [t for t in all_tests if t not in slow_set]
test_files = slow_tests + rest_tests

cram_cmd = " ".join(["cram", *cram_args])
print(f"Running {len(test_files)} tests with {args.jobs} workers")
print(f" ({len(slow_tests)} slow tests scheduled first)")
print(f"cram invocation: {cram_cmd} <test>\n")

results = []
passed = failed = 0
wall_start = time.monotonic()

with ProcessPoolExecutor(max_workers=args.jobs) as pool:
futures = {
pool.submit(run_test, t, cram_args): t for t in test_files
}
for future in as_completed(futures):
test_file, rc, elapsed, stdout, stderr = future.result()
results.append((elapsed, rc, test_file))
status = "PASS" if rc == 0 else "FAIL"
if rc == 0:
passed += 1
else:
failed += 1
print(f" {status} {elapsed:6.1f}s {test_file}")
if rc != 0:
if stdout:
print(stdout.decode(errors="replace"))
if stderr:
print(stderr.decode(errors="replace"))

wall_elapsed = time.monotonic() - wall_start
total_cpu = sum(e for e, _, _ in results)

print(f"\n{'='*60}")
print(f"Passed: {passed} Failed: {failed} Total: {len(results)}")
print(f"Wall time: {wall_elapsed:.1f}s")
print(f"Total CPU: {total_cpu:.1f}s")
print(f"Speedup: {total_cpu / wall_elapsed:.1f}x")

failures = [(e, rc, t) for e, rc, t in results if rc != 0]
if failures:
print(f"\n{'='*60}")
print(f"Failing tests ({len(failures)}):\n")
for elapsed, rc, test_file in sorted(failures, key=lambda x: x[2]):
print(f" {elapsed:6.1f}s exit={rc} {test_file}")

print(f"\n{'='*60}")
print("Slowest tests:\n")
results.sort(reverse=True)
for elapsed, rc, test_file in results[:20]:
status = "PASS" if rc == 0 else "FAIL"
print(f" {elapsed:6.1f}s {status} {test_file}")

sys.exit(1 if failures else 0)


# Run the test runner only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions tests/functional/refine/cram/keep-ids.t
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Setup
> --alignment "$TESTDIR/../data/aligned.fasta" \
> --metadata "$TESTDIR/../data/metadata.tsv" \
> --output-tree tree.nwk \
> --output-node-data branch_lengths.json \
> --timetree \
> --clock-filter-iqd 2 \
> --seed 314159 2>&1 | grep "pruning leaf" || echo "Nothing pruned"
Expand All @@ -28,6 +29,7 @@ Use --keep-ids to force-include it.
> --alignment "$TESTDIR/../data/aligned.fasta" \
> --metadata "$TESTDIR/../data/metadata.tsv" \
> --output-tree tree.nwk \
> --output-node-data branch_lengths.json \
> --timetree \
> --clock-filter-iqd 2 \
> --keep-ids include.txt \
Expand Down
3 changes: 1 addition & 2 deletions tests/functional/titers/cram/_setup.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
pushd "$TESTDIR" > /dev/null
export AUGUR="${AUGUR:-../../../../bin/augur}"
export AUGUR="${AUGUR:-$TESTDIR/../../../../bin/augur}"
set -o pipefail
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ Setup
Test titer substitution model with alignment and tree inputs and a custom prefix for the node data attributes in the output.

$ ${AUGUR} titers sub \
> --tree ../data/tree.nwk \
> --titers ../data/titers.tsv \
> --alignment ../data/aa_seq_HA1.fasta \
> --tree $TESTDIR/../data/tree.nwk \
> --titers $TESTDIR/../data/titers.tsv \
> --alignment $TESTDIR/../data/aa_seq_HA1.fasta \
> --gene-names HA1 \
> --attribute-prefix custom_prefix_ \
> --output $TMP/titers-sub.json > /dev/null
Read titers from ../data/titers.tsv, found:
Read titers from */data/titers.tsv, found: (glob)
--- 62 strains
--- 15 data sources
--- 272 total measurements
Expand Down
8 changes: 4 additions & 4 deletions tests/functional/titers/cram/titers-sub-with-tree.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ Setup
Test titer substitution model with alignment and tree inputs.

$ ${AUGUR} titers sub \
> --tree ../data/tree.nwk \
> --titers ../data/titers.tsv \
> --alignment ../data/aa_seq_HA1.fasta \
> --tree $TESTDIR/../data/tree.nwk \
> --titers $TESTDIR/../data/titers.tsv \
> --alignment $TESTDIR/../data/aa_seq_HA1.fasta \
> --gene-names HA1 \
> --output $TMP/titers-sub.json > /dev/null
Read titers from ../data/titers.tsv, found:
Read titers from */data/titers.tsv, found: (glob)
--- 62 strains
--- 15 data sources
--- 272 total measurements
Expand Down
6 changes: 3 additions & 3 deletions tests/functional/titers/cram/titers-tree-with-custom-prefix.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ Setup
Test titer tree model with a custom prefix for the node data attributes in the output.

$ ${AUGUR} titers tree \
> --tree ../data/tree.nwk \
> --titers ../data/titers.tsv \
> --tree $TESTDIR/../data/tree.nwk \
> --titers $TESTDIR/../data/titers.tsv \
> --attribute-prefix custom_prefix_ \
> --output $TMP/titers-tree.json > /dev/null
Read titers from ../data/titers.tsv, found:
Read titers from */data/titers.tsv, found: (glob)
--- 62 strains
--- 15 data sources
--- 272 total measurements
Expand Down
6 changes: 3 additions & 3 deletions tests/functional/titers/cram/titers-tree.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ Setup
Test titer tree model.

$ ${AUGUR} titers tree \
> --tree ../data/tree.nwk \
> --titers ../data/titers.tsv \
> --tree $TESTDIR/../data/tree.nwk \
> --titers $TESTDIR/../data/titers.tsv \
Comment thread
victorlin marked this conversation as resolved.
> --output $TMP/titers-tree.json > /dev/null
Read titers from ../data/titers.tsv, found:
Read titers from */data/titers.tsv, found: (glob)
--- 62 strains
--- 15 data sources
--- 272 total measurements
Expand Down
6 changes: 4 additions & 2 deletions tests/functional/tree/cram/iqtree-compressed-input.t
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ Setup

Build a tree with excluded sites using a compressed input file.

$ cp "$TESTDIR/../data/aligned.fasta.xz" .
$ cp "$TESTDIR/../data/excluded_sites.txt" .
$ ${AUGUR} tree \
> --alignment "$TESTDIR/../data/aligned.fasta.xz" \
> --exclude-sites "$TESTDIR/../data/excluded_sites.txt" \
> --alignment aligned.fasta.xz \
> --exclude-sites excluded_sites.txt \
> --output tree_raw.nwk &> /dev/null
5 changes: 3 additions & 2 deletions tests/functional/tree/cram/iqtree-conflicting-default-args.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ Setup
Build a tree with conflicting default arguments.
Expect error message.

$ cp "$TESTDIR/../data/aligned.fasta" .
$ ${AUGUR} tree \
> --method iqtree \
> --alignment "$TESTDIR/../data/aligned.fasta" \
> --tree-builder-args="--threads-max 1 --msa $TESTDIR/../data/aligned.fasta" \
> --alignment aligned.fasta \
> --tree-builder-args="--threads-max 1 --msa aligned.fasta" \
> --output "tree_raw.nwk"
ERROR: The following tree builder arguments conflict with hardcoded defaults. Remove these arguments and try again: --threads-max, --msa
[1]
3 changes: 2 additions & 1 deletion tests/functional/tree/cram/iqtree-extend-args.t
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ Setup

Build a tree, augmenting existing default arguments with custom arguments.

$ cp "$TESTDIR/../data/aligned.fasta" .
$ ${AUGUR} tree \
> --method iqtree \
> --alignment "$TESTDIR/../data/aligned.fasta" \
> --alignment aligned.fasta \
> --tree-builder-args="--polytomy" \
> --output tree_raw.nwk > /dev/null
3 changes: 2 additions & 1 deletion tests/functional/tree/cram/iqtree-model-auto.t
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ Setup

Try building a tree with IQ-TREE using its ModelTest functionality, by supplying a substitution model of "auto".

$ cp "$TESTDIR/../data/aligned.fasta" .
$ ${AUGUR} tree \
> --alignment "$TESTDIR/../data/aligned.fasta" \
> --alignment aligned.fasta \
> --method iqtree \
> --substitution-model auto \
> --output tree_raw.nwk \
Expand Down
3 changes: 2 additions & 1 deletion tests/functional/tree/cram/iqtree-more-threads.t
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ Setup

Try building a tree with IQ-TREE with more threads (4) than there are input sequences (3).

$ cp "$TESTDIR/../data/aligned.fasta" .
$ ${AUGUR} tree \
> --alignment "$TESTDIR/../data/aligned.fasta" \
> --alignment aligned.fasta \
> --method iqtree \
> --output tree_raw.nwk \
> --nthreads 4 > /dev/null
3 changes: 2 additions & 1 deletion tests/functional/tree/cram/iqtree-override-args.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ Setup
Build a tree, replacing existing default arguments with custom arguments.
Since the following custom arguments are incompatible with the default IQ-TREE arguments, this command will only work with the `--override-default-args` flag.

$ cp "$TESTDIR/../data/full_aligned.fasta" .
$ ${AUGUR} tree \
> --method iqtree \
> --alignment "$TESTDIR/../data/full_aligned.fasta" \
> --alignment full_aligned.fasta \
> --tree-builder-args="--polytomy -bb 1000 -bnni" \
> --override-default-args \
> --output tree_raw.nwk > /dev/null
5 changes: 3 additions & 2 deletions tests/functional/tree/cram/iqtree-preserve-fa.t
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ Setup

Build a tree with an input file that doesn't end in .fasta, and ensure it's not overwritten.

$ cp "$TESTDIR/../data/aligned.fa" .
$ ${AUGUR} tree \
> --alignment "$TESTDIR/../data/aligned.fa" \
> --alignment aligned.fa \
> --method iqtree \
> --output tree_raw.nwk \
> --nthreads 1 > /dev/null

$ sha256sum "$TESTDIR/../data/aligned.fa" | awk '{print $1}'
$ sha256sum aligned.fa | awk '{print $1}'
169a9f5f70b94e26a2c4ab2b3180d4b463112581438515557a9797adc834863d
3 changes: 2 additions & 1 deletion tests/functional/tree/cram/iqtree.t
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ Setup

Try building a tree with IQ-TREE.

$ cp "$TESTDIR/../data/aligned.fasta" .
$ ${AUGUR} tree \
> --alignment "$TESTDIR/../data/aligned.fasta" \
> --alignment aligned.fasta \
> --method iqtree \
> --output tree_raw.nwk \
> --nthreads 1 > /dev/null
Loading