diff --git a/augur/dates/__init__.py b/augur/dates/__init__.py index 8747d6538..fb6746210 100644 --- a/augur/dates/__init__.py +++ b/augur/dates/__init__.py @@ -7,7 +7,7 @@ from collections.abc import Iterable, Mapping from functools import cache from treetime.utils import numeric_date as tt_numeric_date, datetime_from_numeric -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Dict, Literal, Optional, Tuple, Union from augur.errors import AugurError from augur.io.print import indented_list, _n from .errors import InvalidDate @@ -77,17 +77,49 @@ def numeric_date(date): raise InvalidDate(date, f"""Ensure it is in one of the supported formats:\n{SUPPORTED_DATE_HELP_TEXT}""") -def numeric_date_type(date): - """Wraps numeric_date() for argparse usage. +def numeric_date_type_min(date) -> float: + """Wraps numeric_date() for argparse usage, taking the minimum value if resolved to a range. This raises an ArgumentTypeError from InvalidDateFormat exceptions, otherwise the custom exception message won't be shown in console output due to: https://github.com/python/cpython/blob/5c4d1f6e0e192653560ae2941a6677fbf4fbd1f2/Lib/argparse.py#L2503-L2513 + + >>> round(numeric_date_type_min("2018"), 3) + 2018.001 + """ + try: + return get_single_numeric_date(date, fmt="%Y-%m-%d", min_or_max="min") + except InvalidDate as error: + raise argparse.ArgumentTypeError(str(error)) from error + +def numeric_date_type_max(date) -> float: + """Wraps numeric_date() for argparse usage, taking the maximum value if resolved to a range. + + This raises an ArgumentTypeError from InvalidDateFormat exceptions, otherwise the custom exception message won't be shown in console output due to: + https://github.com/python/cpython/blob/5c4d1f6e0e192653560ae2941a6677fbf4fbd1f2/Lib/argparse.py#L2503-L2513 + + >>> round(numeric_date_type_max("2018"), 3) + 2018.999 """ try: - return numeric_date(date) + return get_single_numeric_date(date, fmt="%Y-%m-%d", min_or_max="max") except InvalidDate as error: raise argparse.ArgumentTypeError(str(error)) from error +def get_single_numeric_date(value, fmt, min_or_max: Literal["min", "max"]) -> float: + numeric_date = get_numerical_date_from_value(value, fmt) + + if isinstance(numeric_date, float): + return numeric_date + + if isinstance(numeric_date, tuple): + if min_or_max == "min": + return numeric_date[0] + if min_or_max == "max": + return numeric_date[1] + + raise InvalidDate(value, f"""Ensure it is in one of the supported formats:\n{SUPPORTED_DATE_HELP_TEXT}""") + + def is_date_ambiguous(date, ambiguous_by): """ Returns whether a given date string in the format of YYYY-MM-DD is ambiguous by a given part of the date (e.g., day, month, year, or any parts). @@ -259,6 +291,14 @@ def get_numerical_date_from_value(value, fmt, min_max_year=None) -> Union[float, return (date_to_numeric(start), date_to_numeric(end)) + # Check if value is an ISO 8601 duration treated as a backwards-looking relative date + try: + if not value.startswith('P'): + value = 'P' + value + return date_to_numeric(datetime.date.today() - isodate.parse_duration(value)) + except (ValueError, isodate.ISO8601Error): + pass + # Return none (silent error) if the date does not match any of the checked formats. return None diff --git a/augur/filter/__init__.py b/augur/filter/__init__.py index dfc5cafa8..bc801c34a 100644 --- a/augur/filter/__init__.py +++ b/augur/filter/__init__.py @@ -12,7 +12,7 @@ program vcftools must be available on PATH. """ from augur.argparse_ import ExtendOverwriteDefault, SKIP_AUTO_DEFAULT_IN_HELP -from augur.dates import numeric_date_type +from augur.dates import numeric_date_type_min, numeric_date_type_max from augur.filter.arguments import descriptions from augur.filter.io import column_type_pair from augur.io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS @@ -39,8 +39,8 @@ def register_arguments(parser): metadata_filter_group.add_argument('--query', help=descriptions['query']) metadata_filter_group.add_argument('--query-columns', type=column_type_pair, nargs="+", action=ExtendOverwriteDefault, help=descriptions['query_columns']) - metadata_filter_group.add_argument('--min-date', type=numeric_date_type, help=descriptions['min_date']) - metadata_filter_group.add_argument('--max-date', type=numeric_date_type, help=descriptions['max_date']) + metadata_filter_group.add_argument('--min-date', type=numeric_date_type_min, help=descriptions['min_date']) + metadata_filter_group.add_argument('--max-date', type=numeric_date_type_max, help=descriptions['max_date']) metadata_filter_group.add_argument('--exclude-ambiguous-dates-by', choices=['any', 'day', 'month', 'year'], help=descriptions['exclude_ambiguous_dates_by']) metadata_filter_group.add_argument('--exclude', type=str, nargs="+", action=ExtendOverwriteDefault, help=descriptions['exclude']) metadata_filter_group.add_argument('--exclude-where', nargs='+', action=ExtendOverwriteDefault, help=descriptions['exclude_where']) diff --git a/augur/frequencies.py b/augur/frequencies.py index 2d94289a1..81af55177 100644 --- a/augur/frequencies.py +++ b/augur/frequencies.py @@ -11,7 +11,7 @@ from .errors import AugurError from .frequency_estimators import get_pivots, alignment_frequencies, tree_frequencies from .frequency_estimators import AlignmentKdeFrequencies, TreeKdeFrequencies, TreeKdeFrequenciesError -from .dates import numeric_date_type, SUPPORTED_DATE_HELP_TEXT, get_numerical_dates +from .dates import numeric_date_type_min, numeric_date_type_max, SUPPORTED_DATE_HELP_TEXT, get_numerical_dates from .io.file import open_file from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, METADATA_DATE_COLUMN, InvalidDelimiter, Metadata, read_metadata from .utils import write_augur_json @@ -38,9 +38,9 @@ def register_parser(parent_subparsers): help="number of units between pivots") parser.add_argument("--pivot-interval-units", type=str, default="months", choices=['months', 'weeks'], help="space pivots by months (default) or by weeks") - parser.add_argument('--min-date', type=numeric_date_type, + parser.add_argument('--min-date', type=numeric_date_type_min, help=f"date to begin frequencies calculations; may be specified as: {SUPPORTED_DATE_HELP_TEXT}") - parser.add_argument('--max-date', type=numeric_date_type, + parser.add_argument('--max-date', type=numeric_date_type_max, help=f"date to end frequencies calculations; may be specified as: {SUPPORTED_DATE_HELP_TEXT}") # Tree-specific arguments diff --git a/tests/dates/test_dates.py b/tests/dates/test_dates.py index 35cc88e5f..4686b01d1 100644 --- a/tests/dates/test_dates.py +++ b/tests/dates/test_dates.py @@ -189,3 +189,19 @@ def test_get_numerical_dates_dict_error(self): } with pytest.raises(AugurError): dates.get_numerical_dates(metadata, "%Y-%m-%d") + + @freeze_time("2000-02-20") + def test_get_numerical_date_from_value_relative_dates(self): + """Test that get_numerical_date_from_value handles ISO duration strings as relative dates.""" + assert dates.get_numerical_date_from_value("1D", "%Y-%m-%d") == pytest.approx(2000.135, abs=1e-3) + assert dates.get_numerical_date_from_value("1W", "%Y-%m-%d") == pytest.approx(2000.119, abs=1e-3) + assert dates.get_numerical_date_from_value("1M", "%Y-%m-%d") == pytest.approx(2000.053, abs=1e-3) + assert dates.get_numerical_date_from_value("1Y", "%Y-%m-%d") == pytest.approx(1999.138, abs=1e-3) + assert dates.get_numerical_date_from_value("1Y1M1W", "%Y-%m-%d") == pytest.approx(1999.034, abs=1e-3) + + @freeze_time("2000-02-20") + def test_get_numerical_date_from_value_relative_dates_with_p_prefix(self): + """Test that get_numerical_date_from_value handles ISO duration strings with P prefix.""" + assert dates.get_numerical_date_from_value("P1D", "%Y-%m-%d") == pytest.approx(2000.135, abs=1e-3) + assert dates.get_numerical_date_from_value("P1W", "%Y-%m-%d") == pytest.approx(2000.119, abs=1e-3) + assert dates.get_numerical_date_from_value("P1M", "%Y-%m-%d") == pytest.approx(2000.053, abs=1e-3) diff --git a/tests/functional/filter/cram/filter-max-date.t b/tests/functional/filter/cram/filter-max-date.t index 5bea06dc0..068ef0276 100644 --- a/tests/functional/filter/cram/filter-max-date.t +++ b/tests/functional/filter/cram/filter-max-date.t @@ -6,16 +6,16 @@ Create metadata TSV file for testing. $ cat >metadata.tsv <<~~ > strain date - > SEQ_1 2020-03-XX - > SEQ_2 2020-03-01 - > SEQ_3 2020-03-02 + > SEQ_1 2019-XX-XX + > SEQ_2 2019-12-31 + > SEQ_3 2020-01-01 > ~~ -Test that --max-date is inclusive. +Test that --max-date is inclusive even with ambiguity. $ ${AUGUR} filter \ > --metadata metadata.tsv \ - > --max-date 2020-03-01 \ + > --max-date 2019 \ > --output-strains filtered_strains.txt 2>/dev/null $ sort filtered_strains.txt SEQ_1