Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@

## __NEXT__

### Features

* filter, frequencies, refine: Added support in metadata for precise date ranges in `YYYY-MM-DD/YYYY-MM-DD` format. [#1304][] (@victorlin)

### Bug fixes

* curate format-dates: Removed redundant warning messages that were previously displayed when using `--failure-reporting "warn"`. [#1816][] (@victorlin)
* merge: Fixed a performance bug where input sequence file validation unnecessarily loaded file contents into device memory. [#1820][] (@victorlin)

[#1304]: https://github.com/nextstrain/augur/issues/1304
[#1816]: https://github.com/nextstrain/augur/pull/1816
[#1820]: https://github.com/nextstrain/augur/pull/1820

Expand Down
35 changes: 32 additions & 3 deletions augur/dates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,23 @@ def is_date_ambiguous(date, ambiguous_by):
Those should be further validated by date conversion functions.
"""

# Anchored at both ends, so only the fully masked placeholder itself matches.
RE_AUGUR_UNKNOWN_DATE = re.compile(r'^XXXX-XX-XX$')
"""
Matches an Augur-style unknown date.
"""

# Deliberately loose: any string containing 'XX' matches, including the fully
# unknown 'XXXX-XX-XX'. get_numerical_date_from_value therefore tests the
# unknown-date pattern before this one.
RE_AUGUR_AMBIGUOUS_DATE = re.compile(r'.*XX.*')
"""
Matches an Augur-style ambiguous date with 'XX' used to mask unknown parts of the date.
Note that this can support any date format, not just YYYY-MM-DD.
"""

# Checks only the digit/separator shape of the value. It does not verify that
# either side is a real calendar date, nor that the start precedes the end.
RE_DATE_RANGE = re.compile(r'^\d{4}-\d{2}-\d{2}/\d{4}-\d{2}-\d{2}$')
"""
Matches a date range in YYYY-MM-DD/YYYY-MM-DD format.
Note that this is a subset of the ISO 8601 time interval format.
"""

@cache
def get_numerical_date_from_value(value, fmt, min_max_year=None) -> Union[float, Tuple[float, float], None]:
value = str(value)
Expand All @@ -171,13 +182,20 @@ def get_numerical_date_from_value(value, fmt, min_max_year=None) -> Union[float,
except:
pass

# 2. Check if value is an ambiguous date in the specified format (fmt).
# 2. Check if value is an unknown date.
# This is checked before ambiguous dates since it is a subset of that with
# special handling.

if RE_AUGUR_UNKNOWN_DATE.match(value):
return (float("-inf"), float("inf"))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing this is not handled in treetime which is leading to the errors in a couple of the pathogen-repo-ci tests?


# 3. Check if value is an ambiguous date in the specified format (fmt).

if RE_AUGUR_AMBIGUOUS_DATE.match(value):
start, end = AmbiguousDate(value, fmt=fmt).range(min_max_year=min_max_year)
return (date_to_numeric(start), date_to_numeric(end))

# 3. Check formats that are always supported.
# 4. Check formats that are always supported.

if RE_NUMERIC_DATE.match(value):
return float(value)
Expand All @@ -199,7 +217,18 @@ def get_numerical_date_from_value(value, fmt, min_max_year=None) -> Union[float,
# closest in-bound value.
raise InvalidDate(value, str(error)) from error

# 4. Return none (silent error) if the date does not match any of the checked formats.
if RE_DATE_RANGE.match(value):
start, end = value.split("/")

start = datetime.datetime.strptime(start, "%Y-%m-%d")
end = datetime.datetime.strptime(end , "%Y-%m-%d")

if start > end:
raise InvalidDate(value, f"Start {start!r} is later than end {end!r}")

return (date_to_numeric(start), date_to_numeric(end))

# 5. Return none (silent error) if the date does not match any of the checked formats.

return None

Expand Down
3 changes: 3 additions & 0 deletions docs/faq/metadata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ Ambiguity over a range of dates is supported in the following formats:
(e.g. ``2018``, ``2018-03``)
2. Augur-style reduced precision format, i.e. ISO 8601 format with unknown parts explicitly masked by ``XX``
(e.g. ``2018-XX-XX``, ``2018-03-XX``)
3. ``<start>/<end>`` range format, where ``<start>`` and ``<end>`` are exact dates in ``YYYY-MM-DD`` format.
This is a subset of `ISO 8601 interval format <https://en.wikipedia.org/wiki/ISO_8601#Time_intervals>`__.
(e.g. ``2017-12-01/2018-03-25``)

**Geography**

Expand Down
14 changes: 14 additions & 0 deletions tests/dates/test_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ def test_get_numerical_date_from_value_not_ambiguous(self):
== pytest.approx(2000.242, abs=1e-3)
)

def test_get_numerical_date_from_value_unknown_date(self):
    """A fully masked date should map to an unbounded numeric range."""
    unbounded = (float("-inf"), float("inf"))
    parsed = dates.get_numerical_date_from_value("XXXX-XX-XX", "%Y-%m-%d")
    assert parsed == unbounded

@pytest.mark.parametrize(
"value",
[
Expand Down Expand Up @@ -134,6 +139,15 @@ def test_get_numerical_date_from_value_current_day_limit(self):
== pytest.approx(2000.138, abs=1e-3)
)

def test_get_numerical_date_from_value_range(self):
    """A YYYY-MM-DD/YYYY-MM-DD value should parse to a (start, end) pair of numeric dates."""
    # fmt is irrelevant here: the range format is matched independently of it.
    assert dates.get_numerical_date_from_value("2019-01-02/2019-03-04", fmt="unused") == (
        pytest.approx(dates.numeric_date(datetime.date(year=2019, month=1, day=2)), abs=1e-3),
        pytest.approx(dates.numeric_date(datetime.date(year=2019, month=3, day=4)), abs=1e-3),
    )

    # Using a numeric date as a bound is not valid.
    # Identity comparison is the correct check for the None singleton.
    assert dates.get_numerical_date_from_value("2019.0/2019-06-01", fmt="unused") is None

def test_is_date_ambiguous(self):
"""is_date_ambiguous should return true for ambiguous dates and false for valid dates."""
# Test complete date strings with ambiguous values.
Expand Down