diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1b6cd83..dad48aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13"] defaults: run: working-directory: . diff --git a/.github/workflows/coverage_readme.yml b/.github/workflows/coverage_readme.yml index 5e697f5..eb5db29 100644 --- a/.github/workflows/coverage_readme.yml +++ b/.github/workflows/coverage_readme.yml @@ -22,10 +22,10 @@ jobs: persist-credentials: false fetch-depth: 0 - - name: Set up Python 3.12 + - name: Set up Python 3.13 uses: actions/setup-python@v5 with: - python-version: 3.12 + python-version: 3.13 cache: 'pip' cache-dependency-path: '**/pyproject.toml' diff --git a/.gitignore b/.gitignore index 36df893..d27f79d 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,5 @@ docs/_build/ # PyBuilder target/ +.idea +.DS_Store diff --git a/README.md b/README.md index f157bb6..8fe2de1 100644 --- a/README.md +++ b/README.md @@ -514,6 +514,6 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut * Fix formatting: `ruff format --config pyproject.toml` * Linting and formatting checks and attempted fixes are also run as precommit hooks if you installed them. -### Coverage and benchmraks +### Coverage and benchmarks Coverage reports are generated and added as comments to commits, and also visible in the actions log. Benchmarks are run on pull requests and are published [here]( https://ixc.github.io/python-edtf/dev/bench/) and also visible in the actions log. 
diff --git a/edtf/__init__.py b/edtf/__init__.py index 7bb2885..0b0bfbf 100644 --- a/edtf/__init__.py +++ b/edtf/__init__.py @@ -22,6 +22,7 @@ UncertainOrApproximate, Unspecified, UnspecifiedIntervalSection, + is_valid_edtf, parse_edtf, ) @@ -46,6 +47,7 @@ "trim_struct_time", "text_to_edtf", "parse_edtf", + "is_valid_edtf", # parser_exceptions "EDTFParseException", # parser_classes diff --git a/edtf/appsettings.py b/edtf/appsettings.py index 8e15846..e9b4d9d 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -12,7 +12,7 @@ except ImportError: EDTF = {} -SEASON_MONTHS_RANGE = EDTF.get( +SEASON_MONTHS_RANGE: dict[int, list[int]] = EDTF.get( "SEASON_MONTHS_RANGE", { # season id: [earliest_month, last_month] @@ -27,7 +27,7 @@ }, ) -SEASON_L2_MONTHS_RANGE = EDTF.get( +SEASON_L2_MONTHS_RANGE: dict[int, list[int]] = EDTF.get( "SEASON_L2_MONTHS_RANGE", { # season id: [earliest_month, last_month] @@ -67,9 +67,9 @@ }, ) -DAY_FIRST = EDTF.get("DAY_FIRST", False) # Americans! +DAY_FIRST: bool = EDTF.get("DAY_FIRST", False) # Americans! 
-SEASONS = EDTF.get( +SEASONS: dict[int, str] = EDTF.get( "SEASONS", { 21: "spring", @@ -78,25 +78,38 @@ 24: "winter", }, ) -INVERSE_SEASONS = EDTF.get("INVERSE_SEASONS", {v: k for k, v in SEASONS.items()}) +INVERSE_SEASONS: dict[str, int] = EDTF.get( + "INVERSE_SEASONS", {v: k for k, v in SEASONS.items()} +) # also need to interpret `fall` INVERSE_SEASONS["fall"] = 23 # changing these will break tests -PADDING_DAY_PRECISION = EDTF.get("PADDING_DAY_PRECISION", relativedelta(days=1)) -PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1)) -PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1)) -PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12)) -PADDING_DECADE_PRECISION = EDTF.get("PADDING_DECADE_PRECISION", relativedelta(years=10)) -PADDING_CENTURY_PRECISION = EDTF.get( +PADDING_DAY_PRECISION: relativedelta = EDTF.get( + "PADDING_DAY_PRECISION", relativedelta(days=1) +) +PADDING_MONTH_PRECISION: relativedelta = EDTF.get( + "PADDING_MONTH_PRECISION", relativedelta(months=1) +) +PADDING_YEAR_PRECISION: relativedelta = EDTF.get( + "PADDING_YEAR_PRECISION", relativedelta(years=1) +) +PADDING_SEASON_PRECISION: relativedelta = EDTF.get( + "PADDING_SEASON_PRECISION", relativedelta(weeks=12) +) +PADDING_DECADE_PRECISION: relativedelta = EDTF.get( + "PADDING_DECADE_PRECISION", relativedelta(years=10) +) +PADDING_CENTURY_PRECISION: relativedelta = EDTF.get( "PADDING_CENTURY_PRECISION", relativedelta(years=100) ) -PADDING_MILLENNIUM_PRECISION = EDTF.get( +PADDING_MILLENNIUM_PRECISION: relativedelta = EDTF.get( "PADDING_MILLENNIUM_PRECISION", relativedelta(years=1000) ) -MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0) -MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) -MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) -DELTA_IF_UNKNOWN = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10)) +MULTIPLIER_IF_UNCERTAIN: float = 
EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0) +MULTIPLIER_IF_APPROXIMATE: float = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) +MULTIPLIER_IF_BOTH: float = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) +DELTA_IF_UNKNOWN: relativedelta = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10)) +DELTA_IF_EMPTY: relativedelta = relativedelta(None) -DEBUG_PYPARSING = False +DEBUG_PYPARSING: bool = False diff --git a/edtf/convert.py b/edtf/convert.py index ee03f36..c03e2ea 100644 --- a/edtf/convert.py +++ b/edtf/convert.py @@ -21,7 +21,7 @@ def old_specs_to_new_specs_expression(expression): return expression -def dt_to_struct_time(dt): +def dt_to_struct_time(dt) -> struct_time: """ Convert a `datetime.date` or `datetime.datetime` to a `struct_time` representation *with zero values* for data fields that we cannot always @@ -70,8 +70,7 @@ def trim_struct_time(st: struct_time, strip_time: bool = False) -> struct_time: """ if strip_time: return struct_time(list(st[:3]) + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - else: - return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS) + return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS) def struct_time_to_jd(st: struct_time) -> float: @@ -116,7 +115,7 @@ def jd_to_struct_time(jd: float) -> struct_time: return struct_time([year, month, day, hour, minute, second] + TIME_EMPTY_EXTRAS) -def _roll_negative_time_fields(year, month, day, hour, minute, second): +def _roll_negative_time_fields(year, month, day, hour, minute, second) -> tuple: """ Fix date/time fields which have nonsense negative values for any field except for year by rolling the overall date/time value backwards, treating @@ -152,4 +151,5 @@ def _roll_negative_time_fields(year, month, day, hour, minute, second): year += int(month / 12.0) # Adjust by whole year in months year -= 1 # Subtract 1 for negative minutes month %= 12 # Convert negative month to positive remainder - return (year, month, day, hour, minute, second) + + return year, month, day, hour, minute, second diff --git a/edtf/jdutil.py 
b/edtf/jdutil.py index 7c0a3bd..b7a2cbb 100644 --- a/edtf/jdutil.py +++ b/edtf/jdutil.py @@ -396,7 +396,7 @@ def __sub__(self, other): return jd_to_datetime(combined) - elif isinstance(other, (datetime, dt.datetime)): + elif isinstance(other, datetime | dt.datetime): diff = datetime_to_jd(self) - datetime_to_jd(other) return dt.timedelta(diff) @@ -407,7 +407,7 @@ def __sub__(self, other): raise TypeError(s) def __rsub__(self, other): - if not isinstance(other, (datetime, dt.datetime)): + if not isinstance(other, datetime | dt.datetime): s = "jdutil.datetime supports '-' with: " s += "jdutil.datetime and datetime.datetime" raise TypeError(s) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index f28e685..077ae19 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -1,5 +1,6 @@ """Utilities to derive an EDTF string from an (English) natural language string.""" +import functools import re from datetime import datetime @@ -13,19 +14,45 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = r"(-?)([\dX])([\dX])([\dX])([\dX])" -LONG_YEAR_RE = r"Y(-?)([1-9]\d\d\d\d+)" -CENTURY_RE = r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?" -CE_RE = r"(\d{1,4}) (ad|ce|bc|bce)" +LONG_YEAR_RE = re.compile(r"y(-?)([1-9]\d\d\d\d+)") +CENTURY_RE = re.compile(r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?") +CENTURY_RANGE = re.compile(r"\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]") +CE_RE = re.compile(r"(\d{1,4}) (ad|ce|bc|bce)") +ONE_DIGIT_PARTIAL_FIRST = re.compile(r"\d\D\b") +TWO_DIGIT_PARTIAL_FIRST = re.compile(r"\d\d\b") +PARTIAL_CHECK = re.compile(r"\b\d\d\d\d$") +SLASH_YEAR = re.compile(r"(\d\d\d\d)/(\d\d\d\d)") +BEFORE_CHECK = re.compile(r"\b(?:before|earlier|avant)\b") +AFTER_CHECK = re.compile(r"\b(after|since|later|aprés|apres)\b") +APPROX_CHECK = re.compile( + r"\b(?:ca?\.? 
?\d{4}|circa|approx|approximately|around|about|~\d{3,4})|^~" +) +UNCERTAIN_CHECK = re.compile(r"\b(?:uncertain|possibly|maybe|guess|\d{3,4}\?)") +UNCERTAIN_REPL = re.compile(r"(\d{4})\?") +MIGHT_BE_CENTURY = re.compile(r"(\d{2}00)s") +MIGHT_BE_DECADE = re.compile(r"(\d{3}0)s") + +APPROX_CENTURY_RE = re.compile( + r"\b(ca?\.?) ?(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?" +) +UNCERTAIN_CENTURY_RE = re.compile( + r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?\?" +) + +APPROX_CE_RE = re.compile(r"\b(ca?\.?) ?(\d{1,4}) (ad|ce|bc|bce)") +UNCERTAIN_CE_RE = re.compile(r"(\d{1,4}) (ad|ce|bc|bce)\?") + +MENTIONS_YEAR = re.compile(r"\byear\b.+(in|during)\b") +MENTIONS_MONTH = re.compile(r"\bmonth\b.+(in|during)\b") +MENTIONS_DAY = re.compile(r"\bday\b.+(in|during)\b") # Set of RE rules that will cause us to abort text processing, since we know # the results will be wrong. -REJECT_RULES = ( - r".*dynasty.*", # Don't parse '23rd Dynasty' to 'uuuu-uu-23' -) +REJECT_RULES = re.compile(r".*dynasty.*") # Don't parse '23rd Dynasty' to 'uuuu-uu-23' -def text_to_edtf(text): +@functools.lru_cache +def text_to_edtf(text: str) -> str | None: """ Generate EDTF string equivalent of a given natural language date string. """ @@ -35,7 +62,7 @@ def text_to_edtf(text): t = text.lower() # try parsing the whole thing - result = text_to_edtf_date(t) + result: str | None = text_to_edtf_date(t) if not result: # split by list delims and move fwd with the first thing that returns a non-empty string. @@ -43,7 +70,8 @@ def text_to_edtf(text): for split in [",", ";", "or"]: for list_item in t.split(split): # try parsing as an interval - split by '-' - toks = list_item.split("-") + toks: list[str] = list_item.split("-") + if len(toks) == 2: d1 = toks[0].strip() d2 = toks[1].strip() @@ -51,19 +79,20 @@ def text_to_edtf(text): # match looks from the beginning of the string, search # looks anywhere. - if re.match(r"\d\D\b", d2): # 1-digit year partial e.g. 
1868-9 + if re.match( + ONE_DIGIT_PARTIAL_FIRST, d2 + ): # 1-digit year partial e.g. 1868-9 if re.search( - r"\b\d\d\d\d$", d1 + PARTIAL_CHECK, d1 ): # TODO: evaluate it and see if it's a year d2 = d1[-4:-1] + d2 - elif re.match(r"\d\d\b", d2): # 2-digit year partial e.g. 1809-10 - if re.search(r"\b\d\d\d\d$", d1): + elif re.match( + TWO_DIGIT_PARTIAL_FIRST, d2 + ): # 2-digit year partial e.g. 1809-10 + if re.search(PARTIAL_CHECK, d1): d2 = d1[-4:-2] + d2 else: - century_range_match = re.search( - r"\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]", - f"{d1}-{d2}", - ) + century_range_match = re.search(CENTURY_RANGE, f"{d1}-{d2}") if century_range_match: g = century_range_match.groups() d1 = f"{g[0]}C" @@ -73,7 +102,7 @@ def text_to_edtf(text): r2 = text_to_edtf_date(d2) if r1 and r2: - result = r1 + "/" + r2 + result = f"{r1}/{r2}" return result # is it an either/or year "1838/1862" - that has a different @@ -82,7 +111,7 @@ def text_to_edtf(text): # This whole section could be more friendly. else: - int_match = re.search(r"(\d\d\d\d)\/(\d\d\d\d)", list_item) + int_match = re.search(SLASH_YEAR, list_item) if int_match: return f"[{int_match.group(1)}, {int_match.group(2)}]" @@ -92,21 +121,19 @@ def text_to_edtf(text): if result: break - is_before = re.findall(r"\bbefore\b", t) - is_before = is_before or re.findall(r"\bearlier\b", t) - - is_after = re.findall(r"\bafter\b", t) - is_after = is_after or re.findall(r"\bsince\b", t) - is_after = is_after or re.findall(r"\blater\b", t) + is_before = re.findall(BEFORE_CHECK, t) + is_after = re.findall(AFTER_CHECK, t) if is_before: - result = f"/{result}" # unknown is replaced with null for intervals + result = f"/{result}" elif is_after: - result = f"{result}/" # unknown is replaced with null for intervals + result = f"{result}/" + return result -def text_to_edtf_date(text): +@functools.lru_cache +def text_to_edtf_date(text: str) -> str | None: """ Return EDTF string equivalent of a given natural language date string. 
@@ -115,37 +142,28 @@ def text_to_edtf_date(text): differ are undefined. """ if not text: - return + return None t = text.lower() - result = "" + result: str = "" - for reject_re in REJECT_RULES: - if re.match(reject_re, t): - return + if re.match(REJECT_RULES, t): + return None # matches on '1800s'. Needs to happen before is_decade. - could_be_century = re.findall(r"(\d{2}00)s", t) + could_be_century: list = re.findall(MIGHT_BE_CENTURY, t) # matches on '1800s' and '1910s'. Removes the 's'. # Needs to happen before is_uncertain because e.g. "1860s?" - t, is_decade = re.subn(r"(\d{3}0)s", r"\1", t) + t, is_decade = re.subn(MIGHT_BE_DECADE, r"\1", t) # detect approximation signifiers # a few 'circa' abbreviations just before the year - is_approximate = re.findall(r"\b(ca?\.?) ?\d{4}", t) + is_approximate = re.findall(APPROX_CHECK, t) # the word 'circa' anywhere - is_approximate = is_approximate or re.findall(r"\bcirca\b", t) - # the word 'approx'/'around'/'about' anywhere - is_approximate = is_approximate or re.findall(r"\b(approx|around|about)", t) - # a ~ before a year-ish number - is_approximate = is_approximate or re.findall(r"\b~\d{4}", t) - # a ~ at the beginning - is_approximate = is_approximate or re.findall(r"^~", t) # detect uncertainty signifiers - t, is_uncertain = re.subn(r"(\d{4})\?", r"\1", t) - # the words uncertain/maybe/guess anywhere - is_uncertain = is_uncertain or re.findall(r"\b(uncertain|possibly|maybe|guess)", t) + t, is_uncertain = re.subn(UNCERTAIN_REPL, r"\1", t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CHECK, t) # detect century forms is_century = re.findall(CENTURY_RE, t) @@ -153,32 +171,29 @@ def text_to_edtf_date(text): # detect CE/BCE year form is_ce = re.findall(CE_RE, t) if is_century: - result = "%02dXX" % (int(is_century[0][0]) - 1,) - is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" 
+ CENTURY_RE, t) - is_uncertain = is_uncertain or re.findall(CENTURY_RE + r"\?", t) + result = f"{int(is_century[0][0]) - 1:02d}XX" + is_approximate = is_approximate or re.findall(APPROX_CENTURY_RE, t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CENTURY_RE, t) try: - is_bc = is_century[0][-1] in ("bc", "bce") - if is_bc: + if is_century[0][-1] in ("bc", "bce"): result = f"-{result}" except IndexError: pass elif is_ce: - result = "%04d" % (int(is_ce[0][0])) - is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" + CE_RE, t) - is_uncertain = is_uncertain or re.findall(CE_RE + r"\?", t) + result = f"{int(is_ce[0][0]):04d}" + is_approximate = is_approximate or re.findall(APPROX_CE_RE, t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CE_RE, t) try: - is_bc = is_ce[0][-1] in ("bc", "bce") - if is_bc: + if is_ce[0][-1] in ("bc", "bce"): result = f"-{result}" except IndexError: pass else: # try dateutil.parse - try: # parse twice, using different defaults to see what was # parsed and what was guessed. @@ -199,40 +214,40 @@ def text_to_edtf_date(text): ) except ParserError: - return + return None except Exception: - return + return None if dt1.date() == DEFAULT_DATE_1.date() and dt2.date() == DEFAULT_DATE_2.date(): # couldn't parse anything - defaults are untouched. - return + return None date1 = dt1.isoformat()[:10] date2 = dt2.isoformat()[:10] # guess precision of 'unspecified' characters to use - mentions_year = re.findall(r"\byear\b.+(in|during)\b", t) - mentions_month = re.findall(r"\bmonth\b.+(in|during)\b", t) - mentions_day = re.findall(r"\bday\b.+(in|during)\b", t) + mentions_year = re.findall(MENTIONS_YEAR, t) + mentions_month = re.findall(MENTIONS_MONTH, t) + mentions_day = re.findall(MENTIONS_DAY, t) - for i in range(len(date1)): + for i, char in enumerate(date1): # if the given year could be a century (e.g. '1800s') then use # approximate/uncertain markers to decide whether we treat it as # a century or a decade. 
if i == 2 and could_be_century and not (is_approximate or is_uncertain): result += "X" - elif i == 3 and is_decade > 0: + elif i == 3 and is_decade: if mentions_year: - result += "X" # previously year precision - now just X + result += "X" # year precision else: - result += "X" # previously decade precision - now just X - elif date1[i] == date2[i]: + result += "X" # decade precision + elif char == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default - result += date1[i] + result += char else: # different values were produced, meaning that it's likely - # a default. Use 'X' + # a default. Use 'unspecified' result += "X" # strip off unknown chars from end of string - except the first 4 diff --git a/edtf/parser/__init__.py b/edtf/parser/__init__.py index 43197d5..9cbf3c3 100644 --- a/edtf/parser/__init__.py +++ b/edtf/parser/__init__.py @@ -1,5 +1,5 @@ from .edtf_exceptions import EDTFParseException -from .grammar import parse_edtf +from .grammar import is_valid_edtf, parse_edtf from .parser_classes import ( UA, Consecutives, @@ -26,6 +26,7 @@ __all__ = [ "parse_edtf", + "is_valid_edtf", "EDTFParseException", "EDTFObject", "Date", diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index beabf52..de84633 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -49,15 +49,15 @@ Unspecified, ) -oneThru12 = oneOf(["%.2d" % i for i in range(1, 13)]) -oneThru13 = oneOf(["%.2d" % i for i in range(1, 14)]) -oneThru23 = oneOf(["%.2d" % i for i in range(1, 24)]) -zeroThru23 = oneOf(["%.2d" % i for i in range(0, 24)]) -oneThru29 = oneOf(["%.2d" % i for i in range(1, 30)]) -oneThru30 = oneOf(["%.2d" % i for i in range(1, 31)]) -oneThru31 = oneOf(["%.2d" % i for i in range(1, 32)]) -oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)]) -zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)]) +oneThru12 = oneOf([f"{i:02}" for i in range(1, 13)]) +oneThru13 = oneOf([f"{i:02}" for i in range(1, 14)]) 
+oneThru23 = oneOf([f"{i:02}" for i in range(1, 24)]) +zeroThru23 = oneOf([f"{i:02}" for i in range(0, 24)]) +oneThru29 = oneOf([f"{i:02}" for i in range(1, 30)]) +oneThru30 = oneOf([f"{i:02}" for i in range(1, 31)]) +oneThru31 = oneOf([f"{i:02}" for i in range(1, 32)]) +oneThru59 = oneOf([f"{i:02}" for i in range(1, 60)]) +zeroThru59 = oneOf([f"{i:02}" for i in range(0, 60)]) digit = Word(nums, exact=1) positiveDigit = Word(nums, exact=1, excludeChars="0") @@ -343,18 +343,31 @@ def f(toks): ) -def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): +def parse_edtf( + input_string: str, + parse_all: bool = True, + fail_silently: bool = False, + debug: bool | None = None, +): if debug is None: debug = DEBUG_PYPARSING + if not input_string: raise EDTFParseException(input_string) + try: - p = edtfParser.parseString(input_string.strip(), parseAll) + p = edtfParser.parse_string(input_string.strip(), parse_all) if p: return p[0] + return None except ParseException as err: if fail_silently: return None if debug: raise raise EDTFParseException(input_string, err) from None + + +def is_valid_edtf(input_string: str) -> bool: + """Returns True if the input string was successfully parsed; False if it isn't.""" + return parse_edtf(input_string, fail_silently=True) is not None diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 1326430..f5d2581 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -1,9 +1,10 @@ import calendar import math -import re +from collections.abc import Callable from datetime import date, datetime from operator import add, sub from time import struct_time +from typing import Optional from dateutil.relativedelta import relativedelta @@ -27,7 +28,7 @@ PRECISION_DAY = "day" -def days_in_month(year, month): +def days_in_month(year: int, month: int) -> int: """ Return the number of days in the given year and month, where month is 1=January to 12=December, and respecting leap years 
as identified by @@ -49,7 +50,7 @@ def days_in_month(year, month): }[month] -def apply_delta(op, time_struct, delta): +def apply_delta(op: Callable, time_struct: struct_time, delta) -> struct_time: """ Apply a `relativedelta` to a `struct_time` data structure. @@ -75,9 +76,9 @@ def apply_delta(op, time_struct, delta): # Adjust the year to be close to the 2000 millenium in 1,000 year # increments to try and retain accurate relative leap years - actual_year = time_struct.tm_year - millenium = int(float(actual_year) / 1000) - millenium_diff = (2 - millenium) * 1000 + actual_year: int = time_struct.tm_year + millenium: int = int(float(actual_year) / 1000) + millenium_diff: int = (2 - millenium) * 1000 adjusted_year = actual_year + millenium_diff # Apply delta to the date/time with adjusted year dt = datetime(*(adjusted_year,) + time_struct[1:6]) @@ -96,6 +97,9 @@ class EDTFObject: """ parser = None + _is_approximate: bool + _is_uncertain: bool + _uncertain_and_approximate: bool @classmethod def set_parser(cls, p): @@ -115,66 +119,67 @@ def parse_action(cls, toks): def parse(cls, s): return cls.parser.parseString(s)[0] - def __repr__(self): + def __repr__(self) -> str: return f"{type(self).__name__}: '{str(self)}'" - def __init__(self, *args, **kwargs): - str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" - raise NotImplementedError(f"{str} is not implemented.") + def __init__(self, *args, **kwargs) -> None: + message: str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" + raise NotImplementedError(f"{message} is not implemented.") - def __str__(self): + def __str__(self) -> str: raise NotImplementedError - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): raise NotImplementedError - def lower_strict(self): + def lower_strict(self) -> struct_time: return self._strict_date(lean=EARLIEST) - def upper_strict(self): + def upper_strict(self) -> struct_time: return self._strict_date(lean=LATEST) - def _get_fuzzy_padding(self, 
lean): + def _get_fuzzy_padding(self, lean: str) -> relativedelta: """ Subclasses should override this to pad based on how precise they are. """ - return relativedelta(0) + return relativedelta(None) - def get_is_approximate(self): + def get_is_approximate(self) -> bool: return getattr(self, "_is_approximate", False) - def set_is_approximate(self, val): + def set_is_approximate(self, val: bool) -> None: self._is_approximate = val - is_approximate = property(get_is_approximate, set_is_approximate) + is_approximate = property(get_is_approximate, set_is_approximate) # noqa - def get_is_uncertain(self): + def get_is_uncertain(self) -> bool: return getattr(self, "_is_uncertain", False) - def set_is_uncertain(self, val): + def set_is_uncertain(self, val: bool) -> None: self._is_uncertain = val - is_uncertain = property(get_is_uncertain, set_is_uncertain) + is_uncertain = property(get_is_uncertain, set_is_uncertain) # noqa - def get_is_uncertain_and_approximate(self): + def get_is_uncertain_and_approximate(self) -> bool: return getattr(self, "_uncertain_and_approximate", False) - def set_is_uncertain_and_approximate(self, val): + def set_is_uncertain_and_approximate(self, val: bool) -> None: self._uncertain_and_approximate = val is_uncertain_and_approximate = property( - get_is_uncertain_and_approximate, set_is_uncertain_and_approximate + get_is_uncertain_and_approximate, # noqa + set_is_uncertain_and_approximate, # noqa ) - def lower_fuzzy(self): + def lower_fuzzy(self) -> struct_time: strict_val = self.lower_strict() return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) - def upper_fuzzy(self): + def upper_fuzzy(self) -> struct_time: strict_val = self.upper_strict() return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) - def __eq__(self, other): + def __eq__(self, other) -> bool: if isinstance(other, EDTFObject): return str(self) == str(other) elif isinstance(other, date): @@ -183,7 +188,7 @@ def __eq__(self, other): return 
self._strict_date() == trim_struct_time(other) return False - def __ne__(self, other): + def __ne__(self, other) -> bool: if isinstance(other, EDTFObject): return str(self) != str(other) elif isinstance(other, date): @@ -192,7 +197,7 @@ def __ne__(self, other): return self._strict_date() != trim_struct_time(other) return True - def __gt__(self, other): + def __gt__(self, other) -> bool: if isinstance(other, EDTFObject): return self.lower_strict() > other.lower_strict() elif isinstance(other, date): @@ -203,7 +208,7 @@ def __gt__(self, other): f"can't compare {type(self).__name__} with {type(other).__name__}" ) - def __ge__(self, other): + def __ge__(self, other) -> bool: if isinstance(other, EDTFObject): return self.lower_strict() >= other.lower_strict() elif isinstance(other, date): @@ -214,7 +219,7 @@ def __ge__(self, other): f"can't compare {type(self).__name__} with {type(other).__name__}" ) - def __lt__(self, other): + def __lt__(self, other) -> bool: if isinstance(other, EDTFObject): return self.lower_strict() < other.lower_strict() elif isinstance(other, date): @@ -225,7 +230,7 @@ def __lt__(self, other): f"can't compare {type(self).__name__} with {type(other).__name__}" ) - def __le__(self, other): + def __le__(self, other) -> bool: if isinstance(other, EDTFObject): return self.lower_strict() <= other.lower_strict() elif isinstance(other, date): @@ -241,118 +246,132 @@ def __le__(self, other): class Date(EDTFObject): - def set_year(self, y): + def __init__( # noqa + self, + year: str | None = None, + month: str | None = None, + day: str | None = None, + significant_digits=None, + **kwargs, + ): + for param in ("date", "lower", "upper"): + if param in kwargs: + self.__init__(**kwargs[param]) + return + + self._year: str | None = ( + year # Year is required, but sometimes passed in as a 'date' dict. 
+ ) + self._month: str | None = month + self._day: str | None = day + self.significant_digits: int | None = ( + int(significant_digits) if significant_digits else None + ) + + def set_year(self, y: str | None): if y is None: raise AttributeError("Year must not be None") self._year = y - def get_year(self): + def get_year(self) -> str | None: return self._year - year = property(get_year, set_year) + year = property(get_year, set_year) # noqa - def set_month(self, m): + def set_month(self, m: str | None): self._month = m if m is None: - self.day = None + self._day = None - def get_month(self): + def get_month(self) -> str | None: return self._month - month = property(get_month, set_month) + month = property(get_month, set_month) # noqa - def __init__( - self, year=None, month=None, day=None, significant_digits=None, **kwargs - ): - for param in ("date", "lower", "upper"): - if param in kwargs: - self.__init__(**kwargs[param]) - return + def set_day(self, d: str | None): + self._day = d + if d is None: + self._day = None - self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
- self.month = month - self.day = day - self.significant_digits = ( - int(significant_digits) if significant_digits else None - ) + def get_day(self) -> str | None: + return self._day - def __str__(self): - r = self.year - if self.month: - r += f"-{self.month}" - if self.day: - r += f"-{self.day}" + day = property(get_day, set_day) # noqa + + def __str__(self) -> str: + r = f"{self._year}" + if self._month is not None: + r += f"-{self._month}" + if self._day is not None: + r += f"-{self._day}" if self.significant_digits: r += f"S{self.significant_digits}" return r - def isoformat(self, default=date.max): - return "%s-%02d-%02d" % ( - self.year, - int(self.month or default.month), - int(self.day or default.day), - ) + def isoformat(self, default=date.max) -> str: + return f"{self._year}-{int(self._month or default.month):02d}-{int(self._day or default.day):02d}" - def lower_fuzzy(self): + def lower_fuzzy(self) -> struct_time: if not hasattr(self, "significant_digits") or not self.significant_digits: return apply_delta( sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST) ) - else: - total_digits = len(self.year) - insignificant_digits = total_digits - self.significant_digits - lower_year = ( - int(self.year) - // (10**insignificant_digits) - * (10**insignificant_digits) - ) - return struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - def upper_fuzzy(self): + total_digits: int = len(self._year) if self._year else 0 + i_year: int = int(self._year) if self._year else 0 + insignificant_digits: int = total_digits - self.significant_digits + lower_year: int = ( + i_year // (10**insignificant_digits) * (10**insignificant_digits) + ) + return struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + + def upper_fuzzy(self) -> struct_time: if not hasattr(self, "significant_digits") or not self.significant_digits: return apply_delta( add, self.upper_strict(), self._get_fuzzy_padding(LATEST) ) - else: - total_digits = len(self.year) - 
insignificant_digits = total_digits - self.significant_digits - upper_year = (int(self.year) // (10**insignificant_digits) + 1) * ( - 10**insignificant_digits - ) - 1 - return struct_time( - [upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS - ) - def _precise_year(self, lean): + total_digits: int = len(self._year) if self._year else 0 + i_year: int = int(self._year) if self._year else 0 + insignificant_digits: int = total_digits - self.significant_digits + upper_year: int = (i_year // (10**insignificant_digits) + 1) * ( + 10**insignificant_digits + ) - 1 + return struct_time([upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + + def _precise_year(self, lean: str) -> int: # Replace any ambiguous characters in the year string with 0s or 9s + if not self._year: + return 0 + if lean == EARLIEST: - return int(re.sub(r"X", r"0", self.year)) + rep = self._year.replace("X", "0") else: - return int(re.sub(r"X", r"9", self.year)) + rep = self._year.replace("X", "9") - def _precise_month(self, lean): - if self.month and self.month != "XX": + return int(rep) + + def _precise_month(self, lean: str) -> int: + if self._month and self._month != "XX": try: - return int(self.month) + return int(self._month) except ValueError as err: raise ValueError( - f"Couldn't convert {self.month} to int (in {self})" + f"Couldn't convert {self._month} to int (in {self})" ) from err - else: - return 1 if lean == EARLIEST else 12 + return 1 if lean == EARLIEST else 12 - def _precise_day(self, lean): - if not self.day or self.day == "XX": + def _precise_day(self, lean: str) -> int: + if not self._day or self._day == "XX": if lean == EARLIEST: return 1 else: return days_in_month( self._precise_year(LATEST), self._precise_month(LATEST) ) - else: - return int(self.day) + return int(self._day) - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST) -> struct_time: """ Return a `time.struct_time` representation of the date. 
""" @@ -367,39 +386,39 @@ def _strict_date(self, lean): ) @property - def precision(self): - if self.day: + def precision(self) -> str: + if self._day: return PRECISION_DAY - if self.month: + if self._month: return PRECISION_MONTH return PRECISION_YEAR - def estimated(self): + def estimated(self) -> int: return self._precise_year(EARLIEST) class DateAndTime(EDTFObject): - def __init__(self, date, time): - self.date = date + def __init__(self, date: Date, time): # noqa: super raises not implemented + self.date: Date = date self.time = time - def __str__(self): + def __str__(self) -> str: return self.isoformat() - def isoformat(self): + def isoformat(self) -> str: return self.date.isoformat() + "T" + self.time - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST) -> struct_time: return self.date._strict_date(lean) - def __eq__(self, other): + def __eq__(self, other) -> bool: if isinstance(other, datetime): return self.isoformat() == other.isoformat() elif isinstance(other, struct_time): return self._strict_date() == trim_struct_time(other) return super().__eq__(other) - def __ne__(self, other): + def __ne__(self, other) -> bool: if isinstance(other, datetime): return self.isoformat() != other.isoformat() elif isinstance(other, struct_time): @@ -408,22 +427,20 @@ def __ne__(self, other): class Interval(EDTFObject): - def __init__(self, lower, upper): + def __init__(self, lower, upper): # noqa: super() raises not implemented self.lower = lower self.upper = upper def __str__(self): return f"{self.lower}/{self.upper}" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST) -> struct_time: if lean == EARLIEST: - r = self.lower._strict_date(lean) - else: - r = self.upper._strict_date(lean) - return r + return self.lower._strict_date(lean) + return self.upper._strict_date(lean) @property - def precision(self): + def precision(self) -> int | None: if self.lower.precision == self.upper.precision: return self.lower.precision 
return None @@ -438,36 +455,36 @@ def parse_action(cls, toks): args = toks.asList() return cls(*args) - def __init__(self, *args): + def __init__(self, *args) -> None: # noqa: super() raises not implemented if len(args) != 1: raise AssertionError("UA must have exactly one argument") ua = args[0] - self.is_uncertain = "?" in ua - self.is_approximate = "~" in ua - self.is_uncertain_and_approximate = "%" in ua + self.is_uncertain: bool = "?" in ua + self.is_approximate: bool = "~" in ua + self.is_uncertain_and_approximate: bool = "%" in ua - def __str__(self): - d = "" + def __str__(self) -> str: if self.is_uncertain: - d += "?" - if self.is_approximate: - d += "~" - if self.is_uncertain_and_approximate: - d += "%" - return d + return "?" + elif self.is_approximate: + return "~" + elif self.is_uncertain_and_approximate: + return "%" + return "" - def _get_multiplier(self): + def _get_multiplier(self) -> float | None: if self.is_uncertain_and_approximate: return appsettings.MULTIPLIER_IF_BOTH elif self.is_uncertain: return appsettings.MULTIPLIER_IF_UNCERTAIN elif self.is_approximate: return appsettings.MULTIPLIER_IF_APPROXIMATE + return None class UncertainOrApproximate(EDTFObject): - def __init__(self, date, ua): + def __init__(self, date, ua): # noqa: super() raises not implemented self.date = date self.ua = ua self.is_uncertain = ua.is_uncertain if ua else False @@ -476,13 +493,12 @@ def __init__(self, date, ua): ua.is_uncertain_and_approximate if ua else False ) - def __str__(self): + def __str__(self) -> str: if self.ua: return f"{self.date}{self.ua}" - else: - return str(self.date) + return str(self.date) - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST) -> tuple: return self.date._strict_date(lean) def _get_fuzzy_padding(self, lean): @@ -511,7 +527,7 @@ def _get_fuzzy_padding(self, lean): class UnspecifiedIntervalSection(EDTFObject): - def __init__(self, sectionOpen=False, other_section_element=None): + def __init__(self, 
sectionOpen=False, other_section_element=None): # noqa: super() raises not implemented if sectionOpen: self.is_open = True self.is_unknown = False @@ -523,22 +539,25 @@ def __init__(self, sectionOpen=False, other_section_element=None): def __str__(self): if self.is_unknown: return "" - else: - return ".." + return ".." + + def _strict_date(self, lean: str = EARLIEST) -> float | None: + if lean not in (EARLIEST, LATEST): + raise ValueError("lean must be one of EARLIEST or LATEST") - def _strict_date(self, lean): if lean == EARLIEST: if self.is_unknown: upper = self.other._strict_date(LATEST) return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: return -math.inf - else: + elif lean == LATEST: if self.is_unknown: lower = self.other._strict_date(EARLIEST) return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) else: return math.inf + return None @property def precision(self): @@ -645,10 +664,10 @@ def lower_strict(self): ) else: return strict_val - else: - return self._strict_date(lean=EARLIEST) - def upper_strict(self): + return self._strict_date(lean=EARLIEST) + + def upper_strict(self) -> struct_time: if self.negative: strict_val = self._strict_date(lean=EARLIEST) if self.precision in ( @@ -673,8 +692,7 @@ def upper_strict(self): ) else: return strict_val - else: - return self._strict_date(lean=LATEST) + return self._strict_date(lean=LATEST) @property def precision(self): @@ -696,7 +714,7 @@ def precision(self): class Level1Interval(Interval): - def __init__(self, lower=None, upper=None): + def __init__(self, lower: Optional[dict] = None, upper: Optional[dict] = None): # noqa if lower: if lower["date"] == "..": self.lower = UnspecifiedIntervalSection( @@ -719,65 +737,67 @@ def __init__(self, lower=None, upper=None): self.upper = UnspecifiedIntervalSection( False, UncertainOrApproximate(**lower) ) - self.is_approximate = self.lower.is_approximate or self.upper.is_approximate - self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain + 
self.is_approximate: bool = ( + self.lower.is_approximate or self.upper.is_approximate + ) + self.is_uncertain: bool = self.lower.is_uncertain or self.upper.is_uncertain self.is_uncertain_and_approximate = ( self.lower.is_uncertain_and_approximate or self.upper.is_uncertain_and_approximate ) - def _get_fuzzy_padding(self, lean): + def _get_fuzzy_padding(self, lean) -> relativedelta | None: if lean == EARLIEST: return self.lower._get_fuzzy_padding(lean) elif lean == LATEST: return self.upper._get_fuzzy_padding(lean) + return None class LongYear(EDTFObject): - def __init__(self, year, significant_digits=None): - self.year = year - self.significant_digits = ( + def __init__(self, year: str, significant_digits: str | None = None): # noqa + self.year: str = year + self.significant_digits: int | None = ( int(significant_digits) if significant_digits else None ) - def __str__(self): + def __str__(self) -> str: if self.significant_digits: return f"Y{self.year}S{self.significant_digits}" - else: - return f"Y{self.year}" + return f"Y{self.year}" - def _precise_year(self): + def _precise_year(self) -> int: return int(self.year) - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST) -> struct_time: py = self._precise_year() if lean == EARLIEST: return struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - else: - return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - def estimated(self): + def estimated(self) -> int: return self._precise_year() - def lower_fuzzy(self): + def lower_fuzzy(self) -> struct_time: full_year = self._precise_year() strict_val = self.lower_strict() if not self.significant_digits: return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) - else: - insignificant_digits = len(str(full_year)) - int(self.significant_digits) - if insignificant_digits <= 0: - return apply_delta(sub, strict_val, 
self._get_fuzzy_padding(EARLIEST)) - padding_value = 10**insignificant_digits - sig_digits = full_year // padding_value - lower_year = sig_digits * padding_value - return apply_delta( - sub, - struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS), - self._get_fuzzy_padding(EARLIEST), - ) - def upper_fuzzy(self): + insignificant_digits = len(str(full_year)) - int(self.significant_digits) + if insignificant_digits <= 0: + return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) + + padding_value = 10**insignificant_digits + sig_digits = full_year // padding_value + lower_year = sig_digits * padding_value + return apply_delta( + sub, + struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS), + self._get_fuzzy_padding(EARLIEST), + ) + + def upper_fuzzy(self) -> struct_time: full_year = self._precise_year() strict_val = self.upper_strict() if not self.significant_digits: @@ -797,47 +817,42 @@ def upper_fuzzy(self): class Season(Date): - def __init__(self, year, season, **kwargs): + def __init__(self, year, season, **kwargs): # noqa self.year = year self.season = season # use season to look up month # day isn't part of the 'season' spec, but it helps the inherited # `Date` methods do their thing. self.day = None - def __str__(self): + def __str__(self) -> str: return f"{self.year}-{self.season}" - def _precise_month(self, lean): + def _precise_month(self, lean: str) -> int: rng = appsettings.SEASON_L2_MONTHS_RANGE[int(self.season)] if lean == EARLIEST: return rng[0] - else: - return rng[1] + + return rng[1] # (* ************************** Level 2 *************************** *) class PartialUncertainOrApproximate(Date): - def set_year(self, y): # Year can be None. 
- self._year = y - - year = property(Date.get_year, set_year) - - def __init__( + def __init__( # noqa self, year=None, month=None, day=None, - year_ua=False, - month_ua=False, - day_ua=False, - year_month_ua=False, - month_day_ua=False, + year_ua: UA | None = None, + month_ua: UA | None = None, + day_ua: UA | None = None, + year_month_ua: UA | None = None, + month_day_ua: UA | None = None, ssn=None, - season_ua=False, - all_ua=False, - year_ua_b=False, + season_ua: UA | None = None, + all_ua: UA | None = None, + year_ua_b: UA | None = None, ): self.year = year self.month = month @@ -866,19 +881,19 @@ def __init__( season_ua, all_ua, ] - self.is_uncertain = any( + self.is_uncertain: bool = any( item.is_uncertain for item in uas if hasattr(item, "is_uncertain") ) - self.is_approximate = any( + self.is_approximate: bool = any( item.is_approximate for item in uas if hasattr(item, "is_approximate") ) - self.is_uncertain_and_approximate = any( + self.is_uncertain_and_approximate: bool = any( item.is_uncertain_and_approximate for item in uas if hasattr(item, "is_uncertain_and_approximate") ) - def __str__(self): + def __str__(self) -> str: if self.season_ua: return f"{self.season}{self.season_ua}" @@ -918,28 +933,33 @@ def __str__(self): return result - def _precise_year(self, lean): + def set_year(self, y): # Year can be None. 
+ self._year = y + + year = property(Date.get_year, set_year) # noqa + + def _precise_year(self, lean: str) -> int: if self.season: return self.season._precise_year(lean) return super()._precise_year(lean) - def _precise_month(self, lean): + def _precise_month(self, lean: str) -> int: if self.season: return self.season._precise_month(lean) return super()._precise_month(lean) - def _precise_day(self, lean): + def _precise_day(self, lean: str) -> int: if self.season: return self.season._precise_day(lean) return super()._precise_day(lean) - def _get_fuzzy_padding(self, lean): + def _get_fuzzy_padding(self, lean: str) -> relativedelta: """ This is not a perfect interpretation as fuzziness is introduced for redundant uncertainly modifiers e.g. (2006~)~ will get two sets of fuzziness. """ - result = relativedelta(0) + result = relativedelta(None) if self.year_ua: result += ( @@ -1001,7 +1021,7 @@ class PartialUnspecified(Unspecified): class Consecutives(Interval): # Treating Consecutive ranges as intervals where one bound is optional - def __init__(self, lower=None, upper=None): + def __init__(self, lower=None, upper=None): # noqa if lower and not isinstance(lower, EDTFObject): self.lower = Date.parse(lower) else: @@ -1012,33 +1032,34 @@ def __init__(self, lower=None, upper=None): else: self.upper = upper - def __str__(self): - return "{}..{}".format(self.lower or "", self.upper or "") + def __str__(self) -> str: + return f"{self.lower or ''}..{self.upper or ''}" class EarlierConsecutives(Level1Interval): - def __str__(self): + def __str__(self) -> str: return f"{self.lower}{self.upper}" class LaterConsecutives(Level1Interval): - def __str__(self): + def __str__(self) -> str: return f"{self.lower}{self.upper}" class OneOfASet(EDTFObject): + def __init__(self, *args): # noqa + self.objects = args + @classmethod def parse_action(cls, toks): args = [t for t in toks.asList() if isinstance(t, EDTFObject)] return cls(*args) - def __init__(self, *args): - self.objects = args - - 
def __str__(self): - return "[{}]".format(", ".join([str(o) for o in self.objects])) + def __str__(self) -> str: + out: str = ", ".join([str(o) for o in self.objects]) + return f"[{out}]" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST) -> float: strict_dates = [x._strict_date(lean) for x in self.objects] # Accounting for possible 'inf' and '-inf' values if lean == LATEST: @@ -1060,38 +1081,40 @@ def _strict_date(self, lean): class MultipleDates(EDTFObject): + def __init__(self, *args): # noqa + self.objects = args + @classmethod def parse_action(cls, toks): args = [t for t in toks.asList() if isinstance(t, EDTFObject)] return cls(*args) - def __init__(self, *args): - self.objects = args - - def __str__(self): - return "{{{}}}".format(", ".join([str(o) for o in self.objects])) + def __str__(self) -> str: + out: str = ", ".join([str(o) for o in self.objects]) + return f"{{{out}}}" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST) -> float: if lean == LATEST: return max([x._strict_date(lean) for x in self.objects]) - else: - return min([x._strict_date(lean) for x in self.objects]) + return min([x._strict_date(lean) for x in self.objects]) class Level2Interval(Level1Interval): - def __init__(self, lower, upper): + def __init__(self, lower, upper): # noqa # Check whether incoming lower/upper values are single-item lists, and # if so take just the first item. This works around what I *think* is a - # bug in the grammer that provides us with single-item lists of + # bug in the grammar that provides us with single-item lists of # `PartialUncertainOrApproximate` items for lower/upper values. 
- if isinstance(lower, (tuple, list)) and len(lower) == 1: + if isinstance(lower, tuple | list) and len(lower) == 1: self.lower = lower[0] else: self.lower = lower - if isinstance(lower, (tuple, list)) and len(upper) == 1: + + if isinstance(upper, tuple | list) and len(upper) == 1: self.upper = upper[0] else: self.upper = upper + self.is_approximate = self.lower.is_approximate or self.upper.is_approximate self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain self.is_uncertain_and_approximate = ( @@ -1105,23 +1128,22 @@ class Level2Season(Season): class ExponentialYear(LongYear): - def __init__(self, base, exponent, significant_digits=None): + def __init__(self, base, exponent, significant_digits=None): # noqa self.base = base self.exponent = exponent self.significant_digits = ( int(significant_digits) if significant_digits else None ) - def _precise_year(self): + def _precise_year(self) -> int: return int(self.base) * 10 ** int(self.exponent) - def get_year(self): + def get_year(self) -> str: if self.significant_digits: return f"{self.base}E{self.exponent}S{self.significant_digits}" - else: - return f"{self.base}E{self.exponent}" + return f"{self.base}E{self.exponent}" - year = property(get_year) + year = property(get_year) # noqa - def estimated(self): + def estimated(self) -> int: return self._precise_year() diff --git a/edtf/py.typed b/edtf/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml index e916189..f2b2213 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,14 @@ [project] name = "edtf" version = "5.0.0" +license = { file = "LICENSE" } +keywords = ['edtf'] dependencies = [ "python-dateutil", "pyparsing", ] description = "Python implementation of Library of Congress EDTF (Extended Date Time Format) specification" -requires-python = ">=3.8" +requires-python = ">=3.10" readme = {file = "README.md", content-type = "text/markdown"} authors = [ { name = "The Interaction Consortium", email 
= "studio@interaction.net.au"}, @@ -16,7 +18,8 @@ authors = [ { name = "Mark Finger" }, { name = "Sabine Müller" }, { name = "Cole Crawford" }, - { name = "Klaus Rettinghaus" } + { name = "Klaus Rettinghaus" }, + { name = "Andrew Hankinson", email = "andrew.hankinson@rism.digital" }, ] maintainers = [ { name = "The Interaction Consortium", email = "studio@interaction.net.au" } @@ -47,6 +50,8 @@ test = [ "coverage", "pytest-cov", "junitparser", + "mypy>=1.15.0", + "pip>=25.1.1", ] [project.urls] @@ -109,7 +114,7 @@ exclude_lines = [ [tool.ruff] # Python 3.8 -target-version = "py38" +target-version = "py310" extend-exclude = [ '**/migrations/*',