From 55d5ff8473f99a7ec5d0bda4b509149d56d1de08 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Jun 2025 16:59:21 +0100 Subject: [PATCH 1/9] CU-8699h2yv2: Avoid using pkg_resources (deprecated) --- medcat/utils/envsnapshot.py | 52 +++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/medcat/utils/envsnapshot.py b/medcat/utils/envsnapshot.py index f3418f54..e1c55c7e 100644 --- a/medcat/utils/envsnapshot.py +++ b/medcat/utils/envsnapshot.py @@ -1,4 +1,3 @@ -import pkg_resources import platform import logging import importlib.metadata @@ -39,25 +38,37 @@ def get_direct_dependencies(include_extras: bool) -> list[str]: def _update_installed_dependencies_recursive( gathered: dict[str, str], - package: pkg_resources.Distribution) -> dict[str, str]: - if package.project_name.lower() in gathered: + package: importlib.metadata.Distribution) -> dict[str, str]: + pkg_name = package.metadata["Name"].lower() + # print("Looking at", repr(pkg_name)) + if pkg_name in gathered: logger.debug("Trying to update already found transitive dependency " - "'%'", package.egg_name) + "'%'", pkg_name) return gathered - for req in package.requires(): - if req.project_name.lower() in gathered: - logger.debug("Trying to look up already found transitive " - "dependency '%'", req.project_name) - continue # don't look for it again + requirements = package.requires + if not requirements: + return gathered + for req_name_and_ver in requirements: + req_name_cs = DEP_NAME_PATTERN.match(req_name_and_ver).group(0) # type: ignore + req_name = req_name_cs.lower() + # to avoid recursion issues + gathered[req_name] = None # type: ignore try: - dep = pkg_resources.get_distribution(req.project_name) - except pkg_resources.DistributionNotFound as e: - logger.warning("Unable to locate requirement '%s':", - req.project_name, exc_info=e) - continue + dep = importlib.metadata.distribution(req_name) + except importlib.metadata.PackageNotFoundError as e1: + # try case 
sensitive as well + try: + dep = importlib.metadata.distribution(req_name_cs) + except importlib.metadata.PackageNotFoundError: + # NOTE: only log warning if it WASN't and extra + if 'extra' not in req_name_and_ver: + logger.warning( + "Unable to locate requirement '%s':", + req_name, exc_info=e1) + gathered.pop(req_name) + continue _update_installed_dependencies_recursive(gathered, dep) - # do this after so its dependencies get explored - gathered[dep.project_name.lower()] = dep.version + gathered[req_name] = dep.version return gathered @@ -73,7 +84,7 @@ def get_transitive_deps(direct_deps: list[str]) -> dict[str, str]: # map from name to version so as to avoid multiples of the same package all_transitive_deps: dict[str, str] = {} for dep in direct_deps: - package = pkg_resources.get_distribution(dep) + package = importlib.metadata.distribution(dep) _update_installed_dependencies_recursive(all_transitive_deps, package) return all_transitive_deps @@ -89,10 +100,11 @@ def get_installed_dependencies(include_extras: bool) -> dict[str, str]: """ direct_deps = get_direct_dependencies(include_extras) installed_packages: dict[str, str] = {} - for package in pkg_resources.working_set: - if package.project_name.lower() not in direct_deps: + for package in importlib.metadata.distributions(): + req_name = package.metadata["Name"].lower() + if req_name not in direct_deps: continue - installed_packages[package.project_name.lower()] = package.version + installed_packages[req_name] = package.version return installed_packages From 8f73ee116cad90116f6f29d9435b0ba8f77b5f78 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Jun 2025 19:40:29 +0100 Subject: [PATCH 2/9] CU-8699h2yv2: Update relevancy check during dependency calculations --- medcat/utils/envsnapshot.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/medcat/utils/envsnapshot.py b/medcat/utils/envsnapshot.py index e1c55c7e..57df89fe 100644 --- a/medcat/utils/envsnapshot.py +++ 
b/medcat/utils/envsnapshot.py @@ -2,6 +2,7 @@ import logging import importlib.metadata import re +from sys import version_info as cur_ver_info from pydantic import BaseModel @@ -12,6 +13,8 @@ DEP_NAME_PATTERN = re.compile(r'^[a-zA-Z0-9\-_]+') +PY_VER_PATTERN = re.compile( + r""".*?python_version\s*(==|!=|<=|>=|<|>)\s*(['"])([^'"]+)\2""") def get_direct_dependencies(include_extras: bool) -> list[str]: @@ -36,6 +39,21 @@ def get_direct_dependencies(include_extras: bool) -> list[str]: return reqs +def _is_relevant(req_name_and_ver: str) -> bool: + if 'extra' in req_name_and_ver: + return False + ver_match = PY_VER_PATTERN.match(req_name_and_ver) + if ver_match: + comp = ver_match.group(1) + ver_nums = ver_match.group(3).split(".") + exp_ver = (int(ver_nums[0]), int(ver_nums[1])) + cur_ver = cur_ver_info.major, cur_ver_info.minor + # eg. 3.10 < 3.11 + to_eval = f"{cur_ver} {comp} {exp_ver}" + return bool(eval(to_eval)) + return True + + def _update_installed_dependencies_recursive( gathered: dict[str, str], package: importlib.metadata.Distribution) -> dict[str, str]: @@ -60,11 +78,10 @@ def _update_installed_dependencies_recursive( try: dep = importlib.metadata.distribution(req_name_cs) except importlib.metadata.PackageNotFoundError: - # NOTE: only log warning if it WASN't and extra - if 'extra' not in req_name_and_ver: + if _is_relevant(req_name_and_ver): logger.warning( - "Unable to locate requirement '%s':", - req_name, exc_info=e1) + "Unable to locate requirement '%s' ('%s'):", + req_name, req_name_and_ver, exc_info=e1) gathered.pop(req_name) continue _update_installed_dependencies_recursive(gathered, dep) From 2c919f6827af2fe62ba752795f37cf9242f62bfc Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Jun 2025 20:54:50 +0100 Subject: [PATCH 3/9] CU-8699h2yv2: Simplify getting of transitive dependencies --- medcat/utils/envsnapshot.py | 98 +++++++++++++++---------------------- 1 file changed, 39 insertions(+), 59 deletions(-) diff --git 
a/medcat/utils/envsnapshot.py b/medcat/utils/envsnapshot.py index 57df89fe..cd217ccd 100644 --- a/medcat/utils/envsnapshot.py +++ b/medcat/utils/envsnapshot.py @@ -2,7 +2,6 @@ import logging import importlib.metadata import re -from sys import version_info as cur_ver_info from pydantic import BaseModel @@ -13,8 +12,6 @@ DEP_NAME_PATTERN = re.compile(r'^[a-zA-Z0-9\-_]+') -PY_VER_PATTERN = re.compile( - r""".*?python_version\s*(==|!=|<=|>=|<|>)\s*(['"])([^'"]+)\2""") def get_direct_dependencies(include_extras: bool) -> list[str]: @@ -39,56 +36,6 @@ def get_direct_dependencies(include_extras: bool) -> list[str]: return reqs -def _is_relevant(req_name_and_ver: str) -> bool: - if 'extra' in req_name_and_ver: - return False - ver_match = PY_VER_PATTERN.match(req_name_and_ver) - if ver_match: - comp = ver_match.group(1) - ver_nums = ver_match.group(3).split(".") - exp_ver = (int(ver_nums[0]), int(ver_nums[1])) - cur_ver = cur_ver_info.major, cur_ver_info.minor - # eg. 3.10 < 3.11 - to_eval = f"{cur_ver} {comp} {exp_ver}" - return bool(eval(to_eval)) - return True - - -def _update_installed_dependencies_recursive( - gathered: dict[str, str], - package: importlib.metadata.Distribution) -> dict[str, str]: - pkg_name = package.metadata["Name"].lower() - # print("Looking at", repr(pkg_name)) - if pkg_name in gathered: - logger.debug("Trying to update already found transitive dependency " - "'%'", pkg_name) - return gathered - requirements = package.requires - if not requirements: - return gathered - for req_name_and_ver in requirements: - req_name_cs = DEP_NAME_PATTERN.match(req_name_and_ver).group(0) # type: ignore - req_name = req_name_cs.lower() - # to avoid recursion issues - gathered[req_name] = None # type: ignore - try: - dep = importlib.metadata.distribution(req_name) - except importlib.metadata.PackageNotFoundError as e1: - # try case sensitive as well - try: - dep = importlib.metadata.distribution(req_name_cs) - except importlib.metadata.PackageNotFoundError: - if 
_is_relevant(req_name_and_ver): - logger.warning( - "Unable to locate requirement '%s' ('%s'):", - req_name, req_name_and_ver, exc_info=e1) - gathered.pop(req_name) - continue - _update_installed_dependencies_recursive(gathered, dep) - gathered[req_name] = dep.version - return gathered - - def get_transitive_deps(direct_deps: list[str]) -> dict[str, str]: """Get the transitive dependencies of the direct dependencies. @@ -98,12 +45,45 @@ def get_transitive_deps(direct_deps: list[str]) -> dict[str, str]: Returns: dict[str, str]: The dependency names and their corresponding versions. """ - # map from name to version so as to avoid multiples of the same package - all_transitive_deps: dict[str, str] = {} - for dep in direct_deps: - package = importlib.metadata.distribution(dep) - _update_installed_dependencies_recursive(all_transitive_deps, package) - return all_transitive_deps + all_deps: dict[str, str] = {} + to_process = set(direct_deps) + processed = set() + # list installed packages for ease of use + installed_packages = { + dist.metadata['name'].lower() + for dist in importlib.metadata.distributions()} + + while to_process: + package = to_process.pop() + if package in processed: + continue + + processed.add(package) + + try: + dist = importlib.metadata.distribution(package) + except importlib.metadata.PackageNotFoundError: + # NOTE: if not installed, we won't bother + # after all, if we can save the model, clearly + # everything is working + continue + requires = dist.requires or [] + + for req in requires: + match = DEP_NAME_PATTERN.match(req) + if match is None: + raise ValueError(f"Malformed dependency: {req}") + dep_name = match.group(0).lower() + if (dep_name and dep_name not in processed and + dep_name in installed_packages): + all_deps[dep_name] = importlib.metadata.distribution( + dep_name).version + to_process.add(dep_name) + + for direct in direct_deps: + # remove direct dependencies if they were added + all_deps.pop(direct, None) + return all_deps def 
get_installed_dependencies(include_extras: bool) -> dict[str, str]: From 66e9870d67edff3decab677856fcc80468489a11 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Jun 2025 20:58:57 +0100 Subject: [PATCH 4/9] CU-8699h2yv2: Unify metadata name access --- medcat/utils/envsnapshot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat/utils/envsnapshot.py b/medcat/utils/envsnapshot.py index cd217ccd..d43e206b 100644 --- a/medcat/utils/envsnapshot.py +++ b/medcat/utils/envsnapshot.py @@ -98,7 +98,7 @@ def get_installed_dependencies(include_extras: bool) -> dict[str, str]: direct_deps = get_direct_dependencies(include_extras) installed_packages: dict[str, str] = {} for package in importlib.metadata.distributions(): - req_name = package.metadata["Name"].lower() + req_name = package.metadata["name"].lower() if req_name not in direct_deps: continue installed_packages[req_name] = package.version From 370ee945acbdaeebaf360149a35b25c298b7c0eb Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Jun 2025 21:07:21 +0100 Subject: [PATCH 5/9] CU-8699h2yv2: Improve getting of installed dependencies --- medcat/utils/envsnapshot.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/medcat/utils/envsnapshot.py b/medcat/utils/envsnapshot.py index d43e206b..ebcbb648 100644 --- a/medcat/utils/envsnapshot.py +++ b/medcat/utils/envsnapshot.py @@ -99,7 +99,10 @@ def get_installed_dependencies(include_extras: bool) -> dict[str, str]: installed_packages: dict[str, str] = {} for package in importlib.metadata.distributions(): req_name = package.metadata["name"].lower() - if req_name not in direct_deps: + req_name_underscores = req_name.replace("-", "_") + req_name_dashes = req_name.replace("_", "-") + if all(cn not in direct_deps for cn in + [req_name, req_name_underscores, req_name_dashes]): continue installed_packages[req_name] = package.version return installed_packages From a2364cead54cfaf688184097ab53294783240938 Mon Sep 17 00:00:00 2001 From: 
mart-r Date: Mon, 23 Jun 2025 21:16:58 +0100 Subject: [PATCH 6/9] CU-8699h2yv2: Add convenience method to figure out if a dependency is installed --- medcat/utils/envsnapshot.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/medcat/utils/envsnapshot.py b/medcat/utils/envsnapshot.py index ebcbb648..6bb9a244 100644 --- a/medcat/utils/envsnapshot.py +++ b/medcat/utils/envsnapshot.py @@ -108,6 +108,28 @@ def get_installed_dependencies(include_extras: bool) -> dict[str, str]: return installed_packages +def is_dependency_installed(dependency: str) -> bool: + """Checks whether a dependency is installed. + + This takes into account changes such as '-' vs '_'. + For example, `typing-extensions` is a direct dependency, + but its module path will be `typing_extensions` and that's + how we can find it as an installed dependency. + + Args: + dependency (str): The dependency in question. + + Returns: + bool: Whether the dependency has been installed. + """ + installed_deps = get_installed_dependencies(True) + dep_name = dependency.lower() + dep_name_underscores = dependency.replace("-", "_") + dep_name_dashes = dependency.replace("_", "-") + options = [dep_name, dep_name_underscores, dep_name_dashes] + return any(option in installed_deps for option in options) + + class Environment(BaseModel, AbstractSerialisable): dependencies: dict[str, str] transitive_deps: dict[str, str] From 82a049804cf7510fe1f5f709ab22d16d4daaf423 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Jun 2025 21:17:39 +0100 Subject: [PATCH 7/9] CU-8699h2yv2: Remove unnecessary option for installation targets --- medcat/utils/envsnapshot.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/medcat/utils/envsnapshot.py b/medcat/utils/envsnapshot.py index 6bb9a244..c8403766 100644 --- a/medcat/utils/envsnapshot.py +++ b/medcat/utils/envsnapshot.py @@ -125,8 +125,7 @@ def is_dependency_installed(dependency: str) -> bool: installed_deps = get_installed_dependencies(True) 
dep_name = dependency.lower() dep_name_underscores = dependency.replace("-", "_") - dep_name_dashes = dependency.replace("_", "-") - options = [dep_name, dep_name_underscores, dep_name_dashes] + options = [dep_name, dep_name_underscores] return any(option in installed_deps for option in options) From 25705984964ae7f40814523bf755a670e6b5097b Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Jun 2025 21:19:43 +0100 Subject: [PATCH 8/9] CU-8699h2yv2: Remove unnecessary option for installed dependency targets --- medcat/utils/envsnapshot.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/medcat/utils/envsnapshot.py b/medcat/utils/envsnapshot.py index c8403766..c345ad53 100644 --- a/medcat/utils/envsnapshot.py +++ b/medcat/utils/envsnapshot.py @@ -99,10 +99,11 @@ def get_installed_dependencies(include_extras: bool) -> dict[str, str]: installed_packages: dict[str, str] = {} for package in importlib.metadata.distributions(): req_name = package.metadata["name"].lower() - req_name_underscores = req_name.replace("-", "_") + # NOTE: we're checking against the '-' typed package name not + # the import name (which will have _ instead) req_name_dashes = req_name.replace("_", "-") if all(cn not in direct_deps for cn in - [req_name, req_name_underscores, req_name_dashes]): + [req_name, req_name_dashes]): continue installed_packages[req_name] = package.version return installed_packages From efe0433f5814d4eb6b4005ab42886eaa9dcd075e Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Jun 2025 21:20:06 +0100 Subject: [PATCH 9/9] CU-8699h2yv2: Update tests to correctly identify installed dependencies --- tests/utils/test_envsnapshot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_envsnapshot.py b/tests/utils/test_envsnapshot.py index fac78253..393c0b09 100644 --- a/tests/utils/test_envsnapshot.py +++ b/tests/utils/test_envsnapshot.py @@ -30,7 +30,7 @@ def test_dir_deps_have_no_version(self): def 
test_all_dir_deps_have_been_installed(self): for dep in self.dir_deps: with self.subTest(dep): - self.assertIn(dep, self.installed_deps) + self.assertTrue(envsnapshot.is_dependency_installed(dep)) def test_all_deps_add_to_correct(self): # NOTE: didn't account for test/dev deps